diff --git a/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/graph/ConnectedComponents.scala b/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/graph/ConnectedComponents.scala
index 4462e458e3deb300052d494725ced8efe75c3add..d2611732723e7246ed27ba5bf38085b5ed521341 100644
--- a/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/graph/ConnectedComponents.scala
+++ b/flink-examples/flink-scala-examples/src/main/scala/org/apache/flink/examples/scala/graph/ConnectedComponents.scala
@@ -76,7 +76,7 @@ object ConnectedComponents {
     val edges = getEdgesDataSet(env).flatMap { edge => Seq(edge, (edge._2, edge._1)) }
 
     // open a delta iteration
-    val verticesWithComponents = vertices.iterateDelta(vertices, maxIterations, Array(0)) {
+    val verticesWithComponents = vertices.iterateDelta(vertices, maxIterations, Array("_1")) {
       (s, ws) =>
 
         // apply the step logic: join with the edges
diff --git a/flink-java/src/main/java/org/apache/flink/api/java/operators/Keys.java b/flink-java/src/main/java/org/apache/flink/api/java/operators/Keys.java
index 482370ec98d16338a5e095a4287179a2c9b57387..40ce2389f3a1164c35f90b2d768e5074b0f3517d 100644
--- a/flink-java/src/main/java/org/apache/flink/api/java/operators/Keys.java
+++ b/flink-java/src/main/java/org/apache/flink/api/java/operators/Keys.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
 
+import com.google.common.base.Joiner;
 import org.apache.flink.api.common.InvalidProgramException;
 import org.apache.flink.api.common.typeinfo.AtomicType;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
@@ -306,7 +307,12 @@ public abstract class Keys<T> {
 			}
 			return Ints.toArray(logicalKeys);
 		}
-		
+
+		@Override
+		public String toString() {
+			Joiner join = Joiner.on('.');
+			return "ExpressionKeys: " + join.join(keyFields);
+		}
 	}
 	
 	private static String[] removeDuplicates(String[] in) {
diff --git a/flink-java/src/test/java/org/apache/flink/api/java/type/extractor/PojoTypeExtractionTest.java b/flink-java/src/test/java/org/apache/flink/api/java/type/extractor/PojoTypeExtractionTest.java
index 01d32c1fe96a3bcda3fc62482abd345ac05d5913..83c81f7cb9896607e4e5da2d1c17ee9437ebc8a0 100644
--- a/flink-java/src/test/java/org/apache/flink/api/java/type/extractor/PojoTypeExtractionTest.java
+++ b/flink-java/src/test/java/org/apache/flink/api/java/type/extractor/PojoTypeExtractionTest.java
@@ -345,7 +345,9 @@ public class PojoTypeExtractionTest {
 		Assert.assertEquals(typeInfo.getTypeClass(), WC.class);
 		Assert.assertEquals(typeInfo.getArity(), 2);
 	}
-	
+
+	// Kryo is required for this, so disable for now.
+	@Ignore
 	@Test
 	public void testPojoAllPublic() {
 		TypeInformation<?> typeForClass = TypeExtractor.createTypeInfo(AllPublic.class);
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
index 895b96407895aa0b7fe487d7a39ec375d082ec1b..7a2c699de4f3089afdeb216d6c2b6f885d63801b 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
@@ -550,14 +550,11 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
   /**
    * Creates a new DataSet containing the distinct elements of this DataSet. The decision whether
    * two elements are distinct or not is made based on only the specified fields.
-   *
-   * This only works on CaseClass DataSets
    */
   def distinct(firstField: String, otherFields: String*): DataSet[T] = {
-    val fieldIndices = fieldNames2Indices(javaSet.getType, firstField +: otherFields.toArray)
     wrap(new DistinctOperator[T](
       javaSet,
-      new Keys.ExpressionKeys[T](fieldIndices, javaSet.getType, true)))
+      new Keys.ExpressionKeys[T](firstField +: otherFields.toArray, javaSet.getType)))
   }
 
   /**
@@ -615,8 +612,6 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
    * This only works on CaseClass DataSets.
    */
   def groupBy(firstField: String, otherFields: String*): GroupedDataSet[T] = {
-    // val fieldIndices = fieldNames2Indices(javaSet.getType, firstField +: otherFields.toArray)
-
     new GroupedDataSet[T](
       this,
       new Keys.ExpressionKeys[T](firstField +: otherFields.toArray, javaSet.getType))
@@ -862,10 +857,8 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
    */
   def iterateDelta[R: ClassTag](workset: DataSet[R], maxIterations: Int, keyFields: Array[String])(
     stepFunction: (DataSet[T], DataSet[R]) => (DataSet[T], DataSet[R])) = {
-    val fieldIndices = fieldNames2Indices(javaSet.getType, keyFields)
-
 
-    val key = new ExpressionKeys[T](fieldIndices, javaSet.getType, false)
+    val key = new ExpressionKeys[T](keyFields, javaSet.getType)
     val iterativeSet = new DeltaIteration[T, R](
       javaSet.getExecutionEnvironment,
       javaSet.getType,
@@ -931,12 +924,10 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
    * significant amount of time.
    */
   def partitionByHash(firstField: String, otherFields: String*): DataSet[T] = {
-    val fieldIndices = fieldNames2Indices(javaSet.getType, firstField +: otherFields.toArray)
-
     val op = new PartitionOperator[T](
       javaSet,
       PartitionMethod.HASH,
-      new Keys.ExpressionKeys[T](fieldIndices, javaSet.getType, false))
+      new Keys.ExpressionKeys[T](firstField +: otherFields.toArray, javaSet.getType))
     wrap(op)
   }
 
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/GroupedDataSet.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/GroupedDataSet.scala
index d7159396589fb1eb57c34d028e7524e8cbccab24..e7d89784df30a588b5d60b423ca0da7a17452827 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/GroupedDataSet.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/GroupedDataSet.scala
@@ -47,7 +47,7 @@ class GroupedDataSet[T: ClassTag](
 
   // These are for optional secondary sort. They are only used
   // when using a group-at-a-time reduce function.
-  private val groupSortKeyPositions = mutable.MutableList[Int]()
+  private val groupSortKeyPositions = mutable.MutableList[Either[Int, String]]()
   private val groupSortOrders = mutable.MutableList[Order]()
 
   /**
@@ -64,7 +64,7 @@ class GroupedDataSet[T: ClassTag](
     if (field >= set.getType.getArity) {
       throw new IllegalArgumentException("Order key out of tuple bounds.")
     }
-    groupSortKeyPositions += field
+    groupSortKeyPositions += Left(field)
     groupSortOrders += order
     this
   }
@@ -76,9 +76,7 @@ class GroupedDataSet[T: ClassTag](
    * This only works on CaseClass DataSets.
    */
   def sortGroup(field: String, order: Order): GroupedDataSet[T] = {
-    val fieldIndex = fieldNames2Indices(set.getType, Array(field))(0)
-
-    groupSortKeyPositions += fieldIndex
+    groupSortKeyPositions += Right(field)
     groupSortOrders += order
     this
   }
@@ -88,14 +86,32 @@ class GroupedDataSet[T: ClassTag](
    */
   private def maybeCreateSortedGrouping(): Grouping[T] = {
     if (groupSortKeyPositions.length > 0) {
-      val grouping = new SortedGrouping[T](
-        set.javaSet,
-        keys,
-        groupSortKeyPositions(0),
-        groupSortOrders(0))
+      val grouping = groupSortKeyPositions(0) match {
+        case Left(pos) =>
+          new SortedGrouping[T](
+            set.javaSet,
+            keys,
+            pos,
+            groupSortOrders(0))
+
+        case Right(field) =>
+          new SortedGrouping[T](
+            set.javaSet,
+            keys,
+            field,
+            groupSortOrders(0))
+
+      }
       // now manually add the rest of the keys
       for (i <- 1 until groupSortKeyPositions.length) {
-        grouping.sortGroup(groupSortKeyPositions(i), groupSortOrders(i))
+        groupSortKeyPositions(i) match {
+          case Left(pos) =>
+            grouping.sortGroup(pos, groupSortOrders(i))
+
+          case Right(field) =>
+            grouping.sortGroup(field, groupSortOrders(i))
+
+        }
       }
       grouping
     } else {
@@ -209,7 +225,7 @@ class GroupedDataSet[T: ClassTag](
       }
     }
     wrap(
-      new GroupReduceOperator[T, R](createUnsortedGrouping(),
+      new GroupReduceOperator[T, R](maybeCreateSortedGrouping(),
         implicitly[TypeInformation[R]], reducer))
   }
 
@@ -227,7 +243,7 @@ class GroupedDataSet[T: ClassTag](
       }
     }
     wrap(
-      new GroupReduceOperator[T, R](createUnsortedGrouping(),
+      new GroupReduceOperator[T, R](maybeCreateSortedGrouping(),
         implicitly[TypeInformation[R]], reducer))
   }
 
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeAnalyzer.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeAnalyzer.scala
index f0ba195fcf50b829f4439ea8aa08e3661a795e64..3a4decae487a60d339bae169cdcd77356b91484c 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeAnalyzer.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeAnalyzer.scala
@@ -17,7 +17,6 @@
  */
 package org.apache.flink.api.scala.codegen
 
-import scala.Option.option2Iterable
 import scala.collection.GenTraversableOnce
 import scala.collection.mutable
 import scala.reflect.macros.Context
@@ -59,12 +58,17 @@ private[flink] trait TypeAnalyzer[C <: Context] { this: MacroContextHolder[C]
           case PrimitiveType(default, wrapper) => PrimitiveDescriptor(id, tpe, default, wrapper)
           case BoxedPrimitiveType(default, wrapper, box, unbox) =>
             BoxedPrimitiveDescriptor(id, tpe, default, wrapper, box, unbox)
-          case ListType(elemTpe, iter) => analyzeList(id, tpe, elemTpe, iter)
+          case ListType(elemTpe, iter) =>
+            analyzeList(id, tpe, elemTpe, iter)
           case CaseClassType() => analyzeCaseClass(id, tpe)
-          case BaseClassType() => analyzeClassHierarchy(id, tpe)
           case ValueType() => ValueDescriptor(id, tpe)
           case WritableType() => WritableDescriptor(id, tpe)
-          case _ => GenericClassDescriptor(id, tpe)
+          case JavaType() =>
+            // It's a Java Class, let the TypeExtractor deal with it...
+            c.warning(c.enclosingPosition, s"Type $tpe is a java class. Will be analyzed by " +
+              s"TypeExtractor at runtime.")
+            GenericClassDescriptor(id, tpe)
+          case _ => analyzePojo(id, tpe)
         }
       }
     }
@@ -78,110 +82,63 @@ private[flink] trait TypeAnalyzer[C <: Context] { this: MacroContextHolder[C]
       case desc => ListDescriptor(id, tpe, iter, desc)
     }
 
-    private def analyzeClassHierarchy(id: Int, tpe: Type): UDTDescriptor = {
-
-      val tagField = {
-        val (intTpe, intDefault, intWrapper) = PrimitiveType.intPrimitive
-        FieldAccessor(
-          NoSymbol,
-          NoSymbol,
-          NullaryMethodType(intTpe),
-          isBaseField = true,
-          PrimitiveDescriptor(cache.newId, intTpe, intDefault, intWrapper))
+    private def analyzePojo(id: Int, tpe: Type): UDTDescriptor = {
+      val immutableFields = tpe.members filter { _.isTerm } map { _.asTerm } filter { _.isVal }
+      if (immutableFields.nonEmpty) {
+        // We don't support POJOs with immutable fields
+        c.warning(
+          c.enclosingPosition,
+          s"Type $tpe is no POJO, has immutable fields: ${immutableFields.mkString(", ")}.")
+        return GenericClassDescriptor(id, tpe)
       }
-      
-      val subTypes = tpe.typeSymbol.asClass.knownDirectSubclasses.toList flatMap { d =>
-
-        val dTpe =
-          {
-            val tArgs = (tpe.typeSymbol.asClass.typeParams, typeArgs(tpe)).zipped.toMap
-            val dArgs = d.asClass.typeParams map { dp =>
-              val tArg = tArgs.keySet.find { tp =>
-                dp == tp.typeSignature.asSeenFrom(d.typeSignature, tpe.typeSymbol).typeSymbol
-              }
-              tArg map { tArgs(_) } getOrElse dp.typeSignature
-            }
 
-            appliedType(d.asType.toType, dArgs)
-          }
+      val fields = tpe.members
+        .filter { _.isTerm }
+        .map { _.asTerm }
+        .filter { _.isVar }
+        .filterNot { _.annotations.exists( _.tpe <:< typeOf[scala.transient]) }
 
-        if (dTpe <:< tpe) {
-          Some(analyze(dTpe))
-        } else {
-          None
-        }
+      if (fields.isEmpty) {
+        c.warning(c.enclosingPosition, "Type $tpe has no fields that are visible from Scala Type" +
+          " analysis. Falling back to Java Type Analysis (TypeExtractor).")
+        return GenericClassDescriptor(id, tpe)
       }
 
-      val errors = subTypes flatMap { _.findByType[UnsupportedDescriptor] }
-
-      errors match {
-        case _ :: _ =>
-          val errorMessage = errors flatMap {
-            case UnsupportedDescriptor(_, subType, errs) =>
-              errs map { err => "Subtype " + subType + " - " + err }
-          }
-          UnsupportedDescriptor(id, tpe, errorMessage)
-
-        case Nil if subTypes.isEmpty =>
-          UnsupportedDescriptor(id, tpe, Seq("No instantiable subtypes found for base class"))
-        case Nil =>
-          val (tParams, _) = tpe.typeSymbol.asClass.typeParams.zip(typeArgs(tpe)).unzip
-          val baseMembers =
-            tpe.members filter { f => f.isMethod } filter { f => f.asMethod.isSetter } map {
-            f => (f, f.asMethod.setter, f.asMethod.returnType)
-          }
-
-          val subMembers = subTypes map {
-            case BaseClassDescriptor(_, _, getters, _) => getters
-            case CaseClassDescriptor(_, _, _, _, getters) => getters
-            case _ => Seq()
-          }
-
-          val baseFields = baseMembers flatMap {
-            case (bGetter, bSetter, bTpe) =>
-              val accessors = subMembers map {
-                _ find { sf =>
-                  sf.getter.name == bGetter.name &&
-                    sf.tpe.termSymbol.asMethod.returnType <:< bTpe.termSymbol.asMethod.returnType
-                }
-              }
-              accessors.forall { _.isDefined } match {
-                case true =>
-                  Some(
-                    FieldAccessor(
-                      bGetter,
-                      bSetter,
-                      bTpe,
-                      isBaseField = true,
-                      analyze(bTpe.termSymbol.asMethod.returnType)))
-                case false => None
-              }
-          }
+      // check whether all fields are either: 1. public, 2. have getter/setter
+      val invalidFields = fields filterNot {
+        f =>
+          f.isPublic ||
+            (f.getter != NoSymbol && f.getter.isPublic && f.setter != NoSymbol && f.setter.isPublic)
+      }
 
-          def wireBaseFields(desc: UDTDescriptor): UDTDescriptor = {
+      if (invalidFields.nonEmpty) {
+        c.warning(c.enclosingPosition, s"Type $tpe is no POJO because it has non-public fields '" +
+          s"${invalidFields.mkString(", ")}' that don't have public getters/setters.")
+        return GenericClassDescriptor(id, tpe)
+      }
 
-            def updateField(field: FieldAccessor) = {
-              baseFields find { bf => bf.getter.name == field.getter.name } match {
-                case Some(FieldAccessor(_, _, _, _, fieldDesc)) =>
-                  field.copy(isBaseField = true, desc = fieldDesc)
-                case None => field
-              }
-            }
+      // check whether we have a zero-parameter ctor
+      val hasZeroCtor = tpe.declarations exists  {
+        case m: MethodSymbol
+          if m.isConstructor && m.paramss.length == 1 && m.paramss(0).length == 0 => true
+        case _ => false
+      }
 
-            desc match {
-              case desc @ BaseClassDescriptor(_, _, getters, baseSubTypes) =>
-                desc.copy(
-                  getters = getters map updateField,
-                  subTypes = baseSubTypes map wireBaseFields)
-              case desc @ CaseClassDescriptor(_, _, _, _, getters) =>
-                desc.copy(getters = getters map updateField)
-              case _ => desc
-            }
-          }
+      if (!hasZeroCtor) {
+        // We don't support POJOs without zero-paramter ctor
+        c.warning(
+          c.enclosingPosition,
+          s"Class $tpe is no POJO, has no zero-parameters constructor.")
+        return GenericClassDescriptor(id, tpe)
+      }
 
-          BaseClassDescriptor(id, tpe, tagField +: baseFields.toSeq, subTypes map wireBaseFields)
+      val fieldDescriptors = fields map {
+        f =>
+          val fieldTpe = f.getter.asMethod.returnType.asSeenFrom(tpe, tpe.typeSymbol)
+          FieldDescriptor(f.name.toString.trim, f.getter, f.setter, fieldTpe, analyze(fieldTpe))
       }
 
+      PojoDescriptor(id, tpe, fieldDescriptors.toSeq)
     }
 
     private def analyzeCaseClass(id: Int, tpe: Type): UDTDescriptor = {
@@ -216,7 +173,7 @@ private[flink] trait TypeAnalyzer[C <: Context] { this: MacroContextHolder[C]
               }
               val fields = caseFields map {
                 case (fgetter, fsetter, fTpe) =>
-                  FieldAccessor(fgetter, fsetter, fTpe, isBaseField = false, analyze(fTpe))
+                  FieldDescriptor(fgetter.name.toString.trim, fgetter, fsetter, fTpe, analyze(fTpe))
               }
               val mutable = enableMutableUDTs && (fields forall { f => f.setter != NoSymbol })
               if (mutable) {
@@ -226,8 +183,9 @@ private[flink] trait TypeAnalyzer[C <: Context] { this: MacroContextHolder[C]
                 case errs @ _ :: _ =>
                   val msgs = errs flatMap { f =>
                     (f: @unchecked) match {
-                      case FieldAccessor(fgetter, _,_,_, UnsupportedDescriptor(_, fTpe, errors)) =>
-                        errors map { err => "Field " + fgetter.name + ": " + fTpe + " - " + err }
+                      case FieldDescriptor(
+                        fName, _, _, _, UnsupportedDescriptor(_, fTpe, errors)) =>
+                        errors map { err => "Field " + fName + ": " + fTpe + " - " + err }
                     }
                   }
                   UnsupportedDescriptor(id, tpe, msgs)
@@ -296,11 +254,6 @@ private[flink] trait TypeAnalyzer[C <: Context] { this: MacroContextHolder[C]
       def unapply(tpe: Type): Boolean = tpe.typeSymbol.asClass.isCaseClass
     }
 
-    private object BaseClassType {
-      def unapply(tpe: Type): Boolean =
-        tpe.typeSymbol.asClass.isAbstractClass && tpe.typeSymbol.asClass.isSealed
-    }
-    
     private object ValueType {
       def unapply(tpe: Type): Boolean =
         tpe.typeSymbol.asClass.baseClasses exists {
@@ -315,6 +268,10 @@ private[flink] trait TypeAnalyzer[C <: Context] { this: MacroContextHolder[C]
         }
     }
 
+    private object JavaType {
+      def unapply(tpe: Type): Boolean = tpe.typeSymbol.asClass.isJava
+    }
+
     private class UDTAnalyzerCache {
 
       private val caches = new DynamicVariable[Map[Type, RecursiveDescriptor]](Map())
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeDescriptors.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeDescriptors.scala
index 8201a6888c6faa4705d1b696e3c2ab3aca75431e..66299c75b373f794ca4abe8ac6aedc054525a996 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeDescriptors.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeDescriptors.scala
@@ -36,10 +36,8 @@ private[flink] trait TypeDescriptors[C <: Context] { this: MacroContextHolder[C]
     
     def canBeKey: Boolean
 
-    def mkRoot: UDTDescriptor = this
-
     def flatten: Seq[UDTDescriptor]
-    def getters: Seq[FieldAccessor] = Seq()
+    def getters: Seq[FieldDescriptor] = Seq()
 
     def select(member: String): Option[UDTDescriptor] =
       getters find { _.getter.name.toString == member } map { _.desc }
@@ -48,7 +46,7 @@ private[flink] trait TypeDescriptors[C <: Context] { this: MacroContextHolder[C]
       case Nil => Seq(Some(this))
       case head :: tail => getters find { _.getter.name.toString == head } match {
         case None => Seq(None)
-        case Some(d : FieldAccessor) => d.desc.select(tail)
+        case Some(d : FieldDescriptor) => d.desc.select(tail)
       }
     }
 
@@ -60,7 +58,7 @@ private[flink] trait TypeDescriptors[C <: Context] { this: MacroContextHolder[C]
     }
 
     def getRecursiveRefs: Seq[UDTDescriptor] =
-      findByType[RecursiveDescriptor].flatMap { rd => findById(rd.refId) }.map { _.mkRoot }.distinct
+      findByType[RecursiveDescriptor].flatMap { rd => findById(rd.refId) }.distinct
   }
 
   case class GenericClassDescriptor(id: Int, tpe: Type) extends UDTDescriptor {
@@ -116,30 +114,45 @@ private[flink] trait TypeDescriptors[C <: Context] { this: MacroContextHolder[C]
     }
   }
 
-  case class BaseClassDescriptor(
-      id: Int, tpe: Type, override val getters: Seq[FieldAccessor], subTypes: Seq[UDTDescriptor])
+  case class PojoDescriptor(id: Int, tpe: Type, override val getters: Seq[FieldDescriptor])
     extends UDTDescriptor {
 
-    override def flatten =
-      this +: ((getters flatMap { _.desc.flatten }) ++ (subTypes flatMap { _.flatten }))
+    override val isPrimitiveProduct = getters.nonEmpty && getters.forall(_.desc.isPrimitiveProduct)
+
+    override def flatten = this +: (getters flatMap { _.desc.flatten })
+
     override def canBeKey = flatten forall { f => f.canBeKey }
-    
+
+    // Hack: ignore the ctorTpe, since two Type instances representing
+    // the same ctor function type don't appear to be considered equal.
+    // Equality of the tpe and ctor fields implies equality of ctorTpe anyway.
+    override def hashCode = (id, tpe, getters).hashCode
+    override def equals(that: Any) = that match {
+      case PojoDescriptor(thatId, thatTpe, thatGetters) =>
+        (id, tpe, getters).equals(
+          thatId, thatTpe, thatGetters)
+      case _ => false
+    }
+
     override def select(path: List[String]): Seq[Option[UDTDescriptor]] = path match {
       case Nil => getters flatMap { g => g.desc.select(Nil) }
       case head :: tail => getters find { _.getter.name.toString == head } match {
         case None => Seq(None)
-        case Some(d : FieldAccessor) => d.desc.select(tail)
+        case Some(d : FieldDescriptor) => d.desc.select(tail)
       }
     }
   }
 
   case class CaseClassDescriptor(
-      id: Int, tpe: Type, mutable: Boolean, ctor: Symbol, override val getters: Seq[FieldAccessor])
+      id: Int,
+      tpe: Type,
+      mutable: Boolean,
+      ctor: Symbol,
+      override val getters: Seq[FieldDescriptor])
     extends UDTDescriptor {
 
     override val isPrimitiveProduct = getters.nonEmpty && getters.forall(_.desc.isPrimitiveProduct)
 
-    override def mkRoot = this.copy(getters = getters map { _.copy(isBaseField = false) })
     override def flatten = this +: (getters flatMap { _.desc.flatten })
     
     override def canBeKey = flatten forall { f => f.canBeKey }
@@ -159,16 +172,16 @@ private[flink] trait TypeDescriptors[C <: Context] { this: MacroContextHolder[C]
       case Nil => getters flatMap { g => g.desc.select(Nil) }
       case head :: tail => getters find { _.getter.name.toString == head } match {
         case None => Seq(None)
-        case Some(d : FieldAccessor) => d.desc.select(tail)
+        case Some(d : FieldDescriptor) => d.desc.select(tail)
       }
     }
   }
 
-  case class FieldAccessor(
+  case class FieldDescriptor(
+      name: String,
       getter: Symbol,
       setter: Symbol,
       tpe: Type,
-      isBaseField: Boolean,
       desc: UDTDescriptor)
 
   case class RecursiveDescriptor(id: Int, tpe: Type, refId: Int) extends UDTDescriptor {
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeInformationGen.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeInformationGen.scala
index f6a89d307b5503f4f3abdbe4a22a0f94c45eaeef..068666879e01122a9c38f2aab96b830399c54739 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeInformationGen.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/codegen/TypeInformationGen.scala
@@ -20,7 +20,6 @@ package org.apache.flink.api.scala.codegen
 import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo
 import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo
 import org.apache.flink.api.common.typeinfo.BasicTypeInfo
-import org.apache.flink.api.common.typeinfo.BasicTypeInfo
 
 import org.apache.flink.api.common.typeutils.TypeSerializer
 import org.apache.flink.api.java.typeutils._
@@ -29,6 +28,8 @@ import org.apache.flink.types.Value
 import org.apache.flink.api.common.typeinfo.TypeInformation
 import org.apache.hadoop.io.Writable
 
+import scala.collection.JavaConverters._
+
 import scala.reflect.macros.Context
 
 private[flink] trait TypeInformationGen[C <: Context] {
@@ -41,7 +42,7 @@ private[flink] trait TypeInformationGen[C <: Context] {
 
   // This is for external calling by TypeUtils.createTypeInfo
   def mkTypeInfo[T: c.WeakTypeTag]: c.Expr[TypeInformation[T]] = {
-    val desc = getUDTDescriptor(weakTypeOf[T])
+    val desc = getUDTDescriptor(weakTypeTag[T].tpe)
     val result: c.Expr[TypeInformation[T]] = mkTypeInfo(desc)(c.WeakTypeTag(desc.tpe))
     result
   }
@@ -61,6 +62,7 @@ private[flink] trait TypeInformationGen[C <: Context] {
     case d : WritableDescriptor =>
       mkWritableTypeInfo(d)(c.WeakTypeTag(d.tpe).asInstanceOf[c.WeakTypeTag[Writable]])
         .asInstanceOf[c.Expr[TypeInformation[T]]]
+    case pojo: PojoDescriptor => mkPojo(pojo)
     case d => mkGenericTypeInfo(d)
   }
 
@@ -96,7 +98,7 @@ private[flink] trait TypeInformationGen[C <: Context] {
   def mkListTypeInfo[T: c.WeakTypeTag](desc: ListDescriptor): c.Expr[TypeInformation[T]] = {
     val arrayClazz = c.Expr[Class[T]](Literal(Constant(desc.tpe)))
     val elementClazz = c.Expr[Class[T]](Literal(Constant(desc.elem.tpe)))
-    val elementTypeInfo = mkTypeInfo(desc.elem)
+    val elementTypeInfo = mkTypeInfo(desc.elem)(c.WeakTypeTag(desc.elem.tpe))
     desc.elem match {
       // special case for string, which in scala is a primitive, but not in java
       case p: PrimitiveDescriptor if p.tpe <:< typeOf[String] =>
@@ -115,7 +117,8 @@ private[flink] trait TypeInformationGen[C <: Context] {
         reify {
           ObjectArrayTypeInfo.getInfoFor(
             arrayClazz.splice,
-            elementTypeInfo.splice).asInstanceOf[TypeInformation[T]]
+            elementTypeInfo.splice.asInstanceOf[TypeInformation[_]])
+            .asInstanceOf[TypeInformation[T]]
         }
     }
   }
@@ -136,6 +139,35 @@ private[flink] trait TypeInformationGen[C <: Context] {
     }
   }
 
+  def mkPojo[T: c.WeakTypeTag](desc: PojoDescriptor): c.Expr[TypeInformation[T]] = {
+    val tpeClazz = c.Expr[Class[T]](Literal(Constant(desc.tpe)))
+    val fieldsTrees = desc.getters map {
+      f =>
+        val name = c.Expr(Literal(Constant(f.name)))
+        val fieldType = mkTypeInfo(f.desc)(c.WeakTypeTag(f.tpe))
+        reify { (name.splice, fieldType.splice) }.tree
+    }
+
+    val fieldsList = c.Expr[List[(String, TypeInformation[_])]](mkList(fieldsTrees.toList))
+
+    reify {
+      val fields =  fieldsList.splice
+      val clazz: Class[T] = tpeClazz.splice
+
+      val fieldMap = TypeExtractor.getAllDeclaredFields(clazz).asScala map {
+        f => (f.getName, f)
+      } toMap
+
+      val pojoFields = fields map {
+        case (fName, fTpe) =>
+          new PojoField(fieldMap(fName), fTpe)
+      }
+
+      new PojoTypeInfo(clazz, pojoFields.asJava)
+
+    }
+  }
+
   def mkGenericTypeInfo[T: c.WeakTypeTag](desc: UDTDescriptor): c.Expr[TypeInformation[T]] = {
     val tpeClazz = c.Expr[Class[T]](Literal(Constant(desc.tpe)))
     reify {
@@ -158,39 +190,4 @@ private[flink] trait TypeInformationGen[C <: Context] {
     val result = Apply(Select(New(TypeTree(desc.tpe)), nme.CONSTRUCTOR), fields.toList)
     c.Expr[T](result)
   }
-
-//    def mkCaseClassTypeInfo[T: c.WeakTypeTag](
-//        desc: CaseClassDescriptor): c.Expr[TypeInformation[T]] = {
-//      val tpeClazz = c.Expr[Class[_]](Literal(Constant(desc.tpe)))
-//      val caseFields = mkCaseFields(desc)
-//      reify {
-//        new ScalaTupleTypeInfo[T] {
-//          def createSerializer: TypeSerializer[T] = {
-//            null
-//          }
-//
-//          val fields: Map[String, TypeInformation[_]] = caseFields.splice
-//          val clazz = tpeClazz.splice
-//        }
-//      }
-//    }
-//
-//  private def mkCaseFields(desc: UDTDescriptor): c.Expr[Map[String, TypeInformation[_]]] = {
-//    val fields = getFields("_root_", desc).toList map { case (fieldName, fieldDesc) =>
-//      val nameTree = c.Expr(Literal(Constant(fieldName)))
-//      val fieldTypeInfo = mkTypeInfo(fieldDesc)(c.WeakTypeTag(fieldDesc.tpe))
-//      reify { (nameTree.splice, fieldTypeInfo.splice) }.tree
-//    }
-//
-//    c.Expr(mkMap(fields))
-//  }
-//
-//  protected def getFields(name: String, desc: UDTDescriptor): Seq[(String, UDTDescriptor)] =
-//  desc match {
-//    // Flatten product types
-//    case CaseClassDescriptor(_, _, _, _, getters) =>
-//      getters filterNot { _.isBaseField } flatMap {
-//        f => getFields(name + "." + f.getter.name, f.desc) }
-//    case _ => Seq((name, desc))
-//  }
 }
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/typeutils/CaseClassTypeInfo.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/typeutils/CaseClassTypeInfo.scala
index 3e9d4c63008f61b94e2fc36b6a43784f359ad43a..53d1deaf169a9e2c026234eab2f25b9b96181133 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/typeutils/CaseClassTypeInfo.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/typeutils/CaseClassTypeInfo.scala
@@ -19,8 +19,10 @@ package org.apache.flink.api.scala.typeutils
 
 import org.apache.flink.api.common.typeinfo.TypeInformation
 import org.apache.flink.api.common.typeinfo.AtomicType
+import org.apache.flink.api.common.typeutils.CompositeType.FlatFieldDescriptor
+import org.apache.flink.api.java.operators.Keys.ExpressionKeys
 import org.apache.flink.api.java.typeutils.TupleTypeInfoBase
-import org.apache.flink.api.common.typeutils.{TypeComparator, TypeSerializer}
+import org.apache.flink.api.common.typeutils.{CompositeType, TypeComparator}
 
 /**
  * TypeInformation for Case Classes. Creation and access is different from
@@ -58,16 +60,82 @@ abstract class CaseClassTypeInfo[T <: Product](
   override protected def getNewComparator: TypeComparator[T] = {
     val finalLogicalKeyFields = logicalKeyFields.take(comparatorHelperIndex)
     val finalComparators = fieldComparators.take(comparatorHelperIndex)
-    var maxKey: Int = 0
-    for (key <- finalLogicalKeyFields) {
-      maxKey = Math.max(maxKey, key)
+    val maxKey = finalLogicalKeyFields.max
+
+    // create serializers only up to the last key, fields after that are not needed
+    val fieldSerializers = types.take(maxKey + 1).map(_.createSerializer)
+    new CaseClassComparator[T](finalLogicalKeyFields, finalComparators, fieldSerializers.toArray)
+  }
+
+  override def getKey(
+      fieldExpression: String,
+      offset: Int,
+      result: java.util.List[FlatFieldDescriptor]): Unit = {
+
+    if (fieldExpression == ExpressionKeys.SELECT_ALL_CHAR) {
+      var keyPosition = 0
+      for (tpe <- types) {
+        tpe match {
+          case a: AtomicType[_] =>
+            result.add(new CompositeType.FlatFieldDescriptor(offset + keyPosition, tpe))
+
+          case co: CompositeType[_] =>
+            co.getKey(ExpressionKeys.SELECT_ALL_CHAR, offset + keyPosition, result)
+            keyPosition += co.getTotalFields - 1
+
+          case _ => throw new RuntimeException(s"Unexpected key type: $tpe")
+
+        }
+        keyPosition += 1
+      }
+      return
+    }
+
+    if (fieldExpression == null || fieldExpression.length <= 0) {
+      throw new IllegalArgumentException("Field expression must not be empty.")
     }
-    val fieldSerializers: Array[TypeSerializer[_]] = new Array[TypeSerializer[_]](maxKey + 1)
 
-    for (i <- 0 to maxKey) {
-      fieldSerializers(i) = types(i).createSerializer
+    fieldExpression.split('.').toList match {
+      case headField :: Nil =>
+        var fieldId = 0
+        for (i <- 0 until fieldNames.length) {
+          fieldId += types(i).getTotalFields - 1
+
+          if (fieldNames(i) == headField) {
+            if (fieldTypes(i).isInstanceOf[CompositeType[_]]) {
+              throw new IllegalArgumentException(
+                s"The specified field '$fieldExpression' is refering to a composite type.\n"
+                + s"Either select all elements in this type with the " +
+                  s"'${ExpressionKeys.SELECT_ALL_CHAR}' operator or specify a field in" +
+                  s" the sub-type")
+            }
+            result.add(new CompositeType.FlatFieldDescriptor(offset + fieldId, fieldTypes(i)))
+            return
+          }
+
+          fieldId += 1
+        }
+      case firstField :: rest =>
+        var fieldId = 0
+        for (i <- 0 until fieldNames.length) {
+
+          if (fieldNames(i) == firstField) {
+            fieldTypes(i) match {
+              case co: CompositeType[_] =>
+                co.getKey(rest.mkString("."), offset + fieldId, result)
+                return
+
+              case _ =>
+                throw new RuntimeException(s"Field ${fieldTypes(i)} is not a composite type.")
+
+            }
+          }
+
+          fieldId += types(i).getTotalFields
+        }
     }
-    new CaseClassComparator[T](finalLogicalKeyFields, finalComparators, fieldSerializers)
+
+    throw new RuntimeException(s"Unable to find field $fieldExpression in type $this.")
   }
 
   override def toString = clazz.getSimpleName + "(" + fieldNames.zip(types).map {
diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/unfinishedKeyPairOperation.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/unfinishedKeyPairOperation.scala
index 9d9a19f0a37db2b5f25d46a4eff0055a0fe99a35..b2929b902117bade3ca77a97166b10c1e22e80b4 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/unfinishedKeyPairOperation.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/unfinishedKeyPairOperation.scala
@@ -69,11 +69,9 @@ private[flink] abstract class UnfinishedKeyPairOperation[L, R, O](
    * This only works on a CaseClass [[DataSet]].
    */
   def where(firstLeftField: String, otherLeftFields: String*) = {
-    val fieldIndices = fieldNames2Indices(
-      leftInput.getType,
-      firstLeftField +: otherLeftFields.toArray)
-
-    val leftKey = new ExpressionKeys[L](fieldIndices, leftInput.getType)
+    val leftKey = new ExpressionKeys[L](
+      firstLeftField +: otherLeftFields.toArray,
+      leftInput.getType)
     new HalfUnfinishedKeyPairOperation[L, R, O](this, leftKey)
   }
 
@@ -118,11 +116,9 @@ private[flink] class HalfUnfinishedKeyPairOperation[L, R, O](
    * This only works on a CaseClass [[DataSet]].
    */
   def equalTo(firstRightField: String, otherRightFields: String*): O = {
-    val fieldIndices = fieldNames2Indices(
-      unfinished.rightInput.getType,
-      firstRightField +: otherRightFields.toArray)
-
-    val rightKey = new ExpressionKeys[R](fieldIndices, unfinished.rightInput.getType)
+    val rightKey = new ExpressionKeys[R](
+      firstRightField +: otherRightFields.toArray,
+      unfinished.rightInput.getType)
     if (!leftKey.areCompatible(rightKey)) {
       throw new InvalidProgramException("The types of the key fields do not match. Left: " +
         leftKey + " Right: " + rightKey)
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/AggregateITCase.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/AggregateITCase.scala
index 631e68a7fec1a9825584c64a2341f8278c6867d4..0e3f2ed6956fcdd9910d1f7dd10c0631d80bf9bd 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/AggregateITCase.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/AggregateITCase.scala
@@ -19,6 +19,7 @@ package org.apache.flink.api.scala.operators
 
 import org.apache.flink.api.java.aggregation.Aggregations
 import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.api.scala.util.CollectionDataSets
 import org.apache.flink.configuration.Configuration
 import org.apache.flink.test.util.JavaProgramTestBase
 import org.junit.runner.RunWith
@@ -34,38 +35,14 @@ import org.apache.flink.api.scala._
 object AggregateProgs {
   var NUM_PROGRAMS: Int = 3
 
-  val tupleInput = Array(
-    (1,1L,"Hi"),
-    (2,2L,"Hello"),
-    (3,2L,"Hello world"),
-    (4,3L,"Hello world, how are you?"),
-    (5,3L,"I am fine."),
-    (6,3L,"Luke Skywalker"),
-    (7,4L,"Comment#1"),
-    (8,4L,"Comment#2"),
-    (9,4L,"Comment#3"),
-    (10,4L,"Comment#4"),
-    (11,5L,"Comment#5"),
-    (12,5L,"Comment#6"),
-    (13,5L,"Comment#7"),
-    (14,5L,"Comment#8"),
-    (15,5L,"Comment#9"),
-    (16,6L,"Comment#10"),
-    (17,6L,"Comment#11"),
-    (18,6L,"Comment#12"),
-    (19,6L,"Comment#13"),
-    (20,6L,"Comment#14"),
-    (21,6L,"Comment#15")
-  )
-
-
   def runProgram(progId: Int, resultPath: String): String = {
     progId match {
       case 1 =>
         // Full aggregate
         val env = ExecutionEnvironment.getExecutionEnvironment
         env.setDegreeOfParallelism(10)
-        val ds = env.fromCollection(tupleInput)
+//        val ds = CollectionDataSets.get3TupleDataSet(env)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
 
         val aggregateDs = ds
           .aggregate(Aggregations.SUM,0)
@@ -84,7 +61,7 @@ object AggregateProgs {
       case 2 =>
         // Grouped aggregate
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
 
         val aggregateDs = ds
           .groupBy(1)
@@ -103,7 +80,7 @@ object AggregateProgs {
       case 3 =>
         // Nested aggregate
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
 
         val aggregateDs = ds
           .groupBy(1)
@@ -111,7 +88,7 @@ object AggregateProgs {
           .aggregate(Aggregations.MIN, 0)
           // Ensure aggregate operator correctly copies other fields
           .filter(_._3 != null)
-          .map { t => Tuple1(t._1) }
+          .map { t => new Tuple1(t._1) }
 
         aggregateDs.writeAsCsv(resultPath)
 
@@ -140,7 +117,7 @@ class AggregateITCase(config: Configuration) extends JavaProgramTestBase(config)
   }
 
   protected def testProgram(): Unit = {
-    expectedResult = AggregateProgs.runProgram(curProgId, resultPath)
+    expectedResult = DistinctProgs.runProgram(curProgId, resultPath)
   }
 
   protected override def postSubmit(): Unit = {
@@ -152,7 +129,7 @@ object AggregateITCase {
   @Parameters
   def getConfigurations: java.util.Collection[Array[AnyRef]] = {
     val configs = mutable.MutableList[Array[AnyRef]]()
-    for (i <- 1 to AggregateProgs.NUM_PROGRAMS) {
+    for (i <- 1 to DistinctProgs.NUM_PROGRAMS) {
       val config = new Configuration()
       config.setInteger("ProgramId", i)
       configs += Array(config)
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CoGroupOperatorTest.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CoGroupOperatorTest.scala
index d962b76369757ee320383afce41ce644fa7d1063..3f0ca5ff643ed81f6ef9a7e15e11bd4b29a486e5 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CoGroupOperatorTest.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CoGroupOperatorTest.scala
@@ -17,13 +17,11 @@
  */
 package org.apache.flink.api.scala.operators
 
-import java.io.Serializable
-import org.apache.flink.api.common.InvalidProgramException
 import org.apache.flink.api.java.operators.Keys.IncompatibleKeysException
 import org.junit.Assert
-import org.junit.Ignore
 import org.junit.Test
 import org.apache.flink.api.scala._
+import org.apache.flink.api.scala.util.CollectionDataSets.CustomType
 
 class CoGroupOperatorTest {
 
@@ -130,7 +128,7 @@ class CoGroupOperatorTest {
     ds1.coGroup(ds2).where("_1", "_2").equalTo("_3")
   }
 
-  @Test(expected = classOf[IllegalArgumentException])
+  @Test(expected = classOf[RuntimeException])
   def testCoGroupKeyFieldNames4(): Unit =  {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(emptyTupleData)
@@ -140,7 +138,7 @@ class CoGroupOperatorTest {
     ds1.coGroup(ds2).where("_6").equalTo("_1")
   }
 
-  @Test(expected = classOf[IllegalArgumentException])
+  @Test(expected = classOf[RuntimeException])
   def testCoGroupKeyFieldNames5(): Unit =  {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(emptyTupleData)
@@ -150,7 +148,7 @@ class CoGroupOperatorTest {
     ds1.coGroup(ds2).where("_1").equalTo("bar")
   }
 
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[RuntimeException])
   def testCoGroupKeyFieldNames6(): Unit = {
     val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(emptyTupleData)
@@ -160,7 +158,6 @@ class CoGroupOperatorTest {
     ds1.coGroup(ds2).where("_3").equalTo("_1")
   }
 
-  @Ignore
   @Test
   def testCoGroupKeyExpressions1(): Unit =  {
     val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
@@ -176,29 +173,26 @@ class CoGroupOperatorTest {
     }
   }
 
-  @Ignore
-  @Test(expected = classOf[InvalidProgramException])
+  @Test(expected = classOf[IncompatibleKeysException])
   def testCoGroupKeyExpressions2(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(customTypeData)
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, incompatible key types
-//    ds1.coGroup(ds2).where("i").equalTo("s")
+    ds1.coGroup(ds2).where("myInt").equalTo("myString")
   }
 
-  @Ignore
-  @Test(expected = classOf[InvalidProgramException])
+  @Test(expected = classOf[IncompatibleKeysException])
   def testCoGroupKeyExpressions3(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(customTypeData)
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, incompatible number of keys
-//    ds1.coGroup(ds2).where("i", "s").equalTo("s")
+    ds1.coGroup(ds2).where("myInt", "myString").equalTo("myString")
   }
 
-  @Ignore
   @Test(expected = classOf[IllegalArgumentException])
   def testCoGroupKeyExpressions4(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
@@ -207,7 +201,7 @@ class CoGroupOperatorTest {
 
 
     // should not work, key non-existent
-//    ds1.coGroup(ds2).where("myNonExistent").equalTo("i")
+    ds1.coGroup(ds2).where("myNonExistent").equalTo("i")
   }
 
   @Test
@@ -218,7 +212,7 @@ class CoGroupOperatorTest {
 
     // Should work
     try {
-      ds1.coGroup(ds2).where { _.l } equalTo { _.l }
+      ds1.coGroup(ds2).where { _.myLong } equalTo { _.myLong }
     }
     catch {
       case e: Exception => Assert.fail()
@@ -233,7 +227,7 @@ class CoGroupOperatorTest {
 
     // Should work
     try {
-      ds1.coGroup(ds2).where { _.l}.equalTo(3)
+      ds1.coGroup(ds2).where { _.myLong }.equalTo(3)
     }
     catch {
       case e: Exception => Assert.fail()
@@ -248,7 +242,7 @@ class CoGroupOperatorTest {
 
     // Should work
     try {
-      ds1.coGroup(ds2).where(3).equalTo { _.l }
+      ds1.coGroup(ds2).where(3).equalTo { _.myLong }
     }
     catch {
       case e: Exception => Assert.fail()
@@ -262,7 +256,7 @@ class CoGroupOperatorTest {
     val ds2 = env.fromCollection(customTypeData)
 
     // Should not work, incompatible types
-    ds1.coGroup(ds2).where(2).equalTo { _.l }
+    ds1.coGroup(ds2).where(2).equalTo { _.myLong }
   }
 
   @Test(expected = classOf[IncompatibleKeysException])
@@ -272,7 +266,7 @@ class CoGroupOperatorTest {
     val ds2 = env.fromCollection(customTypeData)
 
     // Should not work, more than one field position key
-    ds1.coGroup(ds2).where(1, 3).equalTo { _.l }
+    ds1.coGroup(ds2).where(1, 3).equalTo { _.myLong }
   }
 }
 
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CustomType.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CustomType.scala
deleted file mode 100644
index 94627b93dd48671b42bb89d1ece7d79d7c6a63a2..0000000000000000000000000000000000000000
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/CustomType.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.flink.api.scala.operators
-
-import java.io.Serializable
-
-/**
- * A custom data type that is used by the operator Tests.
- */
-class CustomType(var i:Int, var l: Long, var s: String) extends Serializable {
-  def this() {
-    this(0, 0, null)
-  }
-
-  override def toString: String = {
-    i + "," + l + "," + s
-  }
-}
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctITCase.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctITCase.scala
new file mode 100644
index 0000000000000000000000000000000000000000..855335def239e4f66720961c6a882d23747b406e
--- /dev/null
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctITCase.scala
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.api.scala.operators
+
+import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.api.scala.util.CollectionDataSets
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.test.util.JavaProgramTestBase
+import org.junit.runner.RunWith
+import org.junit.runners.Parameterized
+import org.junit.runners.Parameterized.Parameters
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.flink.api.scala._
+
+
+object DistinctProgs {
+  var NUM_PROGRAMS: Int = 8
+
+
+  def runProgram(progId: Int, resultPath: String): String = {
+    progId match {
+      case 1 =>
+        /*
+         * Check correctness of distinct on tuples with key field selector
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getSmall3TupleDataSet(env)
+
+        val distinctDs = ds.union(ds).distinct(0, 1, 2)
+        distinctDs.writeAsCsv(resultPath)
+
+        env.execute()
+
+        // return expected result
+        "1,1,Hi\n" +
+          "2,2,Hello\n" +
+          "3,2,Hello world\n"
+        
+      case 2 =>
+        /*
+         * check correctness of distinct on tuples with key field selector with not all fields
+         * selected
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getSmall5TupleDataSet(env)
+
+        val distinctDs = ds.union(ds).distinct(0).map(_._1)
+        
+        distinctDs.writeAsText(resultPath)
+        env.execute()
+        "1\n" + "2\n"
+        
+      case 3 =>
+        /*
+         * check correctness of distinct on tuples with key extractor
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getSmall5TupleDataSet(env)
+
+        val reduceDs = ds.union(ds).distinct(_._1).map(_._1)
+
+        reduceDs.writeAsText(resultPath)
+        env.execute()
+        "1\n" + "2\n"
+
+      case 4 =>
+        /*
+         * check correctness of distinct on custom type with type extractor
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getCustomTypeDataSet(env)
+
+        val reduceDs = ds.distinct(_.myInt).map( t => new Tuple1(t.myInt))
+
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n"
+
+      case 5 =>
+        /*
+         * check correctness of distinct on tuples
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getSmall3TupleDataSet(env)
+
+        val distinctDs = ds.union(ds).distinct()
+
+        distinctDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1,Hi\n" + "2,2,Hello\n" + "3,2,Hello world\n"
+
+      case 6 =>
+        /*
+         * check correctness of distinct on custom type with tuple-returning type extractor
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.get5TupleDataSet(env)
+
+        val reduceDs = ds.distinct( t => (t._1, t._5)).map( t => (t._1, t._5) )
+
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1\n" + "2,1\n" + "2,2\n" + "3,2\n" + "3,3\n" + "4,1\n" + "4,2\n" + "5," +
+          "1\n" + "5,2\n" + "5,3\n"
+
+      case 7 =>
+        /*
+         * check correctness of distinct on tuples with field expressions
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getSmall5TupleDataSet(env)
+
+        val reduceDs = ds.union(ds).distinct("_1").map(t => new Tuple1(t._1))
+
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1\n" + "2\n"
+
+      case 8 =>
+        /*
+         * check correctness of distinct on Pojos
+         */
+
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = CollectionDataSets.getDuplicatePojoDataSet(env)
+
+        val reduceDs = ds.distinct("nestedPojo.longNumber").map(_.nestedPojo.longNumber.toInt)
+
+        reduceDs.writeAsText(resultPath)
+        env.execute()
+        "10000\n20000\n30000\n"
+
+      case _ =>
+        throw new IllegalArgumentException("Invalid program id")
+    }
+  }
+}
+
+
+@RunWith(classOf[Parameterized])
+class DistinctITCase(config: Configuration) extends JavaProgramTestBase(config) {
+
+  private var curProgId: Int = config.getInteger("ProgramId", -1)
+  private var resultPath: String = null
+  private var expectedResult: String = null
+
+  protected override def preSubmit(): Unit = {
+    resultPath = getTempDirPath("result")
+  }
+
+  protected def testProgram(): Unit = {
+    expectedResult = DistinctProgs.runProgram(curProgId, resultPath)
+  }
+
+  protected override def postSubmit(): Unit = {
+    compareResultsByLinesInMemory(expectedResult, resultPath)
+  }
+}
+
+object DistinctITCase {
+  @Parameters
+  def getConfigurations: java.util.Collection[Array[AnyRef]] = {
+    val configs = mutable.MutableList[Array[AnyRef]]()
+    for (i <- 1 to DistinctProgs.NUM_PROGRAMS) {
+      val config = new Configuration()
+      config.setInteger("ProgramId", i)
+      configs += Array(config)
+    }
+
+    configs.asJavaCollection
+  }
+}
+
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctOperatorTest.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctOperatorTest.scala
index b146e1cc78d90e92a91d467fc0e9f1ee08882078..e9d214b7474ebbe8a9dc15010ee04da1e6c141db 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctOperatorTest.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/DistinctOperatorTest.scala
@@ -17,6 +17,7 @@
  */
 package org.apache.flink.api.scala.operators
 
+import org.apache.flink.api.scala.util.CollectionDataSets.CustomType
 import org.junit.Assert
 import org.apache.flink.api.common.InvalidProgramException
 import org.junit.Test
@@ -102,7 +103,7 @@ class DistinctOperatorTest {
     }
   }
 
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[RuntimeException])
   def testDistinctByKeyFields2(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val longDs = env.fromCollection(emptyLongData)
@@ -111,16 +112,16 @@ class DistinctOperatorTest {
     longDs.distinct("_1")
   }
 
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[RuntimeException])
   def testDistinctByKeyFields3(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val customDs = env.fromCollection(customTypeData)
 
-    // should not work: field key on custom type
+    // should not work: invalid fields
     customDs.distinct("_1")
   }
 
-  @Test(expected = classOf[IllegalArgumentException])
+  @Test(expected = classOf[RuntimeException])
   def testDistinctByKeyFields4(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val tupleDs = env.fromCollection(emptyTupleData)
@@ -129,12 +130,21 @@ class DistinctOperatorTest {
     tupleDs.distinct("foo")
   }
 
+  @Test
+  def testDistinctByKeyFields5(): Unit = {
+    val env = ExecutionEnvironment.getExecutionEnvironment
+    val customDs = env.fromCollection(customTypeData)
+
+    // should work
+    customDs.distinct("myInt")
+  }
+
   @Test
   def testDistinctByKeySelector1(): Unit = {
     val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
     try {
       val customDs = env.fromCollection(customTypeData)
-      customDs.distinct {_.l}
+      customDs.distinct {_.myLong}
     }
     catch {
       case e: Exception => Assert.fail()
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/ExamplesITCase.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/ExamplesITCase.scala
index d5ae6b6ba98520b9390764613684a56bfa2d90f3..f43052acb136d62321b98c20fa4e84ec9ae46096 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/ExamplesITCase.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/ExamplesITCase.scala
@@ -29,25 +29,36 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 
 // TODO case class Tuple2[T1, T2](_1: T1, _2: T2)
-// TODO case class Foo(a: Int, b: String)
+// TODO case class Foo(a: Int, b: String
 
-class Nested(var myLong: Long) {
+case class Nested(myLong: Long)
+
+class Pojo(var myString: String, var myInt: Int, var nested: Nested) {
   def this() = {
-    this(0);
+    this("", 0, new Nested(1))
   }
+
+  def this(myString: String, myInt: Int, myLong: Long) { this(myString, myInt, new Nested(myLong)) }
+
+  override def toString = s"myString=$myString myInt=$myInt nested.myLong=${nested.myLong}"
+}
+
+class NestedPojo(var myLong: Long) {
+  def this() { this(0) }
 }
-class Pojo(var myString: String, var myInt: Int, myLong: Long) {
-  var nested = new Nested(myLong)
 
+class PojoWithPojo(var myString: String, var myInt: Int, var nested: Nested) {
   def this() = {
-    this("", 0, 0)
+    this("", 0, new Nested(1))
   }
 
-  override def toString() = "myString="+myString+" myInt="+myInt+" nested.myLong="+nested.myLong
+  def this(myString: String, myInt: Int, myLong: Long) { this(myString, myInt, new Nested(myLong)) }
+
+  override def toString = s"myString=$myString myInt=$myInt nested.myLong=${nested.myLong}"
 }
 
 object ExampleProgs {
-  var NUM_PROGRAMS: Int = 3
+  var NUM_PROGRAMS: Int = 4
 
   def runProgram(progId: Int, resultPath: String, onCollection: Boolean): String = {
     progId match {
@@ -58,27 +69,53 @@ object ExampleProgs {
         val env = ExecutionEnvironment.getExecutionEnvironment
         val ds = env.fromElements( (("this","is"), 1), (("this", "is"),2), (("this","hello"),3) )
 
-        val grouped = ds.groupBy(0).reduce( { (e1, e2) => ((e1._1._1,e1._1._2), e1._2+e2._2)})
+        val grouped = ds.groupBy(0).reduce( { (e1, e2) => ((e1._1._1, e1._1._2), e1._2 + e2._2)})
         grouped.writeAsText(resultPath)
         env.execute()
         "((this,hello),3)\n((this,is),3)\n"
+
       case 2 =>
-      /*
-        Test nested tuples with int offset
-       */
-      val env = ExecutionEnvironment.getExecutionEnvironment
-      val ds = env.fromElements( (("this","is"), 1), (("this", "is"),2), (("this","hello"),3) )
-
-      val grouped = ds.groupBy("f0.f0").reduce( { (e1, e2) => ((e1._1._1,e1._1._2), e1._2+e2._2)})
-      grouped.writeAsText(resultPath)
-      env.execute()
-      "((this,is),6)\n"
+        /*
+          Test nested tuples with int offset
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = env.fromElements( (("this","is"), 1), (("this", "is"),2), (("this","hello"),3) )
+
+        val grouped = ds.groupBy("_1._1").reduce{
+          (e1, e2) => ((e1._1._1, e1._1._2), e1._2 + e2._2)
+        }
+        grouped.writeAsText(resultPath)
+        env.execute()
+        "((this,is),6)\n"
+
       case 3 =>
         /*
           Test nested pojos
          */
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromElements( new Pojo("one", 1, 1L),new Pojo("one", 1, 1L),new Pojo("two", 666, 2L) )
+        val ds = env.fromElements(
+          new PojoWithPojo("one", 1, 1L),
+          new PojoWithPojo("one", 1, 1L),
+          new PojoWithPojo("two", 666, 2L) )
+
+        val grouped = ds.groupBy("nested.myLong").reduce {
+          (p1, p2) =>
+            p1.myInt += p2.myInt
+            p1
+        }
+        grouped.writeAsText(resultPath)
+        env.execute()
+        "myString=two myInt=666 nested.myLong=2\nmyString=one myInt=2 nested.myLong=1\n"
+
+      case 4 =>
+        /*
+          Test pojo with nested case class
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds = env.fromElements(
+          new Pojo("one", 1, 1L),
+          new Pojo("one", 1, 1L),
+          new Pojo("two", 666, 2L) )
 
         val grouped = ds.groupBy("nested.myLong").reduce {
           (p1, p2) =>
@@ -124,4 +161,4 @@ object ExamplesITCase {
 
     configs.asJavaCollection
   }
-}
\ No newline at end of file
+}
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupReduceITCase.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupReduceITCase.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b796a81380692b8cfad9b5fc7019cb125183f9c9
--- /dev/null
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupReduceITCase.scala
@@ -0,0 +1,574 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.api.scala.operators
+
+import java.lang.Iterable
+
+import org.apache.flink.api.common.functions._
+import org.apache.flink.api.common.operators.Order
+import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.api.scala.util.CollectionDataSets
+import org.apache.flink.api.scala.util.CollectionDataSets
+import org.apache.flink.api.scala.util.CollectionDataSets.CustomType
+import org.apache.flink.compiler.PactCompiler
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.configuration.Configuration
+import org.apache.flink.test.util.JavaProgramTestBase
+import org.apache.flink.test.util.JavaProgramTestBase
+import org.apache.flink.util.Collector
+import org.junit.runner.RunWith
+import org.junit.runner.RunWith
+import org.junit.runners.Parameterized
+import org.junit.runners.Parameterized
+import org.junit.runners.Parameterized.Parameters
+import org.junit.runners.Parameterized.Parameters
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.flink.api.scala._
+
+
+object GroupReduceProgs {
+  var NUM_PROGRAMS: Int = 8
+
+  def runProgram(progId: Int, resultPath: String, onCollection: Boolean): String = {
+    progId match {
+      case 1 =>
+        /*
+         * check correctness of groupReduce on tuples with key field selector
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds =  CollectionDataSets.get3TupleDataSet(env)
+        val reduceDs =  ds.groupBy(1).reduceGroup {
+          in =>
+            in.map(t => (t._1, t._2)).reduce((l, r) => (l._1 + r._1, l._2))
+        }
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1\n" + "5,2\n" + "15,3\n" + "34,4\n" + "65,5\n" + "111,6\n"
+
+      case 2 =>
+        /*
+         * check correctness of groupReduce on tuples with multiple key field selector
+         */
+      val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds =  CollectionDataSets
+          .get5TupleDataSet(env)
+        val reduceDs =  ds.groupBy(4, 0).reduceGroup {
+          in =>
+            val (i, l, l2) = in
+              .map( t => (t._1, t._2, t._5))
+              .reduce((l, r) => (l._1, l._2 + r._2, l._3))
+            (i, l, 0, "P-)", l2)
+        }
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1,0,P-),1\n" + "2,3,0,P-),1\n" + "2,2,0,P-),2\n" + "3,9,0,P-),2\n" + "3,6,0," +
+          "P-),3\n" + "4,17,0,P-),1\n" + "4,17,0,P-),2\n" + "5,11,0,P-),1\n" + "5,29,0,P-)," +
+          "2\n" + "5,25,0,P-),3\n"
+
+      case 3 =>
+        /*
+         * check correctness of groupReduce on tuples with key field selector and group sorting
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        env.setDegreeOfParallelism(1)
+        val ds =  CollectionDataSets.get3TupleDataSet(env)
+        val reduceDs =  ds.groupBy(1).sortGroup(2, Order.ASCENDING).reduceGroup {
+          in =>
+            in.reduce((l, r) => (l._1 + r._1, l._2, l._3 + "-" + r._3))
+        }
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1,Hi\n" +
+          "5,2,Hello-Hello world\n" +
+          "15,3,Hello world, how are you?-I am fine.-Luke Skywalker\n" +
+          "34,4,Comment#1-Comment#2-Comment#3-Comment#4\n" +
+          "65,5,Comment#5-Comment#6-Comment#7-Comment#8-Comment#9\n" +
+          "111,6,Comment#10-Comment#11-Comment#12-Comment#13-Comment#14-Comment#15\n"
+
+      case 4 =>
+        /*
+         * check correctness of groupReduce on tuples with key extractor
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds =  CollectionDataSets.get3TupleDataSet(env)
+        val reduceDs =  ds.groupBy(_._2).reduceGroup {
+          in =>
+            in.map(t => (t._1, t._2)).reduce((l, r) => (l._1 + r._1, l._2))
+        }
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1\n" + "5,2\n" + "15,3\n" + "34,4\n" + "65,5\n" + "111,6\n"
+
+      case 5 =>
+        /*
+         * check correctness of groupReduce on custom type with type extractor
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds =  CollectionDataSets.getCustomTypeDataSet(env)
+        val reduceDs =  ds.groupBy(_.myInt).reduceGroup {
+          in =>
+            val iter = in.toIterator
+            val o = new CustomType
+            var c = iter.next()
+
+            o.myString = "Hello!"
+            o.myInt = c.myInt
+            o.myLong = c.myLong
+
+            while (iter.hasNext) {
+              val next = iter.next()
+              o.myLong += next.myLong
+            }
+            o
+        }
+        reduceDs.writeAsText(resultPath)
+        env.execute()
+        "1,0,Hello!\n" + "2,3,Hello!\n" + "3,12,Hello!\n" + "4,30,Hello!\n" + "5,60," +
+          "Hello!\n" + "6,105,Hello!\n"
+
+      case 6 =>
+        /*
+         * check correctness of all-groupreduce for tuples
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds =  CollectionDataSets.get3TupleDataSet(env)
+        val reduceDs =  ds.reduceGroup {
+          in =>
+            var i = 0
+            var l = 0L
+            for (t <- in) {
+              i += t._1
+              l += t._2
+            }
+            (i, l, "Hello World")
+        }
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "231,91,Hello World\n"
+
+      case 7 =>
+        /*
+         * check correctness of all-groupreduce for custom types
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val ds =  CollectionDataSets.getCustomTypeDataSet(env)
+        val reduceDs =  ds.reduceGroup {
+          in =>
+            val o = new CustomType(0, 0, "Hello!")
+            for (t <- in) {
+              o.myInt += t.myInt
+              o.myLong += t.myLong
+            }
+            o
+        }
+        reduceDs.writeAsText(resultPath)
+        env.execute()
+        "91,210,Hello!"
+
+      case 8 =>
+        /*
+         * check correctness of groupReduce with broadcast set
+         */
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        val intDs =  CollectionDataSets.getIntDataSet(env)
+        val ds =  CollectionDataSets.get3TupleDataSet(env)
+        val reduceDs =  ds.groupBy(1).reduceGroup(
+          new RichGroupReduceFunction[(Int, Long, String), (Int, Long, String)] {
+            private var f2Replace = ""
+
+            override def open(config: Configuration) {
+              val ints = this.getRuntimeContext.getBroadcastVariable[Int]("ints").asScala
+              f2Replace = ints.sum + ""
+            }
+
+            override def reduce(
+                values: Iterable[(Int, Long, String)],
+                out: Collector[(Int, Long, String)]): Unit = {
+              var i: Int = 0
+              var l: Long = 0L
+              for (t <- values.asScala) {
+                i += t._1
+                l = t._2
+              }
+              out.collect((i, l, f2Replace))
+            }
+          }).withBroadcastSet(intDs, "ints")
+        reduceDs.writeAsCsv(resultPath)
+        env.execute()
+        "1,1,55\n" + "5,2,55\n" + "15,3,55\n" + "34,4,55\n" + "65,5,55\n" + "111,6,55\n"
+
+//      case 9 =>
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets.get3TupleDataSet(env)
+//        val reduceDs =  ds.groupBy(1).reduceGroup(new
+//            GroupReduceITCase.InputReturningTuple3GroupReduce)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        "11,1,Hi!\n" + "21,1,Hi again!\n" + "12,2,Hi!\n" + "22,2,Hi again!\n" + "13,2," +
+//          "Hi!\n" + "23,2,Hi again!\n"
+
+//      case 10 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets.getCustomTypeDataSet
+//        (env)
+//        val reduceDs =  ds.groupBy(new
+//            KeySelector[CollectionDataSets.CustomType, Integer] {
+//          def getKey(in: CollectionDataSets.CustomType): Integer = {
+//            return in.myInt
+//          }
+//        }).reduceGroup(new GroupReduceITCase.CustomTypeGroupReduceWithCombine)
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        if (collectionExecution) {
+//          return null
+//        }
+//        else {
+//          "1,0,test1\n" + "2,3,test2\n" + "3,12,test3\n" + "4,30,test4\n" + "5,60," +
+//            "test5\n" + "6,105,test6\n"
+//        }
+//      }
+//      case 11 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(2)
+//        val ds =  CollectionDataSets.get3TupleDataSet(env)
+//        val reduceDs =  ds.groupBy(1).reduceGroup(new
+//            GroupReduceITCase.Tuple3GroupReduceWithCombine)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        if (collectionExecution) {
+//          return null
+//        }
+//        else {
+//          "1,test1\n" + "5,test2\n" + "15,test3\n" + "34,test4\n" + "65,test5\n" + "111," +
+//            "test6\n"
+//        }
+//      }
+//
+//
+//      // all-groupreduce with combine
+//
+//
+//      case 12 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets.get3TupleDataSet(env)
+//          .map(new GroupReduceITCase.IdentityMapper[Tuple3[Integer, Long,
+//          String]]).setParallelism(4)
+//        val cfg: Configuration = new Configuration
+//        cfg.setString(PactCompiler.HINT_SHIP_STRATEGY,
+// PactCompiler.HINT_SHIP_STRATEGY_REPARTITION)
+//        val reduceDs =  ds.reduceGroup(new GroupReduceITCase
+//        .Tuple3AllGroupReduceWithCombine).withParameters(cfg)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        if (collectionExecution) {
+//          return null
+//        }
+//        else {
+//          "322," +
+//          "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n"
+//        }
+//      }
+//      case 13 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets.get3TupleDataSet(env)
+//        val reduceDs =  ds.groupBy(1).sortGroup(2,
+//          Order.DESCENDING).reduceGroup(new GroupReduceITCase.Tuple3SortedGroupReduce)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        "1,1,Hi\n" + "5,2,Hello world-Hello\n" + "15,3,Luke Skywalker-I am fine.-Hello " +
+//          "world, how are you?\n" + "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" + "65,5," +
+//          "Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" + "111,6," +
+//          "Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n"
+//      }
+//      case 14 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets
+//          .get5TupleDataSet(env)
+//        val reduceDs: DataSet[Tuple5[Integer, Long, Integer, String,
+//          Long]] = ds.groupBy(new KeySelector[Tuple5[Integer, Long, Integer, String, Long],
+//          Tuple2[Integer, Long]] {
+//          def getKey(t: Tuple5[Integer, Long, Integer, String, Long]): Tuple2[Integer, Long] = {
+//            return new Tuple2[Integer, Long](t.f0, t.f4)
+//          }
+//        }).reduceGroup(new GroupReduceITCase.Tuple5GroupReduce)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        "1,1,0,P-),1\n" + "2,3,0,P-),1\n" + "2,2,0,P-),2\n" + "3,9,0,P-),2\n" + "3,6,0," +
+//          "P-),3\n" + "4,17,0,P-),1\n" + "4,17,0,P-),2\n" + "5,11,0,P-),1\n" + "5,29,0,P-)," +
+//          "2\n" + "5,25,0,P-),3\n"
+//      }
+//      case 15 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets.get3TupleDataSet(env)
+//        val reduceDs =  ds.groupBy(1).sortGroup(0,
+//          Order.ASCENDING).reduceGroup(new GroupReduceITCase.OrderCheckingCombinableReduce)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        "1,1,Hi\n" + "2,2,Hello\n" + "4,3,Hello world, how are you?\n" + "7,4," +
+//          "Comment#1\n" + "11,5,Comment#5\n" + "16,6,Comment#10\n"
+//      }
+//      case 16 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets
+//          .getCrazyNestedDataSet(env)
+//        val reduceDs =  ds.groupBy("nest_Lvl1.nest_Lvl2" +
+//          ".nest_Lvl3.nest_Lvl4.f1nal").reduceGroup(new GroupReduceFunction[CollectionDataSets
+//        .CrazyNested, Tuple2[String, Integer]] {
+//          def reduce(values: Iterable[CollectionDataSets.CrazyNested],
+//                     out: Collector[Tuple2[String, Integer]]) {
+//            var c: Int = 0
+//            var n: String = null
+//            import scala.collection.JavaConversions._
+//            for (v <- values) {
+//              c += 1
+//              n = v.nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4.f1nal
+//            }
+//            out.collect(new Tuple2[String, Integer](n, c))
+//          }
+//        })
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        "aa,1\nbb,2\ncc,3\n"
+//      }
+//      case 17 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets
+//          .getPojoExtendingFromTuple(env)
+//        val reduceDs =  ds.groupBy("special",
+//          "f2")
+// .reduceGroup(new GroupReduceFunction[CollectionDataSets.FromTupleWithCTor, Integer] {
+//          def reduce(values: Iterable[CollectionDataSets.FromTupleWithCTor],
+//                     out: Collector[Integer]) {
+//            var c: Int = 0
+//            import scala.collection.JavaConversions._
+//            for (v <- values) {
+//              c += 1
+//            }
+//            out.collect(c)
+//          }
+//        })
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "3\n2\n"
+//      }
+//      case 18 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds =  CollectionDataSets
+//          .getPojoContainingTupleAndWritable(env)
+//        val reduceDs =  ds.groupBy("hadoopFan", "theTuple.*").reduceGroup(new
+//            GroupReduceFunction[CollectionDataSets.PojoContainingTupleAndWritable, Integer] {
+//          def reduce(values: Iterable[CollectionDataSets.PojoContainingTupleAndWritable],
+//                     out: Collector[Integer]) {
+//            var c: Int = 0
+//            import scala.collection.JavaConversions._
+//            for (v <- values) {
+//              c += 1
+//            }
+//            out.collect(c)
+//          }
+//        })
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "1\n5\n"
+//      }
+//      case 19 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        val ds: DataSet[Tuple3[Integer, CollectionDataSets.CrazyNested,
+//          CollectionDataSets.POJO]] = CollectionDataSets.getTupleContainingPojos(env)
+//        val reduceDs =  ds.groupBy("f0", "f1.*").reduceGroup(new
+//            GroupReduceFunction[Tuple3[Integer, CollectionDataSets.CrazyNested,
+//              CollectionDataSets.POJO], Integer] {
+//          def reduce(values: Iterable[Tuple3[Integer, CollectionDataSets.CrazyNested,
+//            CollectionDataSets.POJO]], out: Collector[Integer]) {
+//            var c: Int = 0
+//            import scala.collection.JavaConversions._
+//            for (v <- values) {
+//              c += 1
+//            }
+//            out.collect(c)
+//          }
+//        })
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "3\n1\n"
+//      }
+//      case 20 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets.get3TupleDataSet(env)
+//        val reduceDs =  ds.groupBy(1).sortGroup("f2",
+//          Order.DESCENDING).reduceGroup(new GroupReduceITCase.Tuple3SortedGroupReduce)
+//        reduceDs.writeAsCsv(resultPath)
+//        env.execute()
+//        "1,1,Hi\n" + "5,2,Hello world-Hello\n" + "15,3,Luke Skywalker-I am fine.-Hello " +
+//          "world, how are you?\n" + "34,4,Comment#4-Comment#3-Comment#2-Comment#1\n" + "65,5," +
+//          "Comment#9-Comment#8-Comment#7-Comment#6-Comment#5\n" + "111,6," +
+//          "Comment#15-Comment#14-Comment#13-Comment#12-Comment#11-Comment#10\n"
+//      }
+//      case 21 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets
+//          .getGroupSortedNestedTupleDataSet(env)
+//        val reduceDs =  ds.groupBy("f1").sortGroup(0,
+//          Order.DESCENDING).reduceGroup(new GroupReduceITCase.NestedTupleReducer)
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "a--(1,1)-(1,2)-(1,3)-\n" + "b--(2,2)-\n" + "c--(3,3)-(3,6)-(3,9)-\n"
+//      }
+//      case 22 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets
+//          .getGroupSortedNestedTupleDataSet(env)
+//        val reduceDs =  ds.groupBy("f1").sortGroup("f0.f0",
+//          Order.ASCENDING).reduceGroup(new GroupReduceITCase.NestedTupleReducer)
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "a--(1,3)-(1,2)-(2,1)-\n" + "b--(2,2)-\n" + "c--(3,3)-(3,6)-(4,9)-\n"
+//      }
+//      case 23 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets
+//          .getGroupSortedNestedTupleDataSet(env)
+//        val reduceDs =  ds.groupBy("f1").sortGroup("f0.f0",
+//          Order.DESCENDING).reduceGroup(new GroupReduceITCase.NestedTupleReducer)
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "a--(2,1)-(1,3)-(1,2)-\n" + "b--(2,2)-\n" + "c--(4,9)-(3,3)-(3,6)-\n"
+//      }
+//      case 24 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets
+//          .getGroupSortedNestedTupleDataSet(env)
+//        val reduceDs =  ds.groupBy("f1").sortGroup("f0.f0",
+//          Order.DESCENDING).sortGroup("f0.f1", Order.DESCENDING).reduceGroup(new
+//            GroupReduceITCase.NestedTupleReducer)
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "a--(2,1)-(1,3)-(1,2)-\n" + "b--(2,2)-\n" + "c--(4,9)-(3,6)-(3,3)-\n"
+//      }
+//      case 25 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets
+//          .getGroupSortedPojoContainingTupleAndWritable(env)
+//        val reduceDs =  ds.groupBy("hadoopFan").sortGroup("theTuple.f0",
+//          Order.DESCENDING)
+// .sortGroup("theTuple.f1", Order.DESCENDING)
+// .reduceGroup(new GroupReduceFunction[CollectionDataSets.PojoContainingTupleAndWritable, String] {
+//  def reduce(values: Iterable[CollectionDataSets.PojoContainingTupleAndWritable],
+// out: Collector[String]) {
+//            var once: Boolean = false
+//            val concat: StringBuilder = new StringBuilder
+//            import scala.collection.JavaConversions._
+//            for (value <- values) {
+//              if (!once) {
+//                concat.append(value.hadoopFan.get)
+//                concat.append("---")
+//                once = true
+//              }
+//              concat.append(value.theTuple)
+//              concat.append("-")
+//            }
+//            out.collect(concat.toString)
+//          }
+//        })
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "1---(10,100)-\n" + "2---(30,600)-(30,400)-(30,200)-(20,201)-(20,200)-\n"
+//      }
+//      case 26 => {
+//        val env = ExecutionEnvironment.getExecutionEnvironment
+//        env.setDegreeOfParallelism(1)
+//        val ds =  CollectionDataSets.getPojoWithMultiplePojos(env)
+//        val reduceDs =  ds.groupBy("hadoopFan")
+// .sortGroup("theTuple.f0", Order.DESCENDING).sortGroup("theTuple.f1", Order.DESCENDING)
+// .reduceGroup(new GroupReduceFunction[CollectionDataSets.PojoContainingTupleAndWritable, String] {
+//          def reduce(values: Iterable[CollectionDataSets.PojoContainingTupleAndWritable],
+// out: Collector[String]) {
+//            var once: Boolean = false
+//            val concat: StringBuilder = new StringBuilder
+//            import scala.collection.JavaConversions._
+//            for (value <- values) {
+//              if (!once) {
+//                concat.append(value.hadoopFan.get)
+//                concat.append("---")
+//                once = true
+//              }
+//              concat.append(value.theTuple)
+//              concat.append("-")
+//            }
+//            out.collect(concat.toString)
+//          }
+//        })
+//        reduceDs.writeAsText(resultPath)
+//        env.execute()
+//        "1---(10,100)-\n" + "2---(30,600)-(30,400)-(30,200)-(20,201)-(20,200)-\n"
+//      }
+//      case _ => {
+//        throw new IllegalArgumentException("Invalid program id")
+//      }
+    }
+  }
+}
+
+
+@RunWith(classOf[Parameterized])
+class GroupReduceITCase(config: Configuration) extends JavaProgramTestBase(config) {
+
+  private var curProgId: Int = config.getInteger("ProgramId", -1)
+  private var resultPath: String = null
+  private var expectedResult: String = null
+
+  protected override def preSubmit(): Unit = {
+    resultPath = getTempDirPath("result")
+  }
+
+  protected def testProgram(): Unit = {
+    expectedResult = GroupReduceProgs.runProgram(curProgId, resultPath, isCollectionExecution)
+  }
+
+  protected override def postSubmit(): Unit = {
+    compareResultsByLinesInMemory(expectedResult, resultPath)
+  }
+}
+
+object GroupReduceITCase {
+  @Parameters
+  def getConfigurations: java.util.Collection[Array[AnyRef]] = {
+    val configs = mutable.MutableList[Array[AnyRef]]()
+    for (i <- 1 to GroupReduceProgs.NUM_PROGRAMS) {
+      val config = new Configuration()
+      config.setInteger("ProgramId", i)
+      configs += Array(config)
+    }
+
+    configs.asJavaCollection
+  }
+}
+
+
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupingTest.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupingTest.scala
index dd1ac99f18000ab1c734af2501c50244c9d3c5ca..fe1dd4393631997a8aad453ff9c20bf8ba94cadb 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupingTest.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/GroupingTest.scala
@@ -17,10 +17,10 @@
  */
 package org.apache.flink.api.scala.operators
 
+import org.apache.flink.api.scala.util.CollectionDataSets.CustomType
 import org.junit.Assert
 import org.apache.flink.api.common.InvalidProgramException
 import org.apache.flink.api.common.operators.Order
-import org.junit.Ignore
 import org.junit.Test
 
 import org.apache.flink.api.scala._
@@ -96,7 +96,7 @@ class GroupingTest {
     }
   }
 
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[IllegalArgumentException])
   def testGroupByKeyFields2(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val longDs = env.fromCollection(emptyLongData)
@@ -105,7 +105,7 @@ class GroupingTest {
     longDs.groupBy("_1")
   }
 
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[IllegalArgumentException])
   def testGroupByKeyFields3(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val customDs = env.fromCollection(customTypeData)
@@ -114,7 +114,7 @@ class GroupingTest {
     customDs.groupBy("_1")
   }
 
-  @Test(expected = classOf[IllegalArgumentException])
+  @Test(expected = classOf[RuntimeException])
   def testGroupByKeyFields4(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val tupleDs = env.fromCollection(emptyTupleData)
@@ -123,7 +123,15 @@ class GroupingTest {
     tupleDs.groupBy("foo")
   }
 
-  @Ignore
+  @Test
+  def testGroupByKeyFields5(): Unit = {
+    val env = ExecutionEnvironment.getExecutionEnvironment
+    val customDs = env.fromCollection(customTypeData)
+
+    // should not work
+    customDs.groupBy("myInt")
+  }
+
   @Test
   def testGroupByKeyExpressions1(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
@@ -131,24 +139,22 @@ class GroupingTest {
 
     // should work
     try {
-//      ds.groupBy("i");
+      ds.groupBy("myInt")
     }
     catch {
       case e: Exception => Assert.fail()
     }
   }
 
-  @Ignore
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[IllegalArgumentException])
   def testGroupByKeyExpressions2(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
 
     // should not work: groups on basic type
-//    longDs.groupBy("l");
     val longDs = env.fromCollection(emptyLongData)
+    longDs.groupBy("l")
   }
 
-  @Ignore
   @Test(expected = classOf[InvalidProgramException])
   def testGroupByKeyExpressions3(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
@@ -158,14 +164,13 @@ class GroupingTest {
     customDs.groupBy(0)
   }
 
-  @Ignore
   @Test(expected = classOf[IllegalArgumentException])
   def testGroupByKeyExpressions4(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds = env.fromCollection(customTypeData)
 
     // should not work, non-existent field
-//    ds.groupBy("myNonExistent");
+    ds.groupBy("myNonExistent")
   }
 
   @Test
@@ -173,7 +178,7 @@ class GroupingTest {
     val env = ExecutionEnvironment.getExecutionEnvironment
     try {
       val customDs = env.fromCollection(customTypeData)
-      customDs.groupBy { _.l }
+      customDs.groupBy { _.myLong }
     }
     catch {
       case e: Exception => Assert.fail()
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/JoinOperatorTest.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/JoinOperatorTest.scala
index cae936da804bbe02f683db1c4a5f7384ff14a040..02191544684168f331a46d349ca1b29f771f57c6 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/JoinOperatorTest.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/JoinOperatorTest.scala
@@ -18,6 +18,7 @@
 package org.apache.flink.api.scala.operators
 
 import org.apache.flink.api.java.operators.Keys.IncompatibleKeysException
+import org.apache.flink.api.scala.util.CollectionDataSets.CustomType
 import org.junit.Assert
 import org.apache.flink.api.common.InvalidProgramException
 import org.junit.Ignore
@@ -132,7 +133,7 @@ class JoinOperatorTest {
     ds1.join(ds2).where("_1", "_2").equalTo("_3")
   }
 
-  @Test(expected = classOf[IllegalArgumentException])
+  @Test(expected = classOf[RuntimeException])
   def testJoinKeyFields4(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(emptyTupleData)
@@ -142,7 +143,7 @@ class JoinOperatorTest {
     ds1.join(ds2).where("foo").equalTo("_1")
   }
 
-  @Test(expected = classOf[IllegalArgumentException])
+  @Test(expected = classOf[RuntimeException])
   def testJoinKeyFields5(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(emptyTupleData)
@@ -152,7 +153,7 @@ class JoinOperatorTest {
     ds1.join(ds2).where("_1").equalTo("bar")
   }
 
-  @Test(expected = classOf[UnsupportedOperationException])
+  @Test(expected = classOf[IllegalArgumentException])
   def testJoinKeyFields6(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(emptyTupleData)
@@ -162,7 +163,6 @@ class JoinOperatorTest {
     ds1.join(ds2).where("_2").equalTo("_1")
   }
 
-  @Ignore
   @Test
   def testJoinKeyExpressions1(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
@@ -171,36 +171,33 @@ class JoinOperatorTest {
 
     // should work
     try {
-//      ds1.join(ds2).where("i").equalTo("i")
+      ds1.join(ds2).where("myInt").equalTo("myInt")
     }
     catch {
       case e: Exception => Assert.fail()
     }
   }
 
-  @Ignore
-  @Test(expected = classOf[InvalidProgramException])
+  @Test(expected = classOf[IncompatibleKeysException])
   def testJoinKeyExpressions2(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(customTypeData)
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, incompatible join key types
-//    ds1.join(ds2).where("i").equalTo("s")
+    ds1.join(ds2).where("myInt").equalTo("myString")
   }
 
-  @Ignore
-  @Test(expected = classOf[InvalidProgramException])
+  @Test(expected = classOf[IncompatibleKeysException])
   def testJoinKeyExpressions3(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
     val ds1 = env.fromCollection(customTypeData)
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, incompatible number of keys
-//    ds1.join(ds2).where("i", "s").equalTo("i")
+    ds1.join(ds2).where("myInt", "myString").equalTo("myInt")
   }
 
-  @Ignore
   @Test(expected = classOf[IllegalArgumentException])
   def testJoinKeyExpressions4(): Unit = {
     val env = ExecutionEnvironment.getExecutionEnvironment
@@ -208,7 +205,7 @@ class JoinOperatorTest {
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, join key non-existent
-//    ds1.join(ds2).where("myNonExistent").equalTo("i")
+    ds1.join(ds2).where("myNonExistent").equalTo("i")
   }
 
   @Test
@@ -219,7 +216,7 @@ class JoinOperatorTest {
 
     // should work
     try {
-      ds1.join(ds2).where { _.l} equalTo { _.l }
+      ds1.join(ds2).where { _.myLong} equalTo { _.myLong }
     }
     catch {
       case e: Exception => Assert.fail()
@@ -234,7 +231,7 @@ class JoinOperatorTest {
 
     // should work
     try {
-      ds1.join(ds2).where { _.l }.equalTo(3)
+      ds1.join(ds2).where { _.myLong }.equalTo(3)
     }
     catch {
       case e: Exception => Assert.fail()
@@ -249,7 +246,7 @@ class JoinOperatorTest {
 
     // should work
     try {
-      ds1.join(ds2).where(3).equalTo { _.l }
+      ds1.join(ds2).where(3).equalTo { _.myLong }
     }
     catch {
       case e: Exception => Assert.fail()
@@ -263,7 +260,7 @@ class JoinOperatorTest {
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, incompatible types
-    ds1.join(ds2).where(2).equalTo { _.l }
+    ds1.join(ds2).where(2).equalTo { _.myLong }
   }
 
   @Test(expected = classOf[IncompatibleKeysException])
@@ -273,7 +270,7 @@ class JoinOperatorTest {
     val ds2 = env.fromCollection(customTypeData)
 
     // should not work, more than one field position key
-    ds1.join(ds2).where(1, 3) equalTo { _.l }
+    ds1.join(ds2).where(1, 3) equalTo { _.myLong }
   }
 }
 
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/PartitionITCase.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/PartitionITCase.scala
index a8447a9a82750ff27a15388981311907671fd519..bea91dfaac07cd2c67f10fa964b403f3fbdcdfe0 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/PartitionITCase.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/operators/PartitionITCase.scala
@@ -19,6 +19,7 @@ package org.apache.flink.api.scala.operators
 
 import org.apache.flink.api.common.functions.{RichFilterFunction, RichMapFunction}
 import org.apache.flink.api.scala.ExecutionEnvironment
+import org.apache.flink.api.scala.util.CollectionDataSets
 import org.apache.flink.configuration.Configuration
 import org.apache.flink.test.util.JavaProgramTestBase
 import org.junit.runner.RunWith
@@ -32,40 +33,18 @@ import org.apache.flink.api.scala._
 
 
 object PartitionProgs {
-  var NUM_PROGRAMS: Int = 6
-
-  val tupleInput = Array(
-    (1, "Foo"),
-    (1, "Foo"),
-    (1, "Foo"),
-    (2, "Foo"),
-    (2, "Foo"),
-    (2, "Foo"),
-    (2, "Foo"),
-    (2, "Foo"),
-    (3, "Foo"),
-    (3, "Foo"),
-    (3, "Foo"),
-    (4, "Foo"),
-    (4, "Foo"),
-    (4, "Foo"),
-    (4, "Foo"),
-    (5, "Foo"),
-    (5, "Foo"),
-    (6, "Foo"),
-    (6, "Foo"),
-    (6, "Foo"),
-    (6, "Foo")
-  )
-
+  var NUM_PROGRAMS: Int = 7
 
   def runProgram(progId: Int, resultPath: String, onCollection: Boolean): String = {
     progId match {
       case 1 =>
+        /*
+         * Test hash partition by tuple field
+         */
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
 
-        val unique = ds.partitionByHash(0).mapPartition( _.map(_._1).toSet )
+        val unique = ds.partitionByHash(1).mapPartition( _.map(_._2).toSet )
 
         unique.writeAsText(resultPath)
         env.execute()
@@ -73,16 +52,22 @@ object PartitionProgs {
         "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n"
 
       case 2 =>
+        /*
+         * Test hash partition by key selector
+         */
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
-        val unique = ds.partitionByHash( _._1 ).mapPartition( _.map(_._1).toSet )
+        val ds = CollectionDataSets.get3TupleDataSet(env)
+        val unique = ds.partitionByHash( _._2 ).mapPartition( _.map(_._2).toSet )
 
         unique.writeAsText(resultPath)
         env.execute()
         "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n"
 
       case 3 =>
-        val env = ExecutionEnvironment.getExecutionEnvironment
+        /*
+         * Test forced rebalancing
+         */
+      val env = ExecutionEnvironment.getExecutionEnvironment
         val ds = env.generateSequence(1, 3000)
 
         val skewed = ds.filter(_ > 780)
@@ -101,8 +86,8 @@ object PartitionProgs {
         countsInPartition.writeAsText(resultPath)
         env.execute()
 
-        val numPerPartition : Int = 2220 / env.getDegreeOfParallelism / 10;
-        var result = "";
+        val numPerPartition : Int = 2220 / env.getDegreeOfParallelism / 10
+        var result = ""
         for (i <- 0 until env.getDegreeOfParallelism) {
           result += "(" + i + "," + numPerPartition + ")\n"
         }
@@ -112,10 +97,12 @@ object PartitionProgs {
         // Verify that mapPartition operation after repartition picks up correct
         // DOP
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
         env.setDegreeOfParallelism(1)
 
-        val unique = ds.partitionByHash(0).setParallelism(4).mapPartition( _.map(_._1).toSet )
+        val unique = ds.partitionByHash(1)
+          .setParallelism(4)
+          .mapPartition( _.map(_._2).toSet )
 
         unique.writeAsText(resultPath)
         env.execute()
@@ -126,13 +113,13 @@ object PartitionProgs {
         // Verify that map operation after repartition picks up correct
         // DOP
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
         env.setDegreeOfParallelism(1)
 
         val count = ds.partitionByHash(0).setParallelism(4).map(
-          new RichMapFunction[(Int, String), Tuple1[Int]] {
+          new RichMapFunction[(Int, Long, String), Tuple1[Int]] {
             var first = true
-            override def map(in: (Int, String)): Tuple1[Int] = {
+            override def map(in: (Int, Long, String)): Tuple1[Int] = {
               // only output one value with count 1
               if (first) {
                 first = false
@@ -152,13 +139,13 @@ object PartitionProgs {
         // Verify that filter operation after repartition picks up correct
         // DOP
         val env = ExecutionEnvironment.getExecutionEnvironment
-        val ds = env.fromCollection(tupleInput)
+        val ds = CollectionDataSets.get3TupleDataSet(env)
         env.setDegreeOfParallelism(1)
 
         val count = ds.partitionByHash(0).setParallelism(4).filter(
-          new RichFilterFunction[(Int, String)] {
+          new RichFilterFunction[(Int, Long, String)] {
             var first = true
-            override def filter(in: (Int, String)): Boolean = {
+            override def filter(in: (Int, Long, String)): Boolean = {
               // only output one value with count 1
               if (first) {
                 first = false
@@ -175,6 +162,19 @@ object PartitionProgs {
 
         if (onCollection) "(1)\n" else "(4)\n"
 
+      case 7 =>
+        val env = ExecutionEnvironment.getExecutionEnvironment
+        env.setDegreeOfParallelism(3)
+        val ds = CollectionDataSets.getDuplicatePojoDataSet(env)
+        val uniqLongs = ds
+          .partitionByHash("nestedPojo.longNumber")
+          .setParallelism(4)
+          .mapPartition( _.map(_.nestedPojo.longNumber).toSet )
+
+        uniqLongs.writeAsText(resultPath)
+        env.execute()
+        "10000\n" + "20000\n" + "30000\n"
+
       case _ =>
         throw new IllegalArgumentException("Invalid program id")
     }
@@ -194,7 +194,7 @@ class PartitionITCase(config: Configuration) extends JavaProgramTestBase(config)
   }
 
   protected def testProgram(): Unit = {
-    expectedResult = PartitionProgs.runProgram(curProgId, resultPath, isCollectionExecution)
+    expectedResult = GroupReduceProgs.runProgram(curProgId, resultPath, isCollectionExecution)
   }
 
   protected override def postSubmit(): Unit = {
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/types/TypeInformationGenTest.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/types/TypeInformationGenTest.scala
index d33da4109b0b7b9a9645e493a5f6b358724f9afb..2b2d3a91becf567e353c963181af6081b1a7cfa1 100644
--- a/flink-scala/src/test/scala/org/apache/flink/api/scala/types/TypeInformationGenTest.scala
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/types/TypeInformationGenTest.scala
@@ -44,7 +44,9 @@ class CustomType(var myField1: String, var myField2: Int) {
   }
 }
 
-class MyObject[A](var a: A)
+class MyObject[A](var a: A) {
+  def this() { this(null.asInstanceOf[A]) }
+}
 
 class TypeInformationGenTest {
 
@@ -139,7 +141,7 @@ class TypeInformationGenTest {
 
     Assert.assertFalse(ti.isBasicType)
     Assert.assertFalse(ti.isTupleType)
-    Assert.assertTrue(ti.isInstanceOf[GenericTypeInfo[_]])
+    Assert.assertTrue(ti.isInstanceOf[PojoTypeInfo[_]])
     Assert.assertEquals(ti.getTypeClass, classOf[CustomType])
   }
 
@@ -152,7 +154,7 @@ class TypeInformationGenTest {
     val tti = ti.asInstanceOf[TupleTypeInfoBase[_]]
     Assert.assertEquals(classOf[Tuple2[_, _]], tti.getTypeClass)
     Assert.assertEquals(classOf[java.lang.Long], tti.getTypeAt(0).getTypeClass)
-    Assert.assertTrue(tti.getTypeAt(1).isInstanceOf[GenericTypeInfo[_]])
+    Assert.assertTrue(tti.getTypeAt(1).isInstanceOf[PojoTypeInfo[_]])
     Assert.assertEquals(classOf[CustomType], tti.getTypeAt(1).getTypeClass)
   }
 
@@ -235,7 +237,7 @@ class TypeInformationGenTest {
   def testParamertizedCustomObject(): Unit = {
     val ti = createTypeInformation[MyObject[String]]
 
-    Assert.assertTrue(ti.isInstanceOf[GenericTypeInfo[_]])
+    Assert.assertTrue(ti.isInstanceOf[PojoTypeInfo[_]])
   }
 
   @Test
diff --git a/flink-scala/src/test/scala/org/apache/flink/api/scala/util/CollectionDataSets.scala b/flink-scala/src/test/scala/org/apache/flink/api/scala/util/CollectionDataSets.scala
new file mode 100644
index 0000000000000000000000000000000000000000..60f86a0f7bf2a4964ab10eefed2e6388da5cdbd5
--- /dev/null
+++ b/flink-scala/src/test/scala/org/apache/flink/api/scala/util/CollectionDataSets.scala
@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.flink.api.scala.util
+
+import org.apache.hadoop.io.IntWritable
+
+import org.apache.flink.api.scala._
+
+import scala.collection.mutable
+import scala.util.Random
+
+/**
+ * #################################################################################################
+ *
+ * BE AWARE THAT OTHER TESTS DEPEND ON THIS TEST DATA. 
+ * IF YOU MODIFY THE DATA MAKE SURE YOU CHECK THAT ALL TESTS ARE STILL WORKING!
+ *
+ * #################################################################################################
+ */
+object CollectionDataSets {
+  def get3TupleDataSet(env: ExecutionEnvironment): DataSet[(Int, Long, String)] = {
+    val data = new mutable.MutableList[(Int, Long, String)]
+    data.+=((1, 1L, "Hi"))
+    data.+=((2, 2L, "Hello"))
+    data.+=((3, 2L, "Hello world"))
+    data.+=((4, 3L, "Hello world, how are you?"))
+    data.+=((5, 3L, "I am fine."))
+    data.+=((6, 3L, "Luke Skywalker"))
+    data.+=((7, 4L, "Comment#1"))
+    data.+=((8, 4L, "Comment#2"))
+    data.+=((9, 4L, "Comment#3"))
+    data.+=((10, 4L, "Comment#4"))
+    data.+=((11, 5L, "Comment#5"))
+    data.+=((12, 5L, "Comment#6"))
+    data.+=((13, 5L, "Comment#7"))
+    data.+=((14, 5L, "Comment#8"))
+    data.+=((15, 5L, "Comment#9"))
+    data.+=((16, 6L, "Comment#10"))
+    data.+=((17, 6L, "Comment#11"))
+    data.+=((18, 6L, "Comment#12"))
+    data.+=((19, 6L, "Comment#13"))
+    data.+=((20, 6L, "Comment#14"))
+    data.+=((21, 6L, "Comment#15"))
+    Random.shuffle(data)
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getSmall3TupleDataSet(env: ExecutionEnvironment): DataSet[(Int, Long, String)] = {
+    val data = new mutable.MutableList[(Int, Long, String)]
+    data.+=((1, 1L, "Hi"))
+    data.+=((2, 2L, "Hello"))
+    data.+=((3, 2L, "Hello world"))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def get5TupleDataSet(env: ExecutionEnvironment): DataSet[(Int, Long, Int, String, Long)] = {
+    val data = new mutable.MutableList[(Int, Long, Int, String, Long)]
+    data.+=((1, 1L, 0, "Hallo", 1L))
+    data.+=((2, 2L, 1, "Hallo Welt", 2L))
+    data.+=((2, 3L, 2, "Hallo Welt wie", 1L))
+    data.+=((3, 4L, 3, "Hallo Welt wie gehts?", 2L))
+    data.+=((3, 5L, 4, "ABC", 2L))
+    data.+=((3, 6L, 5, "BCD", 3L))
+    data.+=((4, 7L, 6, "CDE", 2L))
+    data.+=((4, 8L, 7, "DEF", 1L))
+    data.+=((4, 9L, 8, "EFG", 1L))
+    data.+=((4, 10L, 9, "FGH", 2L))
+    data.+=((5, 11L, 10, "GHI", 1L))
+    data.+=((5, 12L, 11, "HIJ", 3L))
+    data.+=((5, 13L, 12, "IJK", 3L))
+    data.+=((5, 14L, 13, "JKL", 2L))
+    data.+=((5, 15L, 14, "KLM", 2L))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getSmall5TupleDataSet(env: ExecutionEnvironment): DataSet[(Int, Long, Int, String, Long)] = {
+    val data = new mutable.MutableList[(Int, Long, Int, String, Long)]
+    data.+=((1, 1L, 0, "Hallo", 1L))
+    data.+=((2, 2L, 1, "Hallo Welt", 2L))
+    data.+=((2, 3L, 2, "Hallo Welt wie", 1L))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getSmallNestedTupleDataSet(env: ExecutionEnvironment): DataSet[((Int, Int), String)] = {
+    val data = new mutable.MutableList[((Int, Int), String)]
+    data.+=(((1, 1), "one"))
+    data.+=(((2, 2), "two"))
+    data.+=(((3, 3), "three"))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getGroupSortedNestedTupleDataSet(env: ExecutionEnvironment): DataSet[((Int, Int), String)] = {
+    val data = new mutable.MutableList[((Int, Int), String)]
+    data.+=(((1, 3), "a"))
+    data.+=(((1, 2), "a"))
+    data.+=(((2, 1), "a"))
+    data.+=(((2, 2), "b"))
+    data.+=(((3, 3), "c"))
+    data.+=(((3, 6), "c"))
+    data.+=(((4, 9), "c"))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getStringDataSet(env: ExecutionEnvironment): DataSet[String] = {
+    val data = new mutable.MutableList[String]
+    data.+=("Hi")
+    data.+=("Hello")
+    data.+=("Hello world")
+    data.+=("Hello world, how are you?")
+    data.+=("I am fine.")
+    data.+=("Luke Skywalker")
+    data.+=("Random comment")
+    data.+=("LOL")
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getIntDataSet(env: ExecutionEnvironment): DataSet[Int] = {
+    val data = new mutable.MutableList[Int]
+    data.+=(1)
+    data.+=(2)
+    data.+=(2)
+    data.+=(3)
+    data.+=(3)
+    data.+=(3)
+    data.+=(4)
+    data.+=(4)
+    data.+=(4)
+    data.+=(4)
+    data.+=(5)
+    data.+=(5)
+    data.+=(5)
+    data.+=(5)
+    data.+=(5)
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getCustomTypeDataSet(env: ExecutionEnvironment): DataSet[CustomType] = {
+    val data = new mutable.MutableList[CustomType]
+    data.+=(new CustomType(1, 0L, "Hi"))
+    data.+=(new CustomType(2, 1L, "Hello"))
+    data.+=(new CustomType(2, 2L, "Hello world"))
+    data.+=(new CustomType(3, 3L, "Hello world, how are you?"))
+    data.+=(new CustomType(3, 4L, "I am fine."))
+    data.+=(new CustomType(3, 5L, "Luke Skywalker"))
+    data.+=(new CustomType(4, 6L, "Comment#1"))
+    data.+=(new CustomType(4, 7L, "Comment#2"))
+    data.+=(new CustomType(4, 8L, "Comment#3"))
+    data.+=(new CustomType(4, 9L, "Comment#4"))
+    data.+=(new CustomType(5, 10L, "Comment#5"))
+    data.+=(new CustomType(5, 11L, "Comment#6"))
+    data.+=(new CustomType(5, 12L, "Comment#7"))
+    data.+=(new CustomType(5, 13L, "Comment#8"))
+    data.+=(new CustomType(5, 14L, "Comment#9"))
+    data.+=(new CustomType(6, 15L, "Comment#10"))
+    data.+=(new CustomType(6, 16L, "Comment#11"))
+    data.+=(new CustomType(6, 17L, "Comment#12"))
+    data.+=(new CustomType(6, 18L, "Comment#13"))
+    data.+=(new CustomType(6, 19L, "Comment#14"))
+    data.+=(new CustomType(6, 20L, "Comment#15"))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getSmallCustomTypeDataSet(env: ExecutionEnvironment): DataSet[CustomType] = {
+    val data = new mutable.MutableList[CustomType]
+    data.+=(new CustomType(1, 0L, "Hi"))
+    data.+=(new CustomType(2, 1L, "Hello"))
+    data.+=(new CustomType(2, 2L, "Hello world"))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getSmallTuplebasedPojoMatchingDataSet(env: ExecutionEnvironment):
+      DataSet[(Int, String, Int, Int, Long, String, Long)] = {
+    val data = new mutable.MutableList[(Int, String, Int, Int, Long, String, Long)]
+    data.+=((1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=((2, "Second", 20, 200, 2000L, "Two", 20000L))
+    data.+=((3, "Third", 30, 300, 3000L, "Three", 30000L))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getSmallPojoDataSet(env: ExecutionEnvironment): DataSet[POJO] = {
+    val data = new mutable.MutableList[POJO]
+    data.+=(new POJO(1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=(new POJO(2, "Second", 20, 200, 2000L, "Two", 20000L))
+    data.+=(new POJO(3, "Third", 30, 300, 3000L, "Three", 30000L))
+    env.fromCollection(Random.shuffle(data))
+  }
+
+  def getDuplicatePojoDataSet(env: ExecutionEnvironment): DataSet[POJO] = {
+    val data = new mutable.MutableList[POJO]
+    data.+=(new POJO(1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=(new POJO(1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=(new POJO(1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=(new POJO(1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=(new POJO(1, "First", 10, 100, 1000L, "One", 10000L))
+    data.+=(new POJO(2, "Second", 20, 200, 2000L, "Two", 20000L))
+    data.+=(new POJO(3, "Third", 30, 300, 3000L, "Three", 30000L))
+    data.+=(new POJO(3, "Third", 30, 300, 3000L, "Three", 30000L))
+    env.fromCollection(data)
+  }
+
+  def getCrazyNestedDataSet(env: ExecutionEnvironment): DataSet[CrazyNested] = {
+    val data = new mutable.MutableList[CrazyNested]
+    data.+=(new CrazyNested("aa"))
+    data.+=(new CrazyNested("bb"))
+    data.+=(new CrazyNested("bb"))
+    data.+=(new CrazyNested("cc"))
+    data.+=(new CrazyNested("cc"))
+    data.+=(new CrazyNested("cc"))
+    env.fromCollection(data)
+  }
+
+  def getPojoContainingTupleAndWritable(env: ExecutionEnvironment): DataSet[CollectionDataSets
+  .PojoContainingTupleAndWritable] = {
+    val data = new
+        mutable.MutableList[PojoContainingTupleAndWritable]
+    data.+=(new PojoContainingTupleAndWritable(1, 10L, 100L))
+    data.+=(new PojoContainingTupleAndWritable(2, 20L, 200L))
+    data.+=(new PojoContainingTupleAndWritable(2, 20L, 200L))
+    data.+=(new PojoContainingTupleAndWritable(2, 20L, 200L))
+    data.+=(new PojoContainingTupleAndWritable(2, 20L, 200L))
+    data.+=(new PojoContainingTupleAndWritable(2, 20L, 200L))
+    env.fromCollection(data)
+  }
+
+  def getTupleContainingPojos(env: ExecutionEnvironment): DataSet[(Int, CrazyNested, POJO)] = {
+    val data = new mutable.MutableList[(Int, CrazyNested, POJO)]
+    data.+=((
+      1,
+      new CrazyNested("one", "uno", 1L),
+      new POJO(1, "First", 10, 100, 1000L, "One", 10000L)))
+    data.+=((
+      1,
+      new CrazyNested("one", "uno", 1L),
+      new POJO(1, "First", 10, 100, 1000L, "One", 10000L)))
+    data.+=((
+      1,
+      new CrazyNested("one", "uno", 1L),
+      new POJO(1, "First", 10, 100, 1000L, "One", 10000L)))
+    data.+=((
+      2,
+      new CrazyNested("two", "duo", 2L),
+      new POJO(1, "First", 10, 100, 1000L, "One", 10000L)))
+    env.fromCollection(data)
+  }
+
+  def getPojoWithMultiplePojos(env: ExecutionEnvironment): DataSet[CollectionDataSets
+  .PojoWithMultiplePojos] = {
+    val data = new mutable.MutableList[CollectionDataSets
+    .PojoWithMultiplePojos]
+    data.+=(new PojoWithMultiplePojos("a", "aa", "b", "bb", 1))
+    data.+=(new PojoWithMultiplePojos("b", "bb", "c", "cc", 2))
+    data.+=(new PojoWithMultiplePojos("d", "dd", "e", "ee", 3))
+    env.fromCollection(data)
+  }
+
+
+  class CustomType(var myInt: Int, var myLong: Long, var myString: String) {
+    def this() {
+      this(0, 0, "")
+    }
+
+    override def toString: String = {
+      myInt + "," + myLong + "," + myString
+    }
+  }
+
+  class POJO(
+      var number: Int,
+      var str: String,
+      var nestedTupleWithCustom: (Int, CustomType),
+      var nestedPojo: NestedPojo) {
+    def this() {
+      this(0, "", null, null)
+    }
+
+    def this(i0: Int, s0: String, i1: Int, i2: Int, l0: Long, s1: String, l1: Long) {
+      this(i0, s0, (i1, new CustomType(i2, l0, s1)), new NestedPojo(l1))
+    }
+
+    override def toString: String = {
+      number + " " + str + " " + nestedTupleWithCustom + " " + nestedPojo.longNumber
+    }
+
+    @transient var ignoreMe: Long = 1L
+  }
+
+  class NestedPojo(var longNumber: Long) {
+    def this() {
+      this(0)
+    }
+  }
+
+  class CrazyNested(var nest_Lvl1: CrazyNestedL1, var something: Long) {
+    def this() {
+      this(new CrazyNestedL1, 0)
+    }
+
+    def this(set: String) {
+      this()
+      nest_Lvl1 = new CrazyNestedL1
+      nest_Lvl1.nest_Lvl2 = new CrazyNestedL2
+      nest_Lvl1.nest_Lvl2.nest_Lvl3 = new CrazyNestedL3
+      nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4 = new CrazyNestedL4
+      nest_Lvl1.nest_Lvl2.nest_Lvl3.nest_Lvl4.f1nal = set
+    }
+
+    def this(set: String, second: String, s: Long) {
+      this(set)
+      something = s
+      nest_Lvl1.a = second
+    }
+  }
+
+  class CrazyNestedL1 {
+    var a: String = null
+    var b: Int = 0
+    var nest_Lvl2: CrazyNestedL2 = null
+  }
+
+  class CrazyNestedL2 {
+    var nest_Lvl3: CrazyNestedL3 = null
+  }
+
+  class CrazyNestedL3 {
+    var nest_Lvl4: CrazyNestedL4 = null
+  }
+
+  class CrazyNestedL4 {
+    var f1nal: String = null
+  }
+
+  class PojoContainingTupleAndWritable(
+      var someInt: Int,
+      var someString: String,
+      var hadoopFan: IntWritable,
+      var theTuple: (Long, Long)) {
+    def this() {
+      this(0, "", new IntWritable(0), (0, 0))
+    }
+
+    def this(i: Int, l1: Long, l2: Long) {
+      this()
+      hadoopFan = new IntWritable(i)
+      someInt = i
+      theTuple = (l1, l2)
+    }
+
+  }
+
+  class Pojo1 {
+    var a: String = null
+    var b: String = null
+  }
+
+  class Pojo2 {
+    var a2: String = null
+    var b2: String = null
+  }
+
+  class PojoWithMultiplePojos {
+
+    def this(a: String, b: String, a1: String, b1: String, i0: Int) {
+      this()
+      p1 = new Pojo1
+      p1.a = a
+      p1.b = b
+      p2 = new Pojo2
+      p2.a2 = a1
+      p2.a2 = b1
+      this.i0 = i0
+    }
+
+    var p1: Pojo1 = null
+    var p2: Pojo2 = null
+    var i0: Int = 0
+  }
+
+}
+