Commit a4d60208 authored by Burak, committed by Xiangrui Meng

[SPARK-2434][MLlib]: Warning messages that point users to original MLlib implementations added to Examples

[SPARK-2434][MLlib]: Warning messages that refer users to the original MLlib implementations of some popular example machine learning algorithms have been added to both the comments and the code. The following examples have been modified:
Scala:
* LocalALS
* LocalFileLR
* LocalKMeans
* LocalLR
* SparkALS
* SparkHdfsLR
* SparkKMeans
* SparkLR
Python:
 * kmeans.py
 * als.py
 * logistic_regression.py

Author: Burak <brkyvz@gmail.com>

Closes #1515 from brkyvz/SPARK-2434 and squashes the following commits:

7505da9 [Burak] [SPARK-2434][MLlib]: Warning messages added, scalastyle errors fixed, and added missing punctuation
b96b522 [Burak] [SPARK-2434][MLlib]: Warning messages added and scalastyle errors fixed
4762f39 [Burak] [SPARK-2434]: Warning messages added
17d3d83 [Burak] SPARK-2434: Added warning messages to the naive implementations of the example algorithms
2cb5301 [Burak] SPARK-2434: Warning messages redirecting to original implementations added.
Parent abeacffb
examples/src/main/python/als.py
@@ -16,6 +16,9 @@
#
"""
This is an example implementation of ALS for learning how to use Spark. Please refer to
ALS in pyspark.mllib.recommendation for more conventional use.
This example requires numpy (http://www.numpy.org/)
"""
from os.path import realpath
@@ -49,9 +52,15 @@ def update(i, vec, mat, ratings):
if __name__ == "__main__":
"""
Usage: als [M] [U] [F] [iterations] [slices]
"""
print >> sys.stderr, """WARN: This is a naive implementation of ALS and is given as an
example. Please use the ALS method found in pyspark.mllib.recommendation for more
conventional use."""
sc = SparkContext(appName="PythonALS")
M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
......
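For comparison, the pyspark.mllib routine that the new warning points to takes only a few lines. The sketch below is not part of the commit; the file name ratings.txt and its user,product,rating line format are illustrative assumptions.

from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS

sc = SparkContext(appName="MLlibALSSketch")

# Hypothetical input file: one "user,product,rating" triple per line
ratings = sc.textFile("ratings.txt") \
    .map(lambda line: line.split(',')) \
    .map(lambda p: (int(p[0]), int(p[1]), float(p[2])))

# Factorize the rating matrix with rank 10, 10 iterations
model = ALS.train(ratings, 10, 10)

# Predicted rating of product 2 by user 1
print model.predict(1, 2)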
examples/src/main/python/kmeans.py
@@ -45,9 +45,15 @@ def closestPoint(p, centers):
if __name__ == "__main__":
if len(sys.argv) != 4:
print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
exit(-1)
print >> sys.stderr, """WARN: This is a naive implementation of KMeans Clustering and is given
as an example! Please refer to examples/src/main/python/mllib/kmeans.py for an example on
how to use MLlib's KMeans implementation."""
sc = SparkContext(appName="PythonKMeans")
lines = sc.textFile(sys.argv[1])
data = lines.map(parseVector).cache()
......
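The MLlib counterpart mentioned in this warning (examples/src/main/python/mllib/kmeans.py) reduces to a call like the one below. A minimal sketch, not part of the commit; the input path and k are placeholders.

from numpy import array
from pyspark import SparkContext
from pyspark.mllib.clustering import KMeans

sc = SparkContext(appName="MLlibKMeansSketch")

# Hypothetical input file: one space-separated point per line
data = sc.textFile("kmeans_data.txt") \
    .map(lambda line: array([float(x) for x in line.split(' ')]))

# Cluster into k=2 groups with at most 10 iterations
model = KMeans.train(data, 2, maxIterations=10)

print model.clusterCenters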
examples/src/main/python/logistic_regression.py
@@ -47,9 +47,15 @@ def readPointBatch(iterator):
return [matrix]
if __name__ == "__main__":
if len(sys.argv) != 3:
print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
exit(-1)
print >> sys.stderr, """WARN: This is a naive implementation of Logistic Regression and is
given as an example! Please refer to examples/src/main/python/mllib/logistic_regression.py
to see how MLlib's implementation is used."""
sc = SparkContext(appName="PythonLR")
points = sc.textFile(sys.argv[1]).mapPartitions(readPointBatch).cache()
iterations = int(sys.argv[2])
......
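Likewise, the MLlib logistic regression that this warning refers to is used roughly as follows. A minimal sketch under an assumed input format, not part of the commit.

from pyspark import SparkContext
from pyspark.mllib.classification import LogisticRegressionWithSGD
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext(appName="MLlibLRSketch")

# Hypothetical input file: "label f1 f2 ... fD" per line
def parsePoint(line):
    values = [float(x) for x in line.split(' ')]
    return LabeledPoint(values[0], values[1:])

points = sc.textFile("lr_data.txt").map(parsePoint)

# 100 iterations of stochastic gradient descent
model = LogisticRegressionWithSGD.train(points, iterations=100)

print model.predict([0.5] * 10)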
examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -25,6 +25,9 @@ import cern.jet.math._
/**
* Alternating least squares matrix factorization.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.recommendation.ALS
*/
object LocalALS {
// Parameters set through command line arguments
@@ -107,7 +110,16 @@ object LocalALS {
solved2D.viewColumn(0)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of ALS and is given as an example!
|Please use the ALS method found in org.apache.spark.mllib.recommendation
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
args match {
case Array(m, u, f, iters) => {
M = m.toInt
@@ -120,6 +132,9 @@ object LocalALS {
System.exit(1)
}
}
showWarning()
printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
val R = generateR()
......
examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -21,6 +21,12 @@ import java.util.Random
import breeze.linalg.{Vector, DenseVector}
/**
* Logistic regression based classification.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object LocalFileLR {
val D = 10 // Number of dimensions
val rand = new Random(42)
@@ -32,7 +38,18 @@ object LocalFileLR {
DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
val points = lines.map(parsePoint _)
val ITERATIONS = args(1).toInt
......
examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -28,6 +28,9 @@ import org.apache.spark.SparkContext._
/**
* K-means clustering.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.clustering.KMeans
*/
object LocalKMeans {
val N = 1000
@@ -61,7 +64,18 @@ object LocalKMeans {
bestIndex
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of KMeans Clustering and is given as an example!
|Please use the KMeans method found in org.apache.spark.mllib.clustering
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val data = generateData
var points = new HashSet[Vector[Double]]
var kPoints = new HashMap[Int, Vector[Double]]
......
examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -23,6 +23,9 @@ import breeze.linalg.{Vector, DenseVector}
/**
* Logistic regression based classification.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object LocalLR {
val N = 10000 // Number of data points
@@ -42,9 +45,19 @@ object LocalLR {
Array.tabulate(N)(generatePoint)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val data = generateData
// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
println("Initial w: " + w)
......
examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -27,6 +27,9 @@ import org.apache.spark._
/**
* Alternating least squares matrix factorization.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.recommendation.ALS
*/
object SparkALS {
// Parameters set through command line arguments
@@ -87,7 +90,16 @@ object SparkALS {
solved2D.viewColumn(0)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of ALS and is given as an example!
|Please use the ALS method found in org.apache.spark.mllib.recommendation
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
var slices = 0
val options = (0 to 4).map(i => if (i < args.length) Some(args(i)) else None)
@@ -103,7 +115,11 @@ object SparkALS {
System.err.println("Usage: SparkALS [M] [U] [F] [iters] [slices]")
System.exit(1)
}
showWarning()
printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
val sparkConf = new SparkConf().setAppName("SparkALS")
val sc = new SparkContext(sparkConf)
......
examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -30,6 +30,9 @@ import org.apache.spark.scheduler.InputFormatInfo
/**
* Logistic regression based classification.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object SparkHdfsLR {
val D = 10 // Number of dimensions
@@ -48,12 +51,23 @@ object SparkHdfsLR {
DataPoint(new DenseVector(x), y)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
if (args.length < 2) {
System.err.println("Usage: SparkHdfsLR <file> <iters>")
System.exit(1)
}
showWarning()
val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
val inputPath = args(0)
val conf = SparkHadoopUtil.get.newConfiguration()
......
examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -24,6 +24,9 @@ import org.apache.spark.SparkContext._
/**
* K-means clustering.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.clustering.KMeans
*/
object SparkKMeans {
@@ -46,11 +49,23 @@ object SparkKMeans {
bestIndex
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of KMeans Clustering and is given as an example!
|Please use the KMeans method found in org.apache.spark.mllib.clustering
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
if (args.length < 3) {
System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
System.exit(1)
}
showWarning()
val sparkConf = new SparkConf().setAppName("SparkKMeans")
val sc = new SparkContext(sparkConf)
val lines = sc.textFile(args(0))
......
examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -28,6 +28,9 @@ import org.apache.spark._
/**
* Logistic regression based classification.
* Usage: SparkLR [slices]
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object SparkLR {
val N = 10000 // Number of data points
@@ -47,7 +50,18 @@ object SparkLR {
Array.tabulate(N)(generatePoint)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val sparkConf = new SparkConf().setAppName("SparkLR")
val sc = new SparkContext(sparkConf)
val numSlices = if (args.length > 0) args(0).toInt else 2
@@ -66,6 +80,7 @@ object SparkLR {
}
println("Final w: " + w)
sc.stop()
}
}