Commit a4d60208 authored by Burak, committed by Xiangrui Meng

[SPARK-2434][MLlib]: Warning messages that point users to original MLlib implementations added to Examples

[SPARK-2434][MLlib]: Warning messages that refer users to the original MLlib implementations of some popular example machine learning algorithms have been added to both the comments and the code. The following examples have been modified:
Scala:
* LocalALS
* LocalFileLR
* LocalKMeans
* LocalLR
* SparkALS
* SparkHdfsLR
* SparkKMeans
* SparkLR
Python:
 * kmeans.py
 * als.py
 * logistic_regression.py

Author: Burak <brkyvz@gmail.com>

Closes #1515 from brkyvz/SPARK-2434 and squashes the following commits:

7505da9 [Burak] [SPARK-2434][MLlib]: Warning messages added, scalastyle errors fixed, and added missing punctuation
b96b522 [Burak] [SPARK-2434][MLlib]: Warning messages added and scalastyle errors fixed
4762f39 [Burak] [SPARK-2434]: Warning messages added
17d3d83 [Burak] SPARK-2434: Added warning messages to the naive implementations of the example algorithms
2cb5301 [Burak] SPARK-2434: Warning messages redirecting to original implementations added.
Parent abeacffb
examples/src/main/python/als.py
@@ -16,6 +16,9 @@
#
"""
This is an example implementation of ALS for learning how to use Spark. Please refer to
ALS in pyspark.mllib.recommendation for more conventional use.
This example requires numpy (http://www.numpy.org/)
"""
from os.path import realpath
@@ -49,9 +52,15 @@ def update(i, vec, mat, ratings):
if __name__ == "__main__":
"""
Usage: als [M] [U] [F] [iterations] [slices]
"""
print >> sys.stderr, """WARN: This is a naive implementation of ALS and is given as an
example. Please use the ALS method found in pyspark.mllib.recommendation for more
conventional use."""
sc = SparkContext(appName="PythonALS")
M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
......
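For comparison, the pyspark.mllib routine that the new warning points to takes only a few lines. The sketch below is not part of the commit; the file name ratings.txt and its user,product,rating line format are illustrative assumptions.

from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS

sc = SparkContext(appName="MLlibALSSketch")

# Hypothetical input file: one "user,product,rating" triple per line
ratings = sc.textFile("ratings.txt") \
    .map(lambda line: line.split(',')) \
    .map(lambda p: (int(p[0]), int(p[1]), float(p[2])))

# Factorize the rating matrix with rank 10, 10 iterations
model = ALS.train(ratings, 10, 10)

# Predicted rating of product 2 by user 1
print model.predict(1, 2)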
examples/src/main/python/kmeans.py
@@ -45,9 +45,15 @@ def closestPoint(p, centers):
if __name__ == "__main__":
if len(sys.argv) != 4:
print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
exit(-1)
print >> sys.stderr, """WARN: This is a naive implementation of KMeans Clustering and is given
as an example! Please refer to examples/src/main/python/mllib/kmeans.py for an example on
how to use MLlib's KMeans implementation."""
sc = SparkContext(appName="PythonKMeans")
lines = sc.textFile(sys.argv[1])
data = lines.map(parseVector).cache()
......
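The MLlib counterpart mentioned in this warning (examples/src/main/python/mllib/kmeans.py) reduces to a call like the one below. A minimal sketch, not part of the commit; the input path and k are placeholders.

from numpy import array
from pyspark import SparkContext
from pyspark.mllib.clustering import KMeans

sc = SparkContext(appName="MLlibKMeansSketch")

# Hypothetical input file: one space-separated point per line
data = sc.textFile("kmeans_data.txt") \
    .map(lambda line: array([float(x) for x in line.split(' ')]))

# Cluster into k=2 groups with at most 10 iterations
model = KMeans.train(data, 2, maxIterations=10)

print model.clusterCenters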
examples/src/main/python/logistic_regression.py
@@ -47,9 +47,15 @@ def readPointBatch(iterator):
return [matrix]
if __name__ == "__main__":
if len(sys.argv) != 3:
print >> sys.stderr, "Usage: logistic_regression <file> <iterations>"
exit(-1)
print >> sys.stderr, """WARN: This is a naive implementation of Logistic Regression and is
given as an example! Please refer to examples/src/main/python/mllib/logistic_regression.py
to see how MLlib's implementation is used."""
sc = SparkContext(appName="PythonLR")
points = sc.textFile(sys.argv[1]).mapPartitions(readPointBatch).cache()
iterations = int(sys.argv[2])
......
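Likewise, the MLlib logistic regression that this warning refers to is used roughly as follows. A minimal sketch under an assumed input format, not part of the commit.

from pyspark import SparkContext
from pyspark.mllib.classification import LogisticRegressionWithSGD
from pyspark.mllib.regression import LabeledPoint

sc = SparkContext(appName="MLlibLRSketch")

# Hypothetical input file: "label f1 f2 ... fD" per line
def parsePoint(line):
    values = [float(x) for x in line.split(' ')]
    return LabeledPoint(values[0], values[1:])

points = sc.textFile("lr_data.txt").map(parsePoint)

# 100 iterations of stochastic gradient descent
model = LogisticRegressionWithSGD.train(points, iterations=100)

print model.predict([0.5] * 10)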
examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -25,6 +25,9 @@ import cern.jet.math._
/**
* Alternating least squares matrix factorization.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.recommendation.ALS
*/
object LocalALS {
// Parameters set through command line arguments
@@ -107,7 +110,16 @@ object LocalALS {
solved2D.viewColumn(0)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of ALS and is given as an example!
|Please use the ALS method found in org.apache.spark.mllib.recommendation
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
args match {
case Array(m, u, f, iters) => {
M = m.toInt
@@ -120,6 +132,9 @@ object LocalALS {
System.exit(1)
}
}
showWarning()
printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
val R = generateR()
......
examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -21,6 +21,12 @@ import java.util.Random
import breeze.linalg.{Vector, DenseVector}
/**
* Logistic regression based classification.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object LocalFileLR {
val D = 10 // Number of dimensions
val rand = new Random(42)
@@ -32,7 +38,18 @@ object LocalFileLR {
DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val lines = scala.io.Source.fromFile(args(0)).getLines().toArray
val points = lines.map(parsePoint _)
val ITERATIONS = args(1).toInt
......
examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -28,6 +28,9 @@ import org.apache.spark.SparkContext._
/**
* K-means clustering.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.clustering.KMeans
*/
object LocalKMeans {
val N = 1000
@@ -61,7 +64,18 @@ object LocalKMeans {
bestIndex
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of KMeans Clustering and is given as an example!
|Please use the KMeans method found in org.apache.spark.mllib.clustering
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val data = generateData
var points = new HashSet[Vector[Double]]
var kPoints = new HashMap[Int, Vector[Double]]
......
examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -23,6 +23,9 @@ import breeze.linalg.{Vector, DenseVector}
/**
* Logistic regression based classification.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object LocalLR {
val N = 10000 // Number of data points
@@ -42,9 +45,19 @@ object LocalLR {
Array.tabulate(N)(generatePoint)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val data = generateData
// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
println("Initial w: " + w)
......
examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -27,6 +27,9 @@ import org.apache.spark._
/**
* Alternating least squares matrix factorization.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.recommendation.ALS
*/
object SparkALS {
// Parameters set through command line arguments
@@ -87,7 +90,16 @@ object SparkALS {
solved2D.viewColumn(0)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of ALS and is given as an example!
|Please use the ALS method found in org.apache.spark.mllib.recommendation
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
var slices = 0
val options = (0 to 4).map(i => if (i < args.length) Some(args(i)) else None)
@@ -103,7 +115,11 @@ object SparkALS {
System.err.println("Usage: SparkALS [M] [U] [F] [iters] [slices]")
System.exit(1)
}
showWarning()
printf("Running with M=%d, U=%d, F=%d, iters=%d\n", M, U, F, ITERATIONS)
val sparkConf = new SparkConf().setAppName("SparkALS")
val sc = new SparkContext(sparkConf)
......
examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -30,6 +30,9 @@ import org.apache.spark.scheduler.InputFormatInfo
/**
* Logistic regression based classification.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object SparkHdfsLR {
val D = 10 // Number of dimensions
@@ -48,12 +51,23 @@ object SparkHdfsLR {
DataPoint(new DenseVector(x), y)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
if (args.length < 2) {
System.err.println("Usage: SparkHdfsLR <file> <iters>")
System.exit(1)
}
showWarning()
val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
val inputPath = args(0)
val conf = SparkHadoopUtil.get.newConfiguration()
......
examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -24,6 +24,9 @@ import org.apache.spark.SparkContext._
/**
* K-means clustering.
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.clustering.KMeans
*/
object SparkKMeans {
@@ -46,11 +49,23 @@ object SparkKMeans {
bestIndex
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of KMeans Clustering and is given as an example!
|Please use the KMeans method found in org.apache.spark.mllib.clustering
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
if (args.length < 3) {
System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
System.exit(1)
}
showWarning()
val sparkConf = new SparkConf().setAppName("SparkKMeans")
val sc = new SparkContext(sparkConf)
val lines = sc.textFile(args(0))
......
examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -28,6 +28,9 @@ import org.apache.spark._
/**
* Logistic regression based classification.
* Usage: SparkLR [slices]
*
* This is an example implementation for learning how to use Spark. For more conventional use,
* please refer to org.apache.spark.mllib.classification.LogisticRegression
*/
object SparkLR {
val N = 10000 // Number of data points
@@ -47,7 +50,18 @@ object SparkLR {
Array.tabulate(N)(generatePoint)
}
def showWarning() {
System.err.println(
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
|for more conventional use.
""".stripMargin)
}
def main(args: Array[String]) {
showWarning()
val sparkConf = new SparkConf().setAppName("SparkLR")
val sc = new SparkContext(sparkConf)
val numSlices = if (args.length > 0) args(0).toInt else 2
@@ -66,6 +80,7 @@ object SparkLR {
}
println("Final w: " + w)
sc.stop()
}
}