提交 185b5f6c 编写于 作者: T twalthr

[FLINK-4179] [table] Additional TPCHQuery3Table example improvements

上级 ec4c9bef
...@@ -17,57 +17,60 @@ ...@@ -17,57 +17,60 @@
*/ */
package org.apache.flink.examples.scala package org.apache.flink.examples.scala
import org.apache.flink.api.table.TableEnvironment
import org.apache.flink.api.table.expressions.Literal
import org.apache.flink.api.scala._ import org.apache.flink.api.scala._
import org.apache.flink.api.scala.table._ import org.apache.flink.api.scala.table._
import org.apache.flink.api.table.TableEnvironment
/** /**
* This program implements a modified version of the TPC-H query 3. The * This program implements a modified version of the TPC-H query 3. The
* example demonstrates how to assign names to fields by extending the Tuple class. * example demonstrates how to assign names to fields by extending the Tuple class.
* The original query can be found at * The original query can be found at
* [http://www.tpc.org/tpch/spec/tpch2.16.0.pdf](http://www.tpc.org/tpch/spec/tpch2.16.0.pdf) * [http://www.tpc.org/tpch/spec/tpch2.16.0.pdf](http://www.tpc.org/tpch/spec/tpch2.16.0.pdf)
* (page 29). * (page 29).
* *
* This program implements the following SQL equivalent: * This program implements the following SQL equivalent:
* *
* {{{ * {{{
* SELECT * SELECT
* l_orderkey, * l_orderkey,
* SUM(l_extendedprice*(1-l_discount)) AS revenue, * SUM(l_extendedprice*(1-l_discount)) AS revenue,
* o_orderdate, * o_orderdate,
* o_shippriority * o_shippriority
* FROM customer, * FROM customer,
* orders, * orders,
* lineitem * lineitem
* WHERE * WHERE
* c_mktsegment = '[SEGMENT]' * c_mktsegment = '[SEGMENT]'
* AND c_custkey = o_custkey * AND c_custkey = o_custkey
* AND l_orderkey = o_orderkey * AND l_orderkey = o_orderkey
* AND o_orderdate < date '[DATE]' * AND o_orderdate < date '[DATE]'
* AND l_shipdate > date '[DATE]' * AND l_shipdate > date '[DATE]'
* GROUP BY * GROUP BY
* l_orderkey, * l_orderkey,
* o_orderdate, * o_orderdate,
* o_shippriority; * o_shippriority
* }}} * ORDER BY
* * revenue desc,
* Compared to the original TPC-H query this version does not sort the result by revenue * o_orderdate;
* and orderdate. * }}}
* *
* Input files are plain text CSV files using the pipe character ('|') as field separator * Compared to the original TPC-H query this version does not sort the result by revenue
* as generated by the TPC-H data generator which is available at * and orderdate.
* [http://www.tpc.org/tpch/](a href="http://www.tpc.org/tpch/). *
* * Input files are plain text CSV files using the pipe character ('|') as field separator
* Usage: * as generated by the TPC-H data generator which is available at
* {{{ * [http://www.tpc.org/tpch/](a href="http://www.tpc.org/tpch/).
* TPCHQuery3Expression <lineitem-csv path> <customer-csv path> <orders-csv path> <result path> *
* }}} * Usage:
* * {{{
* This example shows how to use: * TPCHQuery3Expression <lineitem-csv path> <customer-csv path> <orders-csv path> <result path>
* - Table API expressions * }}}
* *
*/ * This example shows how to:
* - Convert DataSets to Tables
* - Use Table API expressions
*
*/
object TPCHQuery3Table { object TPCHQuery3Table {
def main(args: Array[String]) { def main(args: Array[String]) {
...@@ -76,23 +79,23 @@ object TPCHQuery3Table { ...@@ -76,23 +79,23 @@ object TPCHQuery3Table {
} }
// set filter date // set filter date
val date = java.sql.Date.valueOf("1995-03-12") val date = "1995-03-12".toDate
// get execution environment // get execution environment
val env = ExecutionEnvironment.getExecutionEnvironment val env = ExecutionEnvironment.getExecutionEnvironment
val tEnv = TableEnvironment.getTableEnvironment(env) val tEnv = TableEnvironment.getTableEnvironment(env)
val lineitems = getLineitemDataSet(env) val lineitems = getLineitemDataSet(env)
.filter( l => java.sql.Date.valueOf(l.shipDate).after(date) ).toTable(tEnv) .toTable(tEnv, 'id, 'extdPrice, 'discount, 'shipDate)
.as('id, 'extdPrice, 'discount, 'shipDate) .filter('shipDate.toDate > date)
val customers = getCustomerDataSet(env).toTable(tEnv) val customers = getCustomerDataSet(env)
.as('id, 'mktSegment) .toTable(tEnv, 'id, 'mktSegment)
.filter( 'mktSegment === "AUTOMOBILE" ) .filter('mktSegment === "AUTOMOBILE")
val orders = getOrdersDataSet(env) val orders = getOrdersDataSet(env)
.filter( o => java.sql.Date.valueOf(o.orderDate).before(date) ).toTable(tEnv) .toTable(tEnv, 'orderId, 'custId, 'orderDate, 'shipPrio)
.as('orderId, 'custId, 'orderDate, 'shipPrio) .filter('orderDate.toDate < date)
val items = val items =
orders.join(customers) orders.join(customers)
...@@ -102,19 +105,20 @@ object TPCHQuery3Table { ...@@ -102,19 +105,20 @@ object TPCHQuery3Table {
.where('orderId === 'id) .where('orderId === 'id)
.select( .select(
'orderId, 'orderId,
'extdPrice * (Literal(1.0f) - 'discount) as 'revenue, 'extdPrice * (1.0f.toExpr - 'discount) as 'revenue,
'orderDate, 'orderDate,
'shipPrio) 'shipPrio)
val result = items val result = items
.groupBy('orderId, 'orderDate, 'shipPrio) .groupBy('orderId, 'orderDate, 'shipPrio)
.select('orderId, 'revenue.sum, 'orderDate, 'shipPrio) .select('orderId, 'revenue.sum as 'revenue, 'orderDate, 'shipPrio)
.orderBy('revenue.desc, 'orderDate.asc)
// emit result // emit result
result.writeAsCsv(outputPath, "\n", "|") result.writeAsCsv(outputPath, "\n", "|")
// execute program // execute program
env.execute("Scala TPCH Query 3 (Expression) Example") env.execute("Scala TPCH Query 3 (Table API Expression) Example")
} }
// ************************************************************************* // *************************************************************************
...@@ -145,12 +149,12 @@ object TPCHQuery3Table { ...@@ -145,12 +149,12 @@ object TPCHQuery3Table {
System.err.println("This program expects data from the TPC-H benchmark as input data.\n" + System.err.println("This program expects data from the TPC-H benchmark as input data.\n" +
" Due to legal restrictions, we can not ship generated data.\n" + " Due to legal restrictions, we can not ship generated data.\n" +
" You can find the TPC-H data generator at http://www.tpc.org/tpch/.\n" + " You can find the TPC-H data generator at http://www.tpc.org/tpch/.\n" +
" Usage: TPCHQuery3 <lineitem-csv path> <customer-csv path>" + " Usage: TPCHQuery3 <lineitem-csv path> <customer-csv path> " +
"<orders-csv path> <result path>") "<orders-csv path> <result path>")
false false
} }
} }
private def getLineitemDataSet(env: ExecutionEnvironment): DataSet[Lineitem] = { private def getLineitemDataSet(env: ExecutionEnvironment): DataSet[Lineitem] = {
env.readCsvFile[Lineitem]( env.readCsvFile[Lineitem](
lineitemPath, lineitemPath,
...@@ -164,7 +168,7 @@ object TPCHQuery3Table { ...@@ -164,7 +168,7 @@ object TPCHQuery3Table {
fieldDelimiter = "|", fieldDelimiter = "|",
includedFields = Array(0, 6) ) includedFields = Array(0, 6) )
} }
private def getOrdersDataSet(env: ExecutionEnvironment): DataSet[Order] = { private def getOrdersDataSet(env: ExecutionEnvironment): DataSet[Order] = {
env.readCsvFile[Order]( env.readCsvFile[Order](
ordersPath, ordersPath,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册