Unverified commit 64c732f0 authored by J Jark Wu

[FLINK-16991][table-planner-blink] Support DynamicTableSink in blink planner

This closes #12086
Parent 3c6df778
......@@ -58,6 +58,12 @@ By default no operator is disabled.</td>
<td>String</td>
<td>Sets exec shuffle mode.<br />Accepted values are:<ul><li><span markdown="span">`ALL_EDGES_BLOCKING`</span>: All edges will use blocking shuffle.</li><li><span markdown="span">`FORWARD_EDGES_PIPELINED`</span>: Forward edges will use pipelined shuffle, others blocking.</li><li><span markdown="span">`POINTWISE_EDGES_PIPELINED`</span>: Pointwise edges will use pipelined shuffle, others blocking. Pointwise edges include forward and rescale edges.</li><li><span markdown="span">`ALL_EDGES_PIPELINED`</span>: All edges will use pipelined shuffle.</li><li><span markdown="span">`batch`</span>: the same as <span markdown="span">`ALL_EDGES_BLOCKING`</span>. Deprecated.</li><li><span markdown="span">`pipelined`</span>: the same as <span markdown="span">`ALL_EDGES_PIPELINED`</span>. Deprecated.</li></ul>Note: Blocking shuffle means data will be fully produced before sent to consumer tasks. Pipelined shuffle means data will be sent to consumer tasks once produced.</td>
</tr>
<tr>
<td><h5>table.exec.sink.not-null-enforcer</h5><br> <span class="label label-primary">Batch</span> <span class="label label-primary">Streaming</span></td>
<td style="word-wrap: break-word;">ERROR</td>
<td><p>Enum</p>Possible values: [ERROR, DROP]</td>
<td>The NOT NULL column constraint on a table enforces that null values can't be inserted into the table. Flink supports 'error' (default) and 'drop' enforcement behavior. By default, Flink will check values and throw a runtime exception when null values are written into NOT NULL columns. Users can change the behavior to 'drop' to silently drop such records without throwing an exception.</td>
</tr>
<tr>
<td><h5>table.exec.sort.async-merge-enabled</h5><br> <span class="label label-primary">Batch</span></td>
<td style="word-wrap: break-word;">true</td>
......
......@@ -47,6 +47,22 @@ public class ExecutionConfigOptions {
"tasks to advance their watermarks without the need to wait for " +
"watermarks from this source while it is idle.");
// ------------------------------------------------------------------------
// Sink Options
// ------------------------------------------------------------------------
@Documentation.TableOption(execMode = Documentation.ExecMode.BATCH_STREAMING)
public static final ConfigOption<NotNullEnforcer> TABLE_EXEC_SINK_NOT_NULL_ENFORCER =
key("table.exec.sink.not-null-enforcer")
.enumType(NotNullEnforcer.class)
.defaultValue(NotNullEnforcer.ERROR)
.withDescription("The NOT NULL column constraint on a table enforces that " +
"null values can't be inserted into the table. Flink supports " +
"'error' (default) and 'drop' enforcement behavior. By default, " +
"Flink will check values and throw runtime exception when null values writing " +
"into NOT NULL columns. Users can change the behavior to 'drop' to " +
"silently drop such records without throwing exception.");
// ------------------------------------------------------------------------
// Sort Options
// ------------------------------------------------------------------------
......@@ -250,4 +266,21 @@ public class ExecutionConfigOptions {
"Pipelined shuffle means data will be sent to consumer tasks once produced.")
.build());
// ------------------------------------------------------------------------------------------
// Enum option types
// ------------------------------------------------------------------------------------------
/**
* The enforcer to guarantee the NOT NULL column constraint when writing data into a sink.
*/
public enum NotNullEnforcer {
/**
* Throws a runtime exception when writing null values into a NOT NULL column.
*/
ERROR,
/**
* Drops records when writing null values into a NOT NULL column.
*/
DROP
}
}
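For illustration, a minimal usage sketch of the new option (assuming the blink planner in streaming mode; this snippet is not part of the change):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.config.ExecutionConfigOptions;

// a minimal sketch: create a TableEnvironment and switch the NOT NULL
// enforcement behavior from ERROR (the default) to DROP
TableEnvironment tEnv = TableEnvironment.create(
    EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build());
tEnv.getConfig().getConfiguration().set(
    ExecutionConfigOptions.TABLE_EXEC_SINK_NOT_NULL_ENFORCER,
    ExecutionConfigOptions.NotNullEnforcer.DROP);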
......@@ -27,6 +27,8 @@ import org.apache.flink.table.catalog.ObjectIdentifier;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.table.sources.TableSource;
import javax.annotation.Nullable;
import java.util.Optional;
/**
......@@ -91,6 +93,31 @@ public class TableFactoryUtil {
}
}
/**
* Creates a {@link TableSink} from a {@link CatalogTable}.
*
* <p>It considers {@link Catalog#getFactory()} if provided.
*/
@SuppressWarnings("unchecked")
public static <T> TableSink<T> findAndCreateTableSink(
@Nullable Catalog catalog,
ObjectIdentifier objectIdentifier,
CatalogTable catalogTable,
ReadableConfig configuration,
boolean isStreamingMode) {
TableSinkFactory.Context context = new TableSinkFactoryContextImpl(
objectIdentifier,
catalogTable,
configuration,
!isStreamingMode);
if (catalog == null) {
return findAndCreateTableSink(context);
} else {
return createTableSinkForCatalogTable(catalog, context)
.orElseGet(() -> findAndCreateTableSink(context));
}
}
/**
* Creates a table sink for a {@link CatalogTable} using the table factory associated with the catalog.
*/
......
......@@ -26,6 +26,7 @@ import org.apache.flink.table.api.WatermarkSpec;
import org.apache.flink.table.api.constraints.UniqueConstraint;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.table.sources.TableSource;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.Preconditions;
import java.util.List;
......@@ -77,6 +78,22 @@ public class TableSchemaUtils {
return schema;
}
/**
* Returns the field indices of the primary key within the physical columns of
* this schema (computed columns are not included).
*/
public static int[] getPrimaryKeyIndices(TableSchema schema) {
if (schema.getPrimaryKey().isPresent()) {
RowType physicalRowType = (RowType) schema.toPhysicalRowDataType().getLogicalType();
List<String> fieldNames = physicalRowType.getFieldNames();
return schema.getPrimaryKey().get().getColumns().stream()
.mapToInt(fieldNames::indexOf)
.toArray();
} else {
return new int[0];
}
}
/**
* Creates a builder with given table schema.
*
......
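For illustration, a hedged usage sketch of the new getPrimaryKeyIndices helper (the schema and field names below are made up for the example):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.utils.TableSchemaUtils;

// a schema with a single-column primary key on the first physical column
TableSchema schema = TableSchema.builder()
    .field("id", DataTypes.BIGINT().notNull())
    .field("name", DataTypes.STRING())
    .primaryKey("id")
    .build();

int[] pkIndices = TableSchemaUtils.getPrimaryKeyIndices(schema); // expected: [0]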
......@@ -193,7 +193,7 @@ public class FlinkCalciteCatalogReader extends CalciteCatalogReader {
RelDataType rowType,
CatalogTable catalogTable,
CatalogSchemaTable schemaTable) {
if (isLegacyConnectorOptions(catalogTable, schemaTable)) {
if (isLegacySourceOptions(catalogTable, schemaTable)) {
return new LegacyCatalogSourceTable<>(
relOptSchema,
names,
......@@ -211,9 +211,9 @@ public class FlinkCalciteCatalogReader extends CalciteCatalogReader {
}
/**
* Checks whether the {@link CatalogTable} uses legacy connector options.
* Checks whether the {@link CatalogTable} uses legacy connector source options.
*/
private static boolean isLegacyConnectorOptions(
private static boolean isLegacySourceOptions(
CatalogTable catalogTable,
CatalogSchemaTable schemaTable) {
// normalize option keys
......
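For context, a small illustrative sketch of the two option styles this check distinguishes (the values are made up; note that, as remarked in the planner code further below, a few legacy factories also use the new 'connector' key):

import java.util.HashMap;
import java.util.Map;

// legacy connector options use the 'connector.type' key and are routed to the
// legacy TableSource/TableSink stack
Map<String, String> legacyOptions = new HashMap<>();
legacyOptions.put("connector.type", "filesystem");

// new factory options use the 'connector' key and are routed to the
// DynamicTableSource/DynamicTableSink stack
Map<String, String> newOptions = new HashMap<>();
newOptions.put("connector", "filesystem");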
......@@ -163,24 +163,19 @@ class RelTimeIndicatorConverter(rexBuilder: RexBuilder) extends RelShuttle {
val input = snapshot.getInput.accept(this)
snapshot.copy(snapshot.getTraitSet, input, snapshot.getPeriod)
case sink: LogicalLegacySink =>
var newInput = sink.getInput.accept(this)
var needsConversion = false
val projects = newInput.getRowType.getFieldList.map { field =>
if (isProctimeIndicatorType(field.getType)) {
needsConversion = true
rexBuilder.makeCall(FlinkSqlOperatorTable.PROCTIME_MATERIALIZE,
new RexInputRef(field.getIndex, field.getType))
} else {
new RexInputRef(field.getIndex, field.getType)
}
}
case sink: LogicalSink =>
val newInput = convertSinkInput(sink.getInput)
new LogicalSink(
sink.getCluster,
sink.getTraitSet,
newInput,
sink.tableIdentifier,
sink.catalogTable,
sink.tableSink,
sink.staticPartitions)
// add final conversion if necessary
if (needsConversion) {
newInput = LogicalProject.create(newInput, projects, newInput.getRowType.getFieldNames)
}
case sink: LogicalLegacySink =>
val newInput = convertSinkInput(sink.getInput)
new LogicalLegacySink(
sink.getCluster,
sink.getTraitSet,
......@@ -373,6 +368,28 @@ class RelTimeIndicatorConverter(rexBuilder: RexBuilder) extends RelShuttle {
rowType.getFieldList.exists(field => isRowtimeIndicatorType(field.getType))
}
private def convertSinkInput(sinkInput: RelNode): RelNode = {
var newInput = sinkInput.accept(this)
var needsConversion = false
val projects = newInput.getRowType.getFieldList.map { field =>
if (isProctimeIndicatorType(field.getType)) {
needsConversion = true
rexBuilder.makeCall(FlinkSqlOperatorTable.PROCTIME_MATERIALIZE,
new RexInputRef(field.getIndex, field.getType))
} else {
new RexInputRef(field.getIndex, field.getType)
}
}
// add final conversion if necessary
if (needsConversion) {
LogicalProject.create(newInput, projects, newInput.getRowType.getFieldNames)
} else {
newInput
}
}
private def convertAggregate(aggregate: Aggregate): LogicalAggregate = {
// visit children and update inputs
val input = aggregate.getInput.accept(this)
......
......@@ -24,8 +24,10 @@ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.api.config.ExecutionConfigOptions
import org.apache.flink.table.api.{TableConfig, TableEnvironment, TableException}
import org.apache.flink.table.catalog._
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.flink.table.delegation.{Executor, Parser, Planner}
import org.apache.flink.table.factories.{TableFactoryUtil, TableSinkFactoryContextImpl}
import org.apache.flink.table.descriptors.{ConnectorDescriptorValidator, DescriptorProperties}
import org.apache.flink.table.factories.{FactoryUtil, TableFactoryUtil}
import org.apache.flink.table.operations.OutputConversionModifyOperation.UpdateMode
import org.apache.flink.table.operations._
import org.apache.flink.table.planner.JMap
......@@ -33,7 +35,7 @@ import org.apache.flink.table.planner.calcite.{CalciteParser, FlinkPlannerImpl,
import org.apache.flink.table.planner.catalog.CatalogManagerCalciteSchema
import org.apache.flink.table.planner.expressions.PlannerTypeInferenceUtilImpl
import org.apache.flink.table.planner.hint.FlinkHints
import org.apache.flink.table.planner.plan.nodes.calcite.LogicalLegacySink
import org.apache.flink.table.planner.plan.nodes.calcite.{LogicalLegacySink, LogicalSink}
import org.apache.flink.table.planner.plan.nodes.exec.ExecNode
import org.apache.flink.table.planner.plan.nodes.physical.FlinkPhysicalRel
import org.apache.flink.table.planner.plan.optimize.Optimizer
......@@ -188,25 +190,42 @@ abstract class PlannerBase(
val input = getRelBuilder.queryOperation(modifyOperation.getChild).build()
val identifier = catalogSink.getTableIdentifier
val dynamicOptions = catalogSink.getDynamicOptions
getTableSink(identifier, dynamicOptions).map { case (table, sink) =>
// check the logical field type and physical field type are compatible
val queryLogicalType = FlinkTypeFactory.toLogicalRowType(input.getRowType)
// validate logical schema and physical schema are compatible
validateLogicalPhysicalTypesCompatible(table, sink, queryLogicalType)
// validate TableSink
validateTableSink(catalogSink, identifier, sink, table.getPartitionKeys)
// validate query schema and sink schema, and apply cast if possible
val query = validateSchemaAndApplyImplicitCast(
input,
TableSchemaUtils.getPhysicalSchema(table.getSchema),
getTypeFactory,
Some(catalogSink.getTableIdentifier.asSummaryString()))
LogicalLegacySink.create(
query,
sink,
identifier.toString,
table,
catalogSink.getStaticPartitions.toMap)
getTableSink(identifier, dynamicOptions).map {
case (table, sink: TableSink[_]) =>
// check the logical field type and physical field type are compatible
val queryLogicalType = FlinkTypeFactory.toLogicalRowType(input.getRowType)
// validate logical schema and physical schema are compatible
validateLogicalPhysicalTypesCompatible(table, sink, queryLogicalType)
// validate TableSink
validateTableSink(catalogSink, identifier, sink, table.getPartitionKeys)
// validate query schema and sink schema, and apply cast if possible
val query = validateSchemaAndApplyImplicitCast(
input,
TableSchemaUtils.getPhysicalSchema(table.getSchema),
getTypeFactory,
Some(catalogSink.getTableIdentifier.asSummaryString()))
LogicalLegacySink.create(
query,
sink,
identifier.toString,
table,
catalogSink.getStaticPartitions.toMap)
case (table, sink: DynamicTableSink) =>
// validate TableSink
validateTableSink(catalogSink, identifier, sink, table.getPartitionKeys)
// validate query schema and sink schema, and apply cast if possible
val query = validateSchemaAndApplyImplicitCast(
input,
TableSchemaUtils.getPhysicalSchema(table.getSchema),
getTypeFactory,
Some(catalogSink.getTableIdentifier.asSummaryString()))
LogicalSink.create(
query,
identifier,
table,
sink,
catalogSink.getStaticPartitions.toMap)
} match {
case Some(sinkRel) => sinkRel
case None =>
......@@ -286,38 +305,72 @@ abstract class PlannerBase(
private def getTableSink(
objectIdentifier: ObjectIdentifier,
dynamicOptions: JMap[String, String])
: Option[(CatalogTable, TableSink[_])] = {
: Option[(CatalogTable, Any)] = {
JavaScalaConversionUtil.toScala(catalogManager.getTable(objectIdentifier))
.map(_.getTable) match {
case Some(s) if s.isInstanceOf[ConnectorCatalogTable[_, _]] =>
val table = s.asInstanceOf[ConnectorCatalogTable[_, _]]
case Some(table: ConnectorCatalogTable[_, _]) =>
JavaScalaConversionUtil.toScala(table.getTableSink) match {
case Some(sink) => Some(table, sink)
case None => None
}
case Some(s) if s.isInstanceOf[CatalogTable] =>
case Some(table: CatalogTable) =>
val catalog = catalogManager.getCatalog(objectIdentifier.getCatalogName)
val table = s.asInstanceOf[CatalogTable]
val tableToFind = if (dynamicOptions.nonEmpty) {
table.copy(FlinkHints.mergeTableOptions(dynamicOptions, table.getProperties))
} else {
table
}
val context = new TableSinkFactoryContextImpl(
objectIdentifier,
tableToFind,
getTableConfig.getConfiguration,
!isStreamingMode)
if (catalog.isPresent && catalog.get().getTableFactory.isPresent) {
val sink = TableFactoryUtil.createTableSinkForCatalogTable(catalog.get(), context)
if (sink.isPresent) {
return Option(table, sink.get())
}
if (isLegacyConnectorOptions(objectIdentifier, table)) {
val tableSink = TableFactoryUtil.findAndCreateTableSink(
catalog.orElse(null),
objectIdentifier,
tableToFind,
getTableConfig.getConfiguration,
isStreamingMode)
Option(table, tableSink)
} else {
val tableSink = FactoryUtil.createTableSink(
catalog.orElse(null),
objectIdentifier,
tableToFind,
getTableConfig.getConfiguration,
Thread.currentThread().getContextClassLoader)
Option(table, tableSink)
}
Option(table, TableFactoryUtil.findAndCreateTableSink(context))
case _ => None
}
}
/**
* Checks whether the [[CatalogTable]] uses legacy connector sink options.
*/
private def isLegacyConnectorOptions(
objectIdentifier: ObjectIdentifier,
catalogTable: CatalogTable) = {
// normalize option keys
val properties = new DescriptorProperties(true)
properties.putProperties(catalogTable.getOptions)
if (properties.containsKey(ConnectorDescriptorValidator.CONNECTOR_TYPE)) {
true
} else {
val catalog = catalogManager.getCatalog(objectIdentifier.getCatalogName)
try {
// try to create a legacy table sink using the options,
// because some legacy factories use the new 'connector' key
TableFactoryUtil.findAndCreateTableSink(
catalog.orElse(null),
objectIdentifier,
catalogTable,
getTableConfig.getConfiguration,
isStreamingMode)
// success, then we will use the legacy factories
true
} catch {
// fail, then we will use new factories
case _: Throwable => false
}
}
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.nodes.calcite
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.calcite.plan.{Convention, RelOptCluster, RelTraitSet}
import org.apache.calcite.rel.RelNode
import java.util
import scala.collection.JavaConversions._
/**
* Sub-class of [[Sink]] that is a relational expression
* which writes out the data of its input node into a [[DynamicTableSink]].
* This class corresponds to a Calcite logical rel.
*/
final class LogicalSink(
cluster: RelOptCluster,
traitSet: RelTraitSet,
input: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink,
val staticPartitions: Map[String, String])
extends Sink(cluster, traitSet, input, tableIdentifier, catalogTable, tableSink) {
override def copy(traitSet: RelTraitSet, inputs: util.List[RelNode]): RelNode = {
new LogicalSink(
cluster, traitSet, inputs.head, tableIdentifier, catalogTable, tableSink, staticPartitions)
}
}
object LogicalSink {
def create(
input: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink,
staticPartitions: Map[String, String] = Map()): LogicalSink = {
val traits = input.getCluster.traitSetOf(Convention.NONE)
new LogicalSink(
input.getCluster,
traits,
input,
tableIdentifier,
catalogTable,
tableSink,
staticPartitions)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.nodes.calcite
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.flink.table.planner.calcite.FlinkTypeFactory
import org.apache.flink.table.runtime.types.LogicalTypeDataTypeConverter.fromDataTypeToLogicalType
import org.apache.calcite.plan.{RelOptCluster, RelTraitSet}
import org.apache.calcite.rel.`type`.RelDataType
import org.apache.calcite.rel.{RelNode, RelWriter, SingleRel}
import scala.collection.JavaConversions._
/**
* Relational expression that writes out the data of its input node into a [[DynamicTableSink]].
*
* @param cluster cluster that this relational expression belongs to
* @param traitSet the traits of this rel
* @param input input relational expression
* @param tableIdentifier the full path of the table to write to
* @param catalogTable the catalog table this sink writes into
* @param tableSink the [[DynamicTableSink]] to write into
*/
abstract class Sink(
cluster: RelOptCluster,
traitSet: RelTraitSet,
input: RelNode,
val tableIdentifier: ObjectIdentifier,
val catalogTable: CatalogTable,
val tableSink: DynamicTableSink)
extends SingleRel(cluster, traitSet, input) {
override def deriveRowType(): RelDataType = {
val typeFactory = getCluster.getTypeFactory.asInstanceOf[FlinkTypeFactory]
val outputType = catalogTable.getSchema.toPhysicalRowDataType
typeFactory.createFieldTypeFromLogicalType(fromDataTypeToLogicalType(outputType))
}
override def explainTerms(pw: RelWriter): RelWriter = {
super.explainTerms(pw)
.item("table", tableIdentifier.asSummaryString())
.item("fields", getRowType.getFieldNames.mkString(", "))
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.nodes.common
import org.apache.calcite.plan.{RelOptCluster, RelTraitSet}
import org.apache.calcite.rel.RelNode
import org.apache.flink.api.dag.Transformation
import org.apache.flink.api.java.typeutils.InputTypeConfigurable
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.sink.OutputFormatSinkFunction
import org.apache.flink.streaming.api.operators.SimpleOperatorFactory
import org.apache.flink.streaming.api.transformations.SinkTransformation
import org.apache.flink.table.api.TableConfig
import org.apache.flink.table.api.config.ExecutionConfigOptions
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.sink.{DynamicTableSink, OutputFormatProvider, SinkFunctionProvider}
import org.apache.flink.table.data.RowData
import org.apache.flink.table.planner.calcite.FlinkTypeFactory
import org.apache.flink.table.planner.plan.nodes.calcite.Sink
import org.apache.flink.table.planner.plan.nodes.physical.FlinkPhysicalRel
import org.apache.flink.table.planner.sinks.TableSinkUtils
import org.apache.flink.table.runtime.connector.sink.SinkRuntimeProviderContext
import org.apache.flink.table.runtime.operators.sink.SinkOperator
import org.apache.flink.table.runtime.typeutils.RowDataTypeInfo
import org.apache.flink.table.types.logical.RowType
import scala.collection.JavaConversions._
/**
* Base physical RelNode to write data to an external sink defined by a [[DynamicTableSink]].
*/
class CommonPhysicalSink (
cluster: RelOptCluster,
traitSet: RelTraitSet,
inputRel: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink)
extends Sink(cluster, traitSet, inputRel, tableIdentifier, catalogTable, tableSink)
with FlinkPhysicalRel {
/**
* Common implementation to create sink transformation for both batch and streaming.
*/
protected def createSinkTransformation(
env: StreamExecutionEnvironment,
inputTransformation: Transformation[RowData],
tableConfig: TableConfig,
rowtimeFieldIndex: Int,
isBounded: Boolean): Transformation[Any] = {
val inputTypeInfo = new RowDataTypeInfo(FlinkTypeFactory.toLogicalRowType(getInput.getRowType))
val runtimeProvider = tableSink.getSinkRuntimeProvider(
new SinkRuntimeProviderContext(isBounded))
val sinkFunction = runtimeProvider match {
case provider: SinkFunctionProvider => provider.createSinkFunction()
case provider: OutputFormatProvider =>
val outputFormat = provider.createOutputFormat()
new OutputFormatSinkFunction(outputFormat)
}
sinkFunction match {
case itc: InputTypeConfigurable =>
// configure the type if needed
itc.setInputType(inputTypeInfo, env.getConfig)
case _ => // nothing to do
}
val notNullEnforcer = tableConfig.getConfiguration
.get(ExecutionConfigOptions.TABLE_EXEC_SINK_NOT_NULL_ENFORCER)
val notNullFieldIndices = TableSinkUtils.getNotNullFieldIndices(catalogTable)
val fieldNames = catalogTable.getSchema.toPhysicalRowDataType
.getLogicalType.asInstanceOf[RowType]
.getFieldNames
.toList.toArray
val operator = new SinkOperator(
env.clean(sinkFunction),
rowtimeFieldIndex,
notNullEnforcer,
notNullFieldIndices,
fieldNames
)
new SinkTransformation(
inputTransformation,
getRelDetailedDescription,
SimpleOperatorFactory.of(operator),
inputTransformation.getParallelism).asInstanceOf[Transformation[Any]]
}
}
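To illustrate the kind of sink CommonPhysicalSink consumes, here is a minimal, hypothetical DynamicTableSink whose runtime implementation is exposed through a SinkFunctionProvider (the class name and the use of PrintSinkFunction are assumptions made for this example, not part of the change):

import org.apache.flink.streaming.api.functions.sink.PrintSinkFunction;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
import org.apache.flink.table.data.RowData;

/** A hypothetical insert-only sink that prints incoming rows. */
public class PrintingDynamicSink implements DynamicTableSink {

    @Override
    public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
        // only accept insert-only changes
        return ChangelogMode.insertOnly();
    }

    @Override
    public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
        // CommonPhysicalSink unwraps this provider into a SinkFunction
        return SinkFunctionProvider.of(new PrintSinkFunction<RowData>());
    }

    @Override
    public DynamicTableSink copy() {
        return new PrintingDynamicSink();
    }

    @Override
    public String asSummaryString() {
        return "PrintingDynamicSink";
    }
}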
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.nodes.logical
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.flink.table.planner.plan.nodes.FlinkConventions
import org.apache.flink.table.planner.plan.nodes.calcite.{LogicalSink, Sink}
import org.apache.flink.table.sinks.TableSink
import org.apache.calcite.plan.{Convention, RelOptCluster, RelOptRule, RelTraitSet}
import org.apache.calcite.rel.RelNode
import org.apache.calcite.rel.convert.ConverterRule
import java.util
import scala.collection.JavaConversions._
/**
* Sub-class of [[Sink]] that is a relational expression
* which writes out the data of its input node into a [[DynamicTableSink]].
*/
class FlinkLogicalSink(
cluster: RelOptCluster,
traitSet: RelTraitSet,
input: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink,
val staticPartitions: Map[String, String])
extends Sink(cluster, traitSet, input, tableIdentifier, catalogTable, tableSink)
with FlinkLogicalRel {
override def copy(traitSet: RelTraitSet, inputs: util.List[RelNode]): RelNode = {
new FlinkLogicalSink(
cluster, traitSet, inputs.head, tableIdentifier, catalogTable, tableSink, staticPartitions)
}
}
private class FlinkLogicalSinkConverter
extends ConverterRule(
classOf[LogicalSink],
Convention.NONE,
FlinkConventions.LOGICAL,
"FlinkLogicalSinkConverter") {
override def convert(rel: RelNode): RelNode = {
val sink = rel.asInstanceOf[LogicalSink]
val newInput = RelOptRule.convert(sink.getInput, FlinkConventions.LOGICAL)
FlinkLogicalSink.create(
newInput,
sink.tableIdentifier,
sink.catalogTable,
sink.tableSink,
sink.staticPartitions)
}
}
object FlinkLogicalSink {
val CONVERTER: ConverterRule = new FlinkLogicalSinkConverter()
def create(
input: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink,
staticPartitions: Map[String, String] = Map()): FlinkLogicalSink = {
val cluster = input.getCluster
val traitSet = cluster.traitSetOf(FlinkConventions.LOGICAL).simplify()
new FlinkLogicalSink(
cluster, traitSet, input, tableIdentifier, catalogTable, tableSink, staticPartitions)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.nodes.physical.batch
import org.apache.flink.api.dag.Transformation
import org.apache.flink.runtime.operators.DamBehavior
import org.apache.flink.table.api.TableException
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.ChangelogMode
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.flink.table.data.RowData
import org.apache.flink.table.planner.delegation.BatchPlanner
import org.apache.flink.table.planner.plan.nodes.common.CommonPhysicalSink
import org.apache.flink.table.planner.plan.nodes.exec.{BatchExecNode, ExecNode}
import org.apache.calcite.plan.{RelOptCluster, RelTraitSet}
import org.apache.calcite.rel.RelNode
import java.util
import scala.collection.JavaConversions._
/**
* Batch physical RelNode to write data into an external sink defined by a
* [[DynamicTableSink]].
*/
class BatchExecSink(
cluster: RelOptCluster,
traitSet: RelTraitSet,
inputRel: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink)
extends CommonPhysicalSink(cluster, traitSet, inputRel, tableIdentifier, catalogTable, tableSink)
with BatchPhysicalRel
with BatchExecNode[Any] {
override def copy(traitSet: RelTraitSet, inputs: util.List[RelNode]): RelNode = {
new BatchExecSink(cluster, traitSet, inputs.get(0), tableIdentifier, catalogTable, tableSink)
}
//~ ExecNode methods -----------------------------------------------------------
/**
* For the sink operator, records will not pass through it, so its DamBehavior is FULL_DAM.
*
* @return the [[DamBehavior]] of the sink.
*/
override def getDamBehavior: DamBehavior = DamBehavior.FULL_DAM
override def getInputNodes: util.List[ExecNode[BatchPlanner, _]] = {
List(getInput.asInstanceOf[ExecNode[BatchPlanner, _]])
}
override def replaceInputNode(
ordinalInParent: Int,
newInputNode: ExecNode[BatchPlanner, _]): Unit = {
replaceInput(ordinalInParent, newInputNode.asInstanceOf[RelNode])
}
override protected def translateToPlanInternal(
planner: BatchPlanner): Transformation[Any] = {
val inputTransformation = getInputNodes.get(0) match {
// Sink's input must be BatchExecNode[RowData] now.
case node: BatchExecNode[RowData] => node.translateToPlan(planner)
case _ =>
throw new TableException("Cannot generate BoundedStream due to an invalid logical plan. " +
"This is a bug and should not happen. Please file an issue.")
}
// tell the sink the ChangelogMode of the input; batch mode only supports INSERT changes.
tableSink.getChangelogMode(ChangelogMode.insertOnly())
createSinkTransformation(
planner.getExecEnv,
inputTransformation,
planner.getTableConfig,
-1 /* not rowtime field */,
isBounded = true)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.nodes.physical.stream
import org.apache.flink.api.dag.Transformation
import org.apache.flink.table.api.TableException
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.flink.table.data.RowData
import org.apache.flink.table.planner.calcite.FlinkTypeFactory
import org.apache.flink.table.planner.delegation.StreamPlanner
import org.apache.flink.table.planner.plan.nodes.common.CommonPhysicalSink
import org.apache.flink.table.planner.plan.nodes.exec.{ExecNode, StreamExecNode}
import org.apache.flink.table.planner.plan.utils.ChangelogPlanUtils
import org.apache.calcite.plan.{RelOptCluster, RelTraitSet}
import org.apache.calcite.rel.RelNode
import java.util
import scala.collection.JavaConversions._
/**
* Stream physical RelNode to write data into an external sink defined by a
* [[DynamicTableSink]].
*/
class StreamExecSink(
cluster: RelOptCluster,
traitSet: RelTraitSet,
inputRel: RelNode,
tableIdentifier: ObjectIdentifier,
catalogTable: CatalogTable,
tableSink: DynamicTableSink)
extends CommonPhysicalSink(cluster, traitSet, inputRel, tableIdentifier, catalogTable, tableSink)
with StreamPhysicalRel
with StreamExecNode[Any] {
override def requireWatermark: Boolean = false
override def copy(traitSet: RelTraitSet, inputs: util.List[RelNode]): RelNode = {
new StreamExecSink(cluster, traitSet, inputs.get(0), tableIdentifier, catalogTable, tableSink)
}
//~ ExecNode methods -----------------------------------------------------------
override def getInputNodes: util.List[ExecNode[StreamPlanner, _]] = {
List(getInput.asInstanceOf[ExecNode[StreamPlanner, _]])
}
override def replaceInputNode(
ordinalInParent: Int,
newInputNode: ExecNode[StreamPlanner, _]): Unit = {
replaceInput(ordinalInParent, newInputNode.asInstanceOf[RelNode])
}
override protected def translateToPlanInternal(
planner: StreamPlanner): Transformation[Any] = {
// get RowData plan
val inputTransformation = getInputNodes.get(0) match {
// Sink's input must be StreamExecNode[RowData] now.
case node: StreamExecNode[RowData] =>
node.translateToPlan(planner)
case _ =>
throw new TableException("Cannot generate DataStream due to an invalid logical plan. " +
"This is a bug and should not happen. Please file an issue.")
}
val inputLogicalType = getInput.getRowType
val rowtimeFields = inputLogicalType.getFieldList.zipWithIndex
.filter { case (f, _) =>
FlinkTypeFactory.isRowtimeIndicatorType(f.getType)
}
if (rowtimeFields.size > 1) {
throw new TableException(
s"Found more than one rowtime field: [${rowtimeFields.map(_._1.getName).mkString(", ")}]" +
s" in the query when insert into '${tableIdentifier.asSummaryString()}'.\n" +
s"Please select the rowtime field that should be used as event-time timestamp for the " +
s"DataStream by casting all other fields to TIMESTAMP.")
}
val inputChangelogMode = ChangelogPlanUtils.getChangelogMode(
getInput.asInstanceOf[StreamPhysicalRel]).get
// tell the sink the ChangelogMode of the input
tableSink.getChangelogMode(inputChangelogMode)
val rowtimeFieldIndex: Int = rowtimeFields.map(_._2).headOption.getOrElse(-1)
createSinkTransformation(
planner.getExecEnv,
inputTransformation,
planner.getTableConfig,
rowtimeFieldIndex,
isBounded = false)
}
}
......@@ -25,7 +25,7 @@ import org.apache.flink.table.planner.calcite.{FlinkContext, SqlExprToRexConvert
import org.apache.flink.table.planner.delegation.StreamPlanner
import org.apache.flink.table.planner.plan.`trait`.{MiniBatchInterval, MiniBatchIntervalTrait, MiniBatchIntervalTraitDef, MiniBatchMode, ModifyKindSet, ModifyKindSetTraitDef, UpdateKind, UpdateKindTraitDef}
import org.apache.flink.table.planner.plan.metadata.FlinkRelMetadataQuery
import org.apache.flink.table.planner.plan.nodes.calcite.LegacySink
import org.apache.flink.table.planner.plan.nodes.calcite.{LegacySink, Sink}
import org.apache.flink.table.planner.plan.nodes.physical.stream.{StreamExecDataStreamScan, StreamExecIntermediateTableScan, StreamPhysicalRel}
import org.apache.flink.table.planner.plan.optimize.program.{FlinkStreamProgram, StreamOptimizeContext}
import org.apache.flink.table.planner.plan.schema.IntermediateRelTable
......@@ -109,10 +109,10 @@ class StreamCommonSubGraphBasedOptimizer(planner: StreamPlanner)
val blockLogicalPlan = block.getPlan
blockLogicalPlan match {
case s: LegacySink =>
case _: LegacySink | _: Sink =>
require(isSinkBlock)
val optimizedTree = optimizeTree(
s,
blockLogicalPlan,
updateBeforeRequired = block.isUpdateBeforeRequired,
miniBatchInterval = block.getMiniBatchInterval,
isSinkBlock = true)
......@@ -210,10 +210,10 @@ class StreamCommonSubGraphBasedOptimizer(planner: StreamPlanner)
val blockLogicalPlan = block.getPlan
blockLogicalPlan match {
case n: LegacySink =>
case _: LegacySink | _: Sink =>
require(isSinkBlock)
val optimizedPlan = optimizeTree(
n, updateBeforeRequired, miniBatchInterval, isSinkBlock = true)
blockLogicalPlan, updateBeforeRequired, miniBatchInterval, isSinkBlock = true)
block.setOptimizedPlan(optimizedPlan)
case o =>
......
......@@ -19,9 +19,10 @@
package org.apache.flink.table.planner.plan.optimize.program
import org.apache.flink.table.api.TableException
import org.apache.flink.table.planner.plan.`trait`.UpdateKindTrait.{beforeAfterOrNone, onlyAfterOrNone}
import org.apache.flink.table.planner.plan.`trait`.UpdateKindTrait.{BEFORE_AND_AFTER, ONLY_UPDATE_AFTER, beforeAfterOrNone, onlyAfterOrNone}
import org.apache.flink.table.planner.plan.`trait`._
import org.apache.flink.table.planner.plan.nodes.physical.stream._
import org.apache.flink.table.planner.plan.utils.ChangelogPlanUtils.FULL_CHANGELOG_MODE
import org.apache.flink.table.planner.plan.utils._
import org.apache.flink.table.planner.sinks.DataStreamTableSink
import org.apache.flink.table.runtime.operators.join.FlinkJoinType
......@@ -112,6 +113,15 @@ class FlinkChangelogModeInferenceProgram extends FlinkOptimizeProgram[StreamOpti
rel: StreamPhysicalRel,
requiredTrait: ModifyKindSetTrait,
requester: String): StreamPhysicalRel = rel match {
case sink: StreamExecSink =>
val name = s"Table sink '${sink.tableIdentifier.asSummaryString()}'"
val sinkRequiredTrait = ModifyKindSetTrait.fromChangelogMode(
sink.tableSink.getChangelogMode(FULL_CHANGELOG_MODE))
val children = visitChildren(sink, sinkRequiredTrait, name)
val sinkTrait = sink.getTraitSet.plus(ModifyKindSetTrait.EMPTY)
// ignore required trait from context, because sink is the true root
sink.copy(sinkTrait, children).asInstanceOf[StreamPhysicalRel]
case sink: StreamExecLegacySink[_] =>
val (sinkRequiredTrait, name) = sink.sink match {
case _: UpsertStreamTableSink[_] =>
......@@ -370,6 +380,21 @@ class FlinkChangelogModeInferenceProgram extends FlinkOptimizeProgram[StreamOpti
def visit(
rel: StreamPhysicalRel,
requiredTrait: UpdateKindTrait): Option[StreamPhysicalRel] = rel match {
case sink: StreamExecSink =>
val childModifyKindSet = getModifyKindSet(sink.getInput)
val onlyAfter = onlyAfterOrNone(childModifyKindSet)
val beforeAndAfter = beforeAfterOrNone(childModifyKindSet)
val sinkTrait = UpdateKindTrait.fromChangelogMode(
sink.tableSink.getChangelogMode(FULL_CHANGELOG_MODE))
val sinkRequiredTraits = if (sinkTrait.equals(ONLY_UPDATE_AFTER)) {
Seq(onlyAfter, beforeAndAfter)
} else if (sinkTrait.equals(BEFORE_AND_AFTER)) {
Seq(beforeAndAfter)
} else {
Seq(UpdateKindTrait.NONE)
}
visitSink(sink, sinkRequiredTraits)
case sink: StreamExecLegacySink[_] =>
val childModifyKindSet = getModifyKindSet(sink.getInput)
val onlyAfter = onlyAfterOrNone(childModifyKindSet)
......@@ -394,13 +419,7 @@ class FlinkChangelogModeInferenceProgram extends FlinkOptimizeProgram[StreamOpti
Seq(UpdateKindTrait.NONE)
}
}
val children = sinkRequiredTraits.flatMap(t => visitChildren(sink, t))
if (children.isEmpty) {
None
} else {
val sinkTrait = sink.getTraitSet.plus(UpdateKindTrait.NONE)
Some(sink.copy(sinkTrait, children.head).asInstanceOf[StreamPhysicalRel])
}
visitSink(sink, sinkRequiredTraits)
case _: StreamExecGroupAggregate | _: StreamExecGroupTableAggregate |
_: StreamExecLimit =>
......@@ -649,7 +668,19 @@ class FlinkChangelogModeInferenceProgram extends FlinkOptimizeProgram[StreamOpti
return newNode
}
}
return None
None
}
private def visitSink(
sink: StreamPhysicalRel,
sinkRequiredTraits: Seq[UpdateKindTrait]): Option[StreamPhysicalRel] = {
val children = sinkRequiredTraits.flatMap(t => visitChildren(sink, t))
if (children.isEmpty) {
None
} else {
val sinkTrait = sink.getTraitSet.plus(UpdateKindTrait.NONE)
Some(sink.copy(sinkTrait, children.head).asInstanceOf[StreamPhysicalRel])
}
}
}
......
......@@ -20,7 +20,7 @@ package org.apache.flink.table.planner.plan.reuse
import org.apache.flink.table.api.config.OptimizerConfigOptions
import org.apache.flink.table.api.{TableConfig, TableException}
import org.apache.flink.table.planner.plan.nodes.calcite.LegacySink
import org.apache.flink.table.planner.plan.nodes.calcite.{LegacySink, Sink}
import org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalLegacyTableSourceScan
import org.apache.flink.table.planner.plan.nodes.physical.PhysicalLegacyTableSourceScan
import org.apache.flink.table.planner.plan.utils.{DefaultRelShuttle, FlinkRelOptUtil}
......@@ -157,7 +157,7 @@ object SubplanReuser {
// Exchange node can not be reused if its input is reusable disabled
case e: Exchange => isNodeReusableDisabled(e.getInput)
// TableFunctionScan and sink can not be reused
case _: TableFunctionScan | _: LegacySink => true
case _: TableFunctionScan | _: LegacySink | _: Sink => true
case _ => false
}
}
......
......@@ -335,6 +335,7 @@ object FlinkBatchRuleSets {
FlinkLogicalRank.CONVERTER,
FlinkLogicalWindowAggregate.CONVERTER,
FlinkLogicalSnapshot.CONVERTER,
FlinkLogicalSink.CONVERTER,
FlinkLogicalLegacySink.CONVERTER
)
......@@ -423,6 +424,7 @@ object FlinkBatchRuleSets {
BatchExecCorrelateRule.INSTANCE,
BatchExecPythonCorrelateRule.INSTANCE,
// sink
BatchExecSinkRule.INSTANCE,
BatchExecLegacySinkRule.INSTANCE
)
......
......@@ -310,6 +310,7 @@ object FlinkStreamRuleSets {
FlinkLogicalWindowTableAggregate.CONVERTER,
FlinkLogicalSnapshot.CONVERTER,
FlinkLogicalMatch.CONVERTER,
FlinkLogicalSink.CONVERTER,
FlinkLogicalLegacySink.CONVERTER
)
......@@ -406,6 +407,7 @@ object FlinkStreamRuleSets {
StreamExecCorrelateRule.INSTANCE,
StreamExecPythonCorrelateRule.INSTANCE,
// sink
StreamExecSinkRule.INSTANCE,
StreamExecLegacySinkRule.INSTANCE
)
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.rules.physical.batch
import org.apache.flink.table.api.TableException
import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning
import org.apache.flink.table.filesystem.FileSystemTableFactory
import org.apache.flink.table.planner.plan.`trait`.FlinkRelDistribution
import org.apache.flink.table.planner.plan.nodes.FlinkConventions
import org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalSink
import org.apache.flink.table.planner.plan.nodes.physical.batch.BatchExecSink
import org.apache.flink.table.planner.plan.utils.FlinkRelOptUtil
import org.apache.flink.table.types.logical.RowType
import org.apache.calcite.plan.RelOptRule
import org.apache.calcite.rel.convert.ConverterRule
import org.apache.calcite.rel.{RelCollations, RelNode}
import scala.collection.JavaConversions._
class BatchExecSinkRule extends ConverterRule(
classOf[FlinkLogicalSink],
FlinkConventions.LOGICAL,
FlinkConventions.BATCH_PHYSICAL,
"BatchExecSinkRule") {
def convert(rel: RelNode): RelNode = {
val sinkNode = rel.asInstanceOf[FlinkLogicalSink]
val newTrait = rel.getTraitSet.replace(FlinkConventions.BATCH_PHYSICAL)
var requiredTraitSet = sinkNode.getInput.getTraitSet.replace(FlinkConventions.BATCH_PHYSICAL)
if (sinkNode.catalogTable != null && sinkNode.catalogTable.isPartitioned) {
sinkNode.tableSink match {
case partitionSink: SupportsPartitioning =>
partitionSink.applyStaticPartition(sinkNode.staticPartitions)
val dynamicPartFields = sinkNode.catalogTable.getPartitionKeys
.filter(!sinkNode.staticPartitions.contains(_))
val fieldNames = sinkNode.catalogTable
.getSchema
.toPhysicalRowDataType
.getLogicalType.asInstanceOf[RowType]
.getFieldNames
if (dynamicPartFields.nonEmpty) {
val dynamicPartIndices =
dynamicPartFields.map(fieldNames.indexOf(_))
val shuffleEnable = sinkNode
.catalogTable
.getOptions
.get(FileSystemTableFactory.SINK_SHUFFLE_BY_PARTITION.key())
if (shuffleEnable != null && shuffleEnable.toBoolean) {
requiredTraitSet = requiredTraitSet.plus(
FlinkRelDistribution.hash(dynamicPartIndices
.map(Integer.valueOf), requireStrict = false))
}
if (partitionSink.requiresPartitionGrouping(true)) {
// default to asc.
val fieldCollations = dynamicPartIndices.map(FlinkRelOptUtil.ofRelFieldCollation)
requiredTraitSet = requiredTraitSet.plus(RelCollations.of(fieldCollations: _*))
}
}
case _ => throw new TableException(
s"'${sinkNode.tableIdentifier.asSummaryString()}' is a partitioned table, " +
s"but the underlying [${sinkNode.tableSink.asSummaryString()}] DynamicTableSink " +
s"doesn't implement SupportsPartitioning interface.")
}
}
val newInput = RelOptRule.convert(sinkNode.getInput, requiredTraitSet)
new BatchExecSink(
rel.getCluster,
newTrait,
newInput,
sinkNode.tableIdentifier,
sinkNode.catalogTable,
sinkNode.tableSink)
}
}
object BatchExecSinkRule {
val INSTANCE = new BatchExecSinkRule
}
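Both sink rules key off the SupportsPartitioning ability. A hedged, hypothetical sketch of a partitioned DynamicTableSink that only fleshes out the partitioning hooks (class name and behavior are assumptions for illustration; the runtime provider is deliberately left unimplemented):

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning;

/** Hypothetical partitioned sink; only the partitioning hooks are sketched. */
public class PartitionedSinkSketch implements DynamicTableSink, SupportsPartitioning {

    private final Map<String, String> staticPartitions = new HashMap<>();

    @Override
    public void applyStaticPartition(Map<String, String> partition) {
        // remember the static partition values from INSERT INTO ... PARTITION (...)
        staticPartitions.putAll(partition);
    }

    @Override
    public boolean requiresPartitionGrouping(boolean supportsGrouping) {
        // no grouping of dynamic partitions required in this sketch
        return false;
    }

    @Override
    public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
        return ChangelogMode.insertOnly();
    }

    @Override
    public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
        throw new UnsupportedOperationException("runtime provider omitted in this sketch");
    }

    @Override
    public DynamicTableSink copy() {
        PartitionedSinkSketch copy = new PartitionedSinkSketch();
        copy.staticPartitions.putAll(staticPartitions);
        return copy;
    }

    @Override
    public String asSummaryString() {
        return "PartitionedSinkSketch";
    }
}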
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.rules.physical.stream
import org.apache.flink.table.api.TableException
import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning
import org.apache.flink.table.filesystem.FileSystemTableFactory
import org.apache.flink.table.planner.plan.`trait`.FlinkRelDistribution
import org.apache.flink.table.planner.plan.nodes.FlinkConventions
import org.apache.flink.table.planner.plan.nodes.logical.FlinkLogicalSink
import org.apache.flink.table.planner.plan.nodes.physical.stream.StreamExecSink
import org.apache.flink.table.types.logical.RowType
import org.apache.calcite.plan.RelOptRule
import org.apache.calcite.rel.RelNode
import org.apache.calcite.rel.convert.ConverterRule
import scala.collection.JavaConversions._
class StreamExecSinkRule extends ConverterRule(
classOf[FlinkLogicalSink],
FlinkConventions.LOGICAL,
FlinkConventions.STREAM_PHYSICAL,
"StreamExecSinkRule") {
def convert(rel: RelNode): RelNode = {
val sinkNode = rel.asInstanceOf[FlinkLogicalSink]
val newTrait = rel.getTraitSet.replace(FlinkConventions.STREAM_PHYSICAL)
var requiredTraitSet = sinkNode.getInput.getTraitSet.replace(FlinkConventions.STREAM_PHYSICAL)
if (sinkNode.catalogTable != null && sinkNode.catalogTable.isPartitioned) {
sinkNode.tableSink match {
case partitionSink: SupportsPartitioning =>
partitionSink.applyStaticPartition(sinkNode.staticPartitions)
val dynamicPartFields = sinkNode.catalogTable.getPartitionKeys
.filter(!sinkNode.staticPartitions.contains(_))
val fieldNames = sinkNode.catalogTable
.getSchema
.toPhysicalRowDataType
.getLogicalType.asInstanceOf[RowType]
.getFieldNames
if (dynamicPartFields.nonEmpty) {
val dynamicPartIndices =
dynamicPartFields.map(fieldNames.indexOf(_))
val shuffleEnable = sinkNode
.catalogTable
.getOptions
.get(FileSystemTableFactory.SINK_SHUFFLE_BY_PARTITION.key())
if (shuffleEnable != null && shuffleEnable.toBoolean) {
requiredTraitSet = requiredTraitSet.plus(
FlinkRelDistribution.hash(dynamicPartIndices
.map(Integer.valueOf), requireStrict = false))
}
if (partitionSink.requiresPartitionGrouping(false)) {
throw new TableException("Partition grouping in stream mode is not supported yet!")
}
}
case _ => throw new TableException(
s"'${sinkNode.tableIdentifier.asSummaryString()}' is a partitioned table, " +
s"but the underlying [${sinkNode.tableSink.asSummaryString()}] DynamicTableSink " +
s"doesn't implement SupportsPartitioning interface.")
}
}
val newInput = RelOptRule.convert(sinkNode.getInput, requiredTraitSet)
new StreamExecSink(
rel.getCluster,
newTrait,
newInput,
sinkNode.tableIdentifier,
sinkNode.catalogTable,
sinkNode.tableSink)
}
}
object StreamExecSinkRule {
val INSTANCE = new StreamExecSinkRule
}
......@@ -33,6 +33,16 @@ import scala.collection.mutable.ArrayBuffer
*/
object ChangelogPlanUtils {
/**
* A [[ChangelogMode]] that contains all kinds of [[RowKind]].
*/
val FULL_CHANGELOG_MODE: ChangelogMode = ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build()
/**
* Returns true if the inputs of the current node produce insert-only changes.
*
......
......@@ -17,7 +17,7 @@
*/
package org.apache.flink.table.planner.plan.utils
import org.apache.flink.table.planner.plan.nodes.calcite.LegacySink
import org.apache.flink.table.planner.plan.nodes.calcite.{LegacySink, Sink}
import org.apache.flink.table.planner.plan.nodes.exec.{ExecNode, ExecNodeVisitorImpl}
import org.apache.flink.table.planner.plan.nodes.physical.stream.StreamPhysicalRel
......@@ -116,7 +116,8 @@ object ExecNodePlanDumper {
}
val reuseId = reuseInfoBuilder.getReuseId(node)
val isReuseNode = reuseId.isDefined
if (node.isInstanceOf[LegacySink] || (isReuseNode && !reuseInfoMap.containsKey(node))) {
if (node.isInstanceOf[LegacySink] || node.isInstanceOf[Sink] ||
(isReuseNode && !reuseInfoMap.containsKey(node))) {
if (isReuseNode) {
reuseInfoMap.put(node, (reuseId.get, true))
}
......
......@@ -23,6 +23,8 @@ import org.apache.flink.api.java.typeutils.{GenericTypeInfo, PojoTypeInfo, Tuple
import org.apache.flink.api.scala.typeutils.CaseClassTypeInfo
import org.apache.flink.table.api._
import org.apache.flink.table.catalog.{CatalogTable, ObjectIdentifier}
import org.apache.flink.table.connector.sink.DynamicTableSink
import org.apache.flink.table.connector.sink.abilities.{SupportsOverwrite, SupportsPartitioning}
import org.apache.flink.table.data.RowData
import org.apache.flink.table.operations.CatalogSinkModifyOperation
import org.apache.flink.table.planner.calcite.FlinkTypeFactory
......@@ -64,9 +66,12 @@ object TableSinkUtils {
sinkIdentifier: Option[String] = None): RelNode = {
val queryLogicalType = FlinkTypeFactory.toLogicalRowType(query.getRowType)
val sinkDataType = sinkSchema.toRowDataType
val sinkLogicalType = DataTypeUtils
// we treat legacy decimal types as equivalent to the default decimal type
.transform(sinkSchema.toRowDataType, legacyDecimalToDefaultDecimal, legacyRawToTypeInfoRaw)
// we ignore the NOT NULL constraint here, it will be checked at runtime,
// see StreamExecSink and BatchExecSink
.transform(sinkDataType, legacyDecimalToDefaultDecimal, legacyRawToTypeInfoRaw, toNullable)
.getLogicalType
.asInstanceOf[RowType]
if (supportsImplicitCast(queryLogicalType, sinkLogicalType)) {
......@@ -144,6 +149,54 @@ object TableSinkUtils {
}
}
/**
* Checks whether the [[DynamicTableSink]] is compatible with the INSERT INTO clause, e.g.
* whether the sink implements [[SupportsOverwrite]] and [[SupportsPartitioning]].
*
* @param sinkOperation The sink operation with the query that is supposed to be written.
* @param sinkIdentifier The path of the sink. It is only needed for logging and does not
* participate in the validation.
* @param sink The sink that we want to write to.
* @param partitionKeys The partition keys of this table.
*/
def validateTableSink(
sinkOperation: CatalogSinkModifyOperation,
sinkIdentifier: ObjectIdentifier,
sink: DynamicTableSink,
partitionKeys: Seq[String]): Unit = {
// check partitions are valid
if (partitionKeys.nonEmpty) {
sink match {
case _: SupportsPartitioning => // pass
case _ => throw new TableException(
s"'${sinkIdentifier.asSummaryString()}' is a partitioned table, " +
s"but the underlying [${sink.asSummaryString()}] DynamicTableSink " +
s"doesn't implement SupportsPartitioning interface.")
}
}
val staticPartitions = sinkOperation.getStaticPartitions
if (staticPartitions != null && !staticPartitions.isEmpty) {
staticPartitions.map(_._1) foreach { p =>
if (!partitionKeys.contains(p)) {
throw new ValidationException(s"Static partition column $p should be in the partition" +
s" fields list $partitionKeys for table '$sinkIdentifier'.")
}
}
}
sink match {
case overwritable: SupportsOverwrite =>
overwritable.applyOverwrite(sinkOperation.isOverwrite)
case _ =>
if (sinkOperation.isOverwrite) {
throw new ValidationException(s"INSERT OVERWRITE requires ${sink.asSummaryString()} " +
"DynamicTableSink to implement SupportsOverwrite interface.")
}
}
}
/**
* Infers the physical schema of a [[TableSink]]. The physical schema ignores the change flag
* field and normalizes physical types (which can be generic or POJO types) into a [[TableSchema]].
......@@ -320,4 +373,15 @@ object TableSinkUtils {
false)
}
}
/**
* Gets the indices of the NOT NULL physical fields of the [[CatalogTable]].
*/
def getNotNullFieldIndices(catalogTable: CatalogTable): Array[Int] = {
val rowType = catalogTable.getSchema.toPhysicalRowDataType.getLogicalType.asInstanceOf[RowType]
val fieldTypes = rowType.getFields.map(_.getType).toArray
fieldTypes.indices.filter { index =>
!fieldTypes(index).isNullable
}.toArray
}
}
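A simplified, hypothetical sketch of the per-record NOT NULL check that the configured enforcer implies at runtime (illustrative only; the real logic lives in SinkOperator and may differ in details):

import org.apache.flink.table.data.RowData;

final class NotNullCheckSketch {
    /** Returns true if the record may be written, false if it should be silently dropped. */
    static boolean accept(RowData row, int[] notNullFieldIndices, String[] fieldNames, boolean dropOnNull) {
        for (int index : notNullFieldIndices) {
            if (row.isNullAt(index)) {
                if (dropOnNull) {
                    // NotNullEnforcer.DROP: skip this record without failing the job
                    return false;
                }
                // NotNullEnforcer.ERROR: fail with a runtime exception
                throw new RuntimeException(
                    "Null value is being written into NOT NULL column '" + fieldNames[index] + "'.");
            }
        }
        return true;
    }
}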
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.factories;
import org.apache.flink.api.common.io.RichOutputFormat;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.table.connector.sink.DynamicTableSink.DataStructureConverter;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.functions.AsyncTableFunction;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static org.apache.flink.table.planner.factories.TestValuesTableFactory.RESOURCE_COUNTER;
import static org.apache.flink.util.Preconditions.checkArgument;
/**
* Runtime function implementations for {@link TestValuesTableFactory}.
*/
final class TestValuesRuntimeFunctions {
// [table_name, [task_id, List[value]]]
private static final Map<String, Map<Integer, List<String>>> globalRawResult = new HashMap<>();
// [table_name, [task_id, Map[key, value]]]
private static final Map<String, Map<Integer, Map<String, String>>> globalUpsertResult = new HashMap<>();
// [table_name, [task_id, List[value]]]
private static final Map<String, Map<Integer, List<String>>> globalRetractResult = new HashMap<>();
static List<String> getRawResults(String tableName) {
List<String> result = new ArrayList<>();
if (globalRawResult.containsKey(tableName)) {
globalRawResult.get(tableName)
.values()
.forEach(result::addAll);
} else {
throw new IllegalArgumentException("Can't find result for the table '" + tableName + "'.");
}
return result;
}
static List<String> getResults(String tableName) {
List<String> result = new ArrayList<>();
if (globalUpsertResult.containsKey(tableName)) {
globalUpsertResult.get(tableName)
.values()
.forEach(map -> result.addAll(map.values()));
} else if (globalRetractResult.containsKey(tableName)) {
globalRetractResult.get(tableName)
.values()
.forEach(result::addAll);
} else if (globalRawResult.containsKey(tableName)) {
getRawResults(tableName).stream()
.map(s -> s.substring(3, s.length() - 1)) // removes the +I(...) wrapper
.forEach(result::add);
} else {
throw new IllegalArgumentException("Can't find result for the table '" + tableName + "'.");
}
return result;
}
static void clearResults() {
globalRawResult.clear();
globalUpsertResult.clear();
globalRetractResult.clear();
}
// ------------------------------------------------------------------------------------------
// Sink Function implementations
// ------------------------------------------------------------------------------------------
/**
* The sink implementation is end-to-end exactly-once, so it can be used to check
* state restoration in streaming SQL.
*/
private abstract static class AbstractExactlyOnceSink
extends RichSinkFunction<RowData> implements CheckpointedFunction {
private static final long serialVersionUID = 1L;
protected final String tableName;
protected transient ListState<String> rawResultState;
protected transient List<String> localRawResult;
protected AbstractExactlyOnceSink(String tableName) {
this.tableName = tableName;
}
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
this.rawResultState = context.getOperatorStateStore().getListState(
new ListStateDescriptor<>("sink-results", Types.STRING));
this.localRawResult = new ArrayList<>();
if (context.isRestored()) {
for (String value : rawResultState.get()) {
localRawResult.add(value);
}
}
int taskId = getRuntimeContext().getIndexOfThisSubtask();
synchronized (TestValuesTableFactory.class) {
globalRawResult
.computeIfAbsent(tableName, k -> new HashMap<>())
.put(taskId, localRawResult);
}
}
@Override
public void snapshotState(FunctionSnapshotContext context) throws Exception {
rawResultState.clear();
rawResultState.addAll(localRawResult);
}
}
static class AppendingSinkFunction extends AbstractExactlyOnceSink {
private static final long serialVersionUID = 1L;
private final DataStructureConverter converter;
protected AppendingSinkFunction(String tableName, DataStructureConverter converter) {
super(tableName);
this.converter = converter;
}
@SuppressWarnings("rawtypes")
@Override
public void invoke(RowData value, Context context) throws Exception {
RowKind kind = value.getRowKind();
if (value.getRowKind() == RowKind.INSERT) {
Row row = (Row) converter.toExternal(value);
assert row != null;
localRawResult.add(kind.shortString() + "(" + row.toString() + ")");
} else {
throw new RuntimeException(
"AppendingSinkFunction received " + value.getRowKind() + " messages.");
}
}
}
static class KeyedUpsertingSinkFunction extends AbstractExactlyOnceSink {
private static final long serialVersionUID = 1L;
private final DataStructureConverter converter;
private final int[] keyIndices;
// we store key and value as adjacent elements in the ListState
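// e.g. the state entries ("k1", "v1", "k2", "v2") restore to the map {"k1" -> "v1", "k2" -> "v2"}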
private transient ListState<String> upsertResultState;
// [key, value] map result
private transient Map<String, String> localUpsertResult;
protected KeyedUpsertingSinkFunction(String tableName, DataStructureConverter converter, int[] keyIndices) {
super(tableName);
this.converter = converter;
this.keyIndices = keyIndices;
}
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
super.initializeState(context);
this.upsertResultState = context.getOperatorStateStore().getListState(
new ListStateDescriptor<>("sink-upsert-results", Types.STRING));
this.localUpsertResult = new HashMap<>();
if (context.isRestored()) {
String key = null;
String value;
for (String entry : upsertResultState.get()) {
if (key == null) {
key = entry;
} else {
value = entry;
localUpsertResult.put(key, value);
// reset
key = null;
}
}
if (key != null) {
throw new RuntimeException("The upsertResultState is corrupt.");
}
}
int taskId = getRuntimeContext().getIndexOfThisSubtask();
synchronized (TestValuesTableFactory.class) {
globalUpsertResult
.computeIfAbsent(tableName, k -> new HashMap<>())
.put(taskId, localUpsertResult);
}
}
@Override
public void snapshotState(FunctionSnapshotContext context) throws Exception {
super.snapshotState(context);
upsertResultState.clear();
for (Map.Entry<String, String> entry : localUpsertResult.entrySet()) {
upsertResultState.add(entry.getKey());
upsertResultState.add(entry.getValue());
}
}
@SuppressWarnings("rawtypes")
@Override
public void invoke(RowData value, Context context) throws Exception {
RowKind kind = value.getRowKind();
Row row = (Row) converter.toExternal(value);
assert row != null;
Row key = Row.project(row, keyIndices);
localRawResult.add(kind.shortString() + "(" + row.toString() + ")");
if (kind == RowKind.INSERT || kind == RowKind.UPDATE_AFTER) {
localUpsertResult.put(key.toString(), row.toString());
} else {
String oldValue = localUpsertResult.remove(key.toString());
if (oldValue == null) {
throw new RuntimeException("Tried to delete a value that wasn't inserted first. " +
"This is probably an incorrectly implemented test.");
}
}
}
}
static class RetractingSinkFunction extends AbstractExactlyOnceSink {
private static final long serialVersionUID = 1L;
private final DataStructureConverter converter;
protected transient ListState<String> retractResultState;
protected transient List<String> localRetractResult;
protected RetractingSinkFunction(String tableName, DataStructureConverter converter) {
super(tableName);
this.converter = converter;
}
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
super.initializeState(context);
this.retractResultState = context.getOperatorStateStore().getListState(
new ListStateDescriptor<>("sink-retract-results", Types.STRING));
this.localRetractResult = new ArrayList<>();
if (context.isRestored()) {
for (String value : retractResultState.get()) {
localRetractResult.add(value);
}
}
int taskId = getRuntimeContext().getIndexOfThisSubtask();
synchronized (TestValuesTableFactory.class) {
globalRetractResult
.computeIfAbsent(tableName, k -> new HashMap<>())
.put(taskId, localRetractResult);
}
}
@Override
public void snapshotState(FunctionSnapshotContext context) throws Exception {
super.snapshotState(context);
retractResultState.clear();
retractResultState.addAll(localRetractResult);
}
@SuppressWarnings("rawtypes")
@Override
public void invoke(RowData value, Context context) throws Exception {
RowKind kind = value.getRowKind();
Row row = (Row) converter.toExternal(value);
assert row != null;
localRawResult.add(kind.shortString() + "(" + row.toString() + ")");
if (kind == RowKind.INSERT || kind == RowKind.UPDATE_AFTER) {
localRetractResult.add(row.toString());
} else {
boolean contains = localRetractResult.remove(row.toString());
if (!contains) {
throw new RuntimeException("Tried to retract a value that wasn't inserted first. " +
"This is probably an incorrectly implemented test.");
}
}
}
}
static class AppendingOutputFormat extends RichOutputFormat<RowData> {
private static final long serialVersionUID = 1L;
private final String tableName;
private final DataStructureConverter converter;
protected transient List<String> localRawResult;
protected AppendingOutputFormat(String tableName, DataStructureConverter converter) {
this.tableName = tableName;
this.converter = converter;
}
@Override
public void configure(Configuration parameters) {
// nothing to do
}
@Override
public void open(int taskNumber, int numTasks) throws IOException {
this.localRawResult = new ArrayList<>();
synchronized (TestValuesTableFactory.class) {
globalRawResult
.computeIfAbsent(tableName, k -> new HashMap<>())
.put(taskNumber, localRawResult);
}
}
@Override
public void writeRecord(RowData value) throws IOException {
RowKind kind = value.getRowKind();
if (value.getRowKind() == RowKind.INSERT) {
Row row = (Row) converter.toExternal(value);
assert row != null;
localRawResult.add(kind.shortString() + "(" + row.toString() + ")");
} else {
throw new RuntimeException(
"AppendingOutputFormat received " + value.getRowKind() + " messages.");
}
}
@Override
public void close() throws IOException {
// nothing to do
}
}
// ------------------------------------------------------------------------------------------
// Lookup Function implementations
// ------------------------------------------------------------------------------------------
/**
* A lookup function which finds matching rows for the given fields.
* NOTE: We have to declare it as public because it will be used in code generation.
*/
public static class TestValuesLookupFunction extends TableFunction<Row> {
private static final long serialVersionUID = 1L;
private final Map<Row, List<Row>> data;
private transient boolean isOpenCalled = false;
protected TestValuesLookupFunction(Map<Row, List<Row>> data) {
this.data = data;
}
@Override
public void open(FunctionContext context) throws Exception {
RESOURCE_COUNTER.incrementAndGet();
isOpenCalled = true;
}
public void eval(Object... inputs) {
checkArgument(isOpenCalled, "open() is not called.");
Row key = Row.of(inputs);
if (Arrays.asList(inputs).contains(null)) {
throw new IllegalArgumentException(String.format(
"Lookup key %s contains null value, which should not happen.", key));
}
List<Row> list = data.get(key);
if (list != null) {
list.forEach(this::collect);
}
}
@Override
public void close() throws Exception {
RESOURCE_COUNTER.decrementAndGet();
}
}
/**
* An async lookup function which finds matching rows for the given fields.
* NOTE: We have to declare it as public because it will be used in code generation.
*/
public static class AsyncTestValueLookupFunction extends AsyncTableFunction<Row> {
private static final long serialVersionUID = 1L;
private final Map<Row, List<Row>> mapping;
private transient boolean isOpenCalled = false;
private transient ExecutorService executor;
protected AsyncTestValueLookupFunction(Map<Row, List<Row>> mapping) {
this.mapping = mapping;
}
@Override
public void open(FunctionContext context) throws Exception {
RESOURCE_COUNTER.incrementAndGet();
isOpenCalled = true;
executor = Executors.newSingleThreadExecutor();
}
public void eval(CompletableFuture<Collection<Row>> resultFuture, Object... inputs) {
checkArgument(isOpenCalled, "open() is not called.");
final Row key = Row.of(inputs);
if (Arrays.asList(inputs).contains(null)) {
throw new IllegalArgumentException(String.format(
"Lookup key %s contains null value, which should not happen.", key));
}
CompletableFuture
.supplyAsync(() -> {
List<Row> list = mapping.get(key);
if (list == null) {
return Collections.<Row>emptyList();
} else {
return list;
}
}, executor)
.thenAccept(resultFuture::complete);
}
@Override
public void close() throws Exception {
RESOURCE_COUNTER.decrementAndGet();
if (executor != null && !executor.isShutdown()) {
executor.shutdown();
}
}
}
}
......@@ -24,8 +24,13 @@ import org.apache.flink.api.java.io.CollectionInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.ConfigOption;
import org.apache.flink.configuration.ConfigOptions;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.functions.source.FromElementsFunction;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.OutputFormatProvider;
import org.apache.flink.table.connector.sink.SinkFunctionProvider;
import org.apache.flink.table.connector.source.AsyncTableFunctionProvider;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.InputFormatProvider;
......@@ -36,13 +41,20 @@ import org.apache.flink.table.connector.source.TableFunctionProvider;
import org.apache.flink.table.connector.source.abilities.SupportsFilterPushDown;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.expressions.ResolvedExpression;
import org.apache.flink.table.factories.DynamicTableSinkFactory;
import org.apache.flink.table.factories.DynamicTableSourceFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.functions.AsyncTableFunction;
import org.apache.flink.table.functions.FunctionContext;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.AppendingOutputFormat;
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.AppendingSinkFunction;
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.AsyncTestValueLookupFunction;
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.KeyedUpsertingSinkFunction;
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.RetractingSinkFunction;
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.TestValuesLookupFunction;
import org.apache.flink.table.planner.utils.JavaScalaConversionUtil;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.utils.TableSchemaUtils;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.FlinkException;
......@@ -60,20 +72,15 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import scala.collection.Seq;
import static org.apache.flink.util.Preconditions.checkArgument;
/**
* Test implementation of {@link DynamicTableSourceFactory} and {@link DynamicTableSinkFactory}
* that creates sources producing a sequence of values and sinks collecting the received rows
* for later inspection in tests.
*/
public class TestValuesTableFactory implements DynamicTableSourceFactory {
public final class TestValuesTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory {
// --------------------------------------------------------------------------------------------
// Data Registration
......@@ -122,11 +129,31 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
return registerChangelogData(JavaScalaConversionUtil.toJava(data));
}
/**
* Returns the received raw results of the registered table sink.
* The raw results are encoded with {@link RowKind}.
*
* @param tableName the table name of the registered table sink.
*/
public static List<String> getRawResults(String tableName) {
return TestValuesRuntimeFunctions.getRawResults(tableName);
}
/**
* Returns the materialized (final) results of the registered table sink.
*
* @param tableName the table name of the registered table sink.
*/
public static List<String> getResults(String tableName) {
return TestValuesRuntimeFunctions.getResults(tableName);
}
/**
* Removes all registered data and clears the collected sink results.
*/
public static void clearAllRegisteredData() {
public static void clearAllData() {
registeredData.clear();
TestValuesRuntimeFunctions.clearResults();
}
/**
......@@ -181,6 +208,11 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
.stringType()
.defaultValue("SourceFunction"); // another is "InputFormat"
private static final ConfigOption<String> RUNTIME_SINK = ConfigOptions
.key("runtime-sink")
.stringType()
.defaultValue("SinkFunction"); // another is "OutputFormat"
private static final ConfigOption<String> TABLE_SOURCE_CLASS = ConfigOptions
.key("table-source-class")
.stringType()
......@@ -196,6 +228,11 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
.booleanType()
.defaultValue(false);
private static final ConfigOption<Boolean> SINK_INSERT_ONLY = ConfigOptions
.key("sink-insert-only")
.booleanType()
.defaultValue(true);
@Override
public String factoryIdentifier() {
return IDENTIFIER;
......@@ -236,6 +273,20 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
}
}
@Override
public DynamicTableSink createDynamicTableSink(Context context) {
FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
helper.validate();
boolean isInsertOnly = helper.getOptions().get(SINK_INSERT_ONLY);
String runtimeSink = helper.getOptions().get(RUNTIME_SINK);
TableSchema schema = context.getCatalogTable().getSchema();
return new TestValuesTableSink(
schema,
context.getObjectIdentifier().getObjectName(),
isInsertOnly,
runtimeSink);
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
return Collections.emptySet();
......@@ -250,7 +301,10 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
RUNTIME_SOURCE,
TABLE_SOURCE_CLASS,
LOOKUP_FUNCTION_CLASS,
ASYNC_ENABLED));
ASYNC_ENABLED,
TABLE_SOURCE_CLASS,
SINK_INSERT_ONLY,
RUNTIME_SINK));
}
private ChangelogMode parseChangelogMode(String string) {
......@@ -277,7 +331,7 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
}
// --------------------------------------------------------------------------------------------
// Table source
// Table sources
// --------------------------------------------------------------------------------------------
/**
......@@ -405,93 +459,6 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
}
}
/**
* A lookup function which finds matching rows for the given fields.
*/
public static class TestValuesLookupFunction extends TableFunction<Row> {
private static final long serialVersionUID = 1L;
private final Map<Row, List<Row>> data;
private transient boolean isOpenCalled = false;
private TestValuesLookupFunction(Map<Row, List<Row>> data) {
this.data = data;
}
@Override
public void open(FunctionContext context) throws Exception {
RESOURCE_COUNTER.incrementAndGet();
isOpenCalled = true;
}
public void eval(Object... inputs) {
checkArgument(isOpenCalled, "open() is not called.");
Row key = Row.of(inputs);
if (Arrays.asList(inputs).contains(null)) {
throw new IllegalArgumentException(String.format(
"Lookup key %s contains null value, which should not happen.", key));
}
List<Row> list = data.get(key);
if (list != null) {
list.forEach(this::collect);
}
}
@Override
public void close() throws Exception {
RESOURCE_COUNTER.decrementAndGet();
}
}
/**
* An async lookup function which find matched rows with the given fields.
*/
public static class AsyncTestValueLookupFunction extends AsyncTableFunction<Row> {
private static final long serialVersionUID = 1L;
private final Map<Row, List<Row>> mapping;
private transient boolean isOpenCalled = false;
private transient ExecutorService executor;
private AsyncTestValueLookupFunction(Map<Row, List<Row>> mapping) {
this.mapping = mapping;
}
@Override
public void open(FunctionContext context) throws Exception {
RESOURCE_COUNTER.incrementAndGet();
isOpenCalled = true;
executor = Executors.newSingleThreadExecutor();
}
public void eval(CompletableFuture<Collection<Row>> resultFuture, Object... inputs) {
checkArgument(isOpenCalled, "open() is not called.");
final Row key = Row.of(inputs);
if (Arrays.asList(inputs).contains(null)) {
throw new IllegalArgumentException(String.format(
"Lookup key %s contains null value, which should not happen.", key));
}
CompletableFuture
.supplyAsync(() -> {
List<Row> list = mapping.get(key);
if (list == null) {
return Collections.<Row>emptyList();
} else {
return list;
}
}, executor)
.thenAccept(resultFuture::complete);
}
@Override
public void close() throws Exception {
RESOURCE_COUNTER.decrementAndGet();
if (executor != null && !executor.isShutdown()) {
executor.shutdown();
}
}
}
/**
* A mocked {@link LookupTableSource} for validation tests.
*/
......@@ -543,4 +510,101 @@ public class TestValuesTableFactory implements DynamicTableSourceFactory {
return null;
}
}
// --------------------------------------------------------------------------------------------
// Table sinks
// --------------------------------------------------------------------------------------------
/**
* Values {@link DynamicTableSink} for testing.
*/
private static class TestValuesTableSink implements DynamicTableSink {
private final TableSchema schema;
private final String tableName;
private final boolean isInsertOnly;
private final String runtimeSink;
private TestValuesTableSink(
TableSchema schema,
String tableName,
boolean isInsertOnly,
String runtimeSink) {
this.schema = schema;
this.tableName = tableName;
this.isInsertOnly = isInsertOnly;
this.runtimeSink = runtimeSink;
}
@Override
public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
if (isInsertOnly) {
return ChangelogMode.insertOnly();
} else {
ChangelogMode.Builder builder = ChangelogMode.newBuilder();
if (schema.getPrimaryKey().isPresent()) {
// can update on key, ignore UPDATE_BEFORE
for (RowKind kind : requestedMode.getContainedKinds()) {
if (kind != RowKind.UPDATE_BEFORE) {
builder.addContainedKind(kind);
}
}
return builder.build();
} else {
// don't have key, works in retract mode
return requestedMode;
}
}
}
@Override
public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
DataStructureConverter converter = context.createDataStructureConverter(schema.toPhysicalRowDataType());
if (isInsertOnly) {
if (runtimeSink.equals("SinkFunction")) {
return SinkFunctionProvider.of(
new AppendingSinkFunction(
tableName,
converter));
} else if (runtimeSink.equals("OutputFormat")) {
return OutputFormatProvider.of(
new AppendingOutputFormat(
tableName,
converter));
} else {
throw new IllegalArgumentException("Unsupported runtime sink class: " + runtimeSink);
}
} else {
// we don't support OutputFormat for updating queries in the TestValues connector
assert runtimeSink.equals("SinkFunction");
SinkFunction<RowData> sinkFunction;
if (schema.getPrimaryKey().isPresent()) {
int[] keyIndices = TableSchemaUtils.getPrimaryKeyIndices(schema);
sinkFunction = new KeyedUpsertingSinkFunction(
tableName,
converter,
keyIndices);
} else {
sinkFunction = new RetractingSinkFunction(
tableName,
converter);
}
return SinkFunctionProvider.of(sinkFunction);
}
}
@Override
public DynamicTableSink copy() {
return new TestValuesTableSink(
schema,
tableName,
isInsertOnly,
runtimeSink);
}
@Override
public String asSummaryString() {
return "TestValues";
}
}
}
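For orientation, a rough, non-compilable Java sketch of how these test sinks are typically exercised (mirroring the Scala tests later in this change; the TableEnvironment 'tEnv', the source table 'MyTable', and the sink name are assumptions, not part of this patch):

// assumes a TableEnvironment 'tEnv' with a registered source table 'MyTable(a INT, b BIGINT, c STRING)'
tEnv.executeSql(
    "CREATE TABLE my_sink (" +
    "  `a` INT," +
    "  `c` STRING" +
    ") WITH (" +
    "  'connector' = 'values'," +
    "  'sink-insert-only' = 'true'," +
    "  'runtime-sink' = 'SinkFunction'" +
    ")");

tEnv.sqlUpdate("INSERT INTO my_sink SELECT a, c FROM MyTable");
tEnv.execute("test job");

// raw results keep the RowKind prefix, e.g. "+I(1,Hello)",
// while getResults(...) returns the materialized values, e.g. "1,Hello"
List<String> rawResults = TestValuesTableFactory.getRawResults("my_sink");
List<String> results = TestValuesTableFactory.getResults("my_sink");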
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to you under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<Root>
<TestCase name="testSingleSink">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.sink], fields=[a])
+- LogicalProject(cnt=[$1])
+- LogicalAggregate(group=[{0}], cnt=[COUNT()])
+- LogicalProject(a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Sink(table=[default_catalog.default_database.sink], fields=[a])
+- Calc(select=[cnt])
+- HashAggregate(isMerge=[true], groupBy=[a], select=[a, Final_COUNT(count1$0) AS cnt])
+- Exchange(distribution=[hash[a]])
+- LocalHashAggregate(groupBy=[a], select=[a, Partial_COUNT(*) AS count1$0])
+- Calc(select=[a])
+- BoundedStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c])
]]>
</Resource>
</TestCase>
<TestCase name="testMultiSinks">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.sink1], fields=[total_sum])
+- LogicalAggregate(group=[{}], total_sum=[SUM($0)])
+- LogicalProject(sum_a=[$1])
+- LogicalAggregate(group=[{0}], sum_a=[SUM($1)])
+- LogicalProject(c=[$2], a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
LogicalSink(table=[default_catalog.default_database.sink2], fields=[total_min])
+- LogicalAggregate(group=[{}], total_min=[MIN($0)])
+- LogicalProject(sum_a=[$1])
+- LogicalAggregate(group=[{0}], sum_a=[SUM($1)])
+- LogicalProject(c=[$2], a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Calc(select=[sum_a], reuse_id=[1])
+- HashAggregate(isMerge=[true], groupBy=[c], select=[c, Final_SUM(sum$0) AS sum_a])
+- Exchange(distribution=[hash[c]])
+- LocalHashAggregate(groupBy=[c], select=[c, Partial_SUM(a) AS sum$0])
+- Calc(select=[c, a])
+- BoundedStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c])
Sink(table=[default_catalog.default_database.sink1], fields=[total_sum])
+- HashAggregate(isMerge=[true], select=[Final_SUM(sum$0) AS total_sum])
+- Exchange(distribution=[single])
+- LocalHashAggregate(select=[Partial_SUM(sum_a) AS sum$0])
+- Reused(reference_id=[1])
Sink(table=[default_catalog.default_database.sink2], fields=[total_min])
+- HashAggregate(isMerge=[true], select=[Final_MIN(min$0) AS total_min])
+- Exchange(distribution=[single])
+- LocalHashAggregate(select=[Partial_MIN(sum_a) AS min$0])
+- Reused(reference_id=[1])
]]>
</Resource>
</TestCase>
</Root>
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to you under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<Root>
<TestCase name="testAppendSink">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.appendSink], fields=[a, b])
+- LogicalProject(EXPR$0=[+($0, $1)], c=[$2])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Sink(table=[default_catalog.default_database.appendSink], fields=[a, b], changelogMode=[NONE])
+- Calc(select=[+(a, b) AS EXPR$0, c], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
]]>
</Resource>
</TestCase>
<TestCase name="testAppendUpsertAndRetractSink">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.appendSink], fields=[a, b])
+- LogicalProject(a=[$0], b=[$1])
+- LogicalUnion(all=[true])
:- LogicalProject(a=[$0], b=[$1])
: +- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
+- LogicalProject(d=[$0], e=[$1])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable2]])
LogicalSink(table=[default_catalog.default_database.retractSink], fields=[total_sum])
+- LogicalAggregate(group=[{}], total_sum=[SUM($0)])
+- LogicalProject(a=[$0])
+- LogicalUnion(all=[true])
:- LogicalProject(a=[$0], b=[$1])
: +- LogicalUnion(all=[true])
: :- LogicalProject(a=[$0], b=[$1])
: : +- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
: +- LogicalProject(d=[$0], e=[$1])
: +- LogicalTableScan(table=[[default_catalog, default_database, MyTable2]])
+- LogicalProject(i=[$0], j=[$1])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable3]])
LogicalSink(table=[default_catalog.default_database.upsertSink], fields=[a, total_min])
+- LogicalAggregate(group=[{0}], total_min=[MIN($1)])
+- LogicalUnion(all=[true])
:- LogicalProject(a=[$0], b=[$1])
: +- LogicalUnion(all=[true])
: :- LogicalProject(a=[$0], b=[$1])
: : +- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
: +- LogicalProject(d=[$0], e=[$1])
: +- LogicalTableScan(table=[[default_catalog, default_database, MyTable2]])
+- LogicalProject(i=[$0], j=[$1])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable3]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Union(all=[true], union=[a, b], changelogMode=[I], reuse_id=[1])
:- Calc(select=[a, b], changelogMode=[I])
: +- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
+- Calc(select=[d, e], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable2]], fields=[d, e, f], changelogMode=[I])
Sink(table=[default_catalog.default_database.appendSink], fields=[a, b], changelogMode=[NONE])
+- Reused(reference_id=[1])
Union(all=[true], union=[a, b], changelogMode=[I], reuse_id=[2])
:- Reused(reference_id=[1])
+- Calc(select=[i, j], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable3]], fields=[i, j, k], changelogMode=[I])
Sink(table=[default_catalog.default_database.retractSink], fields=[total_sum], changelogMode=[NONE])
+- GroupAggregate(select=[SUM(a) AS total_sum], changelogMode=[I,UB,UA])
+- Exchange(distribution=[single], changelogMode=[I])
+- Calc(select=[a], changelogMode=[I])
+- Reused(reference_id=[2])
Sink(table=[default_catalog.default_database.upsertSink], fields=[a, total_min], changelogMode=[NONE])
+- GroupAggregate(groupBy=[a], select=[a, MIN(b) AS total_min], changelogMode=[I,UA])
+- Exchange(distribution=[hash[a]], changelogMode=[I])
+- Reused(reference_id=[2])
]]>
</Resource>
</TestCase>
<TestCase name="testRetractAndUpsertSink">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.retractSink], fields=[b, cnt])
+- LogicalProject(b=[$0], cnt=[$1])
+- LogicalFilter(condition=[<($0, 4)])
+- LogicalAggregate(group=[{0}], cnt=[COUNT($1)])
+- LogicalProject(b=[$1], a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
LogicalSink(table=[default_catalog.default_database.upsertSink], fields=[b, cnt])
+- LogicalProject(b=[$0], cnt=[$1])
+- LogicalFilter(condition=[AND(>=($0, 4), <($0, 6))])
+- LogicalAggregate(group=[{0}], cnt=[COUNT($1)])
+- LogicalProject(b=[$1], a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
LogicalSink(table=[default_catalog.default_database.upsertSink], fields=[b, cnt])
+- LogicalAggregate(group=[{0}], frequency=[COUNT($1)])
+- LogicalProject(cnt=[$1], b=[$0])
+- LogicalFilter(condition=[<($0, 4)])
+- LogicalAggregate(group=[{0}], cnt=[COUNT($1)])
+- LogicalProject(b=[$1], a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
GroupAggregate(groupBy=[b], select=[b, COUNT(a) AS cnt], changelogMode=[I,UB,UA], reuse_id=[1])
+- Exchange(distribution=[hash[b]], changelogMode=[I])
+- Calc(select=[b, a], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
Calc(select=[b, cnt], where=[<(b, 4)], changelogMode=[I,UB,UA], reuse_id=[2])
+- Reused(reference_id=[1])
Sink(table=[default_catalog.default_database.retractSink], fields=[b, cnt], changelogMode=[NONE])
+- Reused(reference_id=[2])
Sink(table=[default_catalog.default_database.upsertSink], fields=[b, cnt], changelogMode=[NONE])
+- Calc(select=[b, cnt], where=[AND(>=(b, 4), <(b, 6))], changelogMode=[I,UB,UA])
+- Reused(reference_id=[1])
Sink(table=[default_catalog.default_database.upsertSink], fields=[b, cnt], changelogMode=[NONE])
+- GroupAggregate(groupBy=[cnt], select=[cnt, COUNT_RETRACT(b) AS frequency], changelogMode=[I,UA,D])
+- Exchange(distribution=[hash[cnt]], changelogMode=[I,UB,UA])
+- Reused(reference_id=[2])
]]>
</Resource>
</TestCase>
<TestCase name="testUpsertSink">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.upsertSink], fields=[a, cnt])
+- LogicalAggregate(group=[{0}], cnt=[COUNT()])
+- LogicalProject(a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Sink(table=[default_catalog.default_database.upsertSink], fields=[a, cnt], changelogMode=[NONE])
+- GroupAggregate(groupBy=[a], select=[a, COUNT(*) AS cnt], changelogMode=[I,UA])
+- Exchange(distribution=[hash[a]], changelogMode=[I])
+- Calc(select=[a], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
]]>
</Resource>
</TestCase>
<TestCase name="testRetractSink1">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.retractSink], fields=[a, cnt])
+- LogicalAggregate(group=[{0}], cnt=[COUNT()])
+- LogicalProject(a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Sink(table=[default_catalog.default_database.retractSink], fields=[a, cnt], changelogMode=[NONE])
+- GroupAggregate(groupBy=[a], select=[a, COUNT(*) AS cnt], changelogMode=[I,UB,UA])
+- Exchange(distribution=[hash[a]], changelogMode=[I])
+- Calc(select=[a], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
]]>
</Resource>
</TestCase>
<TestCase name="testRetractSink2">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.retractSink], fields=[cnt, a])
+- LogicalAggregate(group=[{0}], a=[COUNT($1)])
+- LogicalProject(cnt=[$1], a=[$0])
+- LogicalAggregate(group=[{0}], cnt=[COUNT()])
+- LogicalProject(a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Sink(table=[default_catalog.default_database.retractSink], fields=[cnt, a], changelogMode=[NONE])
+- GroupAggregate(groupBy=[cnt], select=[cnt, COUNT_RETRACT(a) AS a], changelogMode=[I,UB,UA,D])
+- Exchange(distribution=[hash[cnt]], changelogMode=[I,UB,UA])
+- GroupAggregate(groupBy=[a], select=[a, COUNT(*) AS cnt], changelogMode=[I,UB,UA])
+- Exchange(distribution=[hash[a]], changelogMode=[I])
+- Calc(select=[a], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
]]>
</Resource>
</TestCase>
<TestCase name="testUpsertSinkWithFilter">
<Resource name="planBefore">
<![CDATA[
LogicalSink(table=[default_catalog.default_database.upsertSink], fields=[a, cnt])
+- LogicalProject(a=[$0], cnt=[$1])
+- LogicalFilter(condition=[<($1, 10)])
+- LogicalAggregate(group=[{0}], cnt=[COUNT()])
+- LogicalProject(a=[$0])
+- LogicalTableScan(table=[[default_catalog, default_database, MyTable]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
Sink(table=[default_catalog.default_database.upsertSink], fields=[a, cnt], changelogMode=[NONE])
+- Calc(select=[a, cnt], where=[<(cnt, 10)], changelogMode=[I,UB,UA])
+- GroupAggregate(groupBy=[a], select=[a, COUNT(*) AS cnt], changelogMode=[I,UB,UA])
+- Exchange(distribution=[hash[a]], changelogMode=[I])
+- Calc(select=[a], changelogMode=[I])
+- DataStreamScan(table=[[default_catalog, default_database, MyTable]], fields=[a, b, c], changelogMode=[I])
]]>
</Resource>
</TestCase>
</Root>
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.batch.sql
import org.apache.flink.api.scala._
import org.apache.flink.table.api.scala._
import org.apache.flink.table.planner.plan.optimize.RelNodeBlockPlanBuilder
import org.apache.flink.table.planner.utils.TableTestBase
import org.apache.flink.table.types.logical.{BigIntType, IntType}
import org.junit.Test
class TableSinkTest extends TableTestBase {
val LONG = new BigIntType()
val INT = new IntType()
private val util = batchTestUtil()
util.addDataStream[(Int, Long, String)]("MyTable", 'a, 'b, 'c)
@Test
def testSingleSink(): Unit = {
util.addTable(
s"""
|CREATE TABLE sink (
| `a` BIGINT
|) WITH (
| 'connector' = 'values'
|)
|""".stripMargin)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql("INSERT INTO sink SELECT COUNT(*) AS cnt FROM MyTable GROUP BY a")
util.verifyPlan(stmtSet)
}
@Test
def testMultiSinks(): Unit = {
util.addTable(
s"""
|CREATE TABLE sink1 (
| `total_sum` INT
|) WITH (
| 'connector' = 'values'
|)
|""".stripMargin)
util.addTable(
s"""
|CREATE TABLE sink2 (
| `total_min` INT
|) WITH (
| 'connector' = 'values'
|)
|""".stripMargin)
util.tableEnv.getConfig.getConfiguration.setBoolean(
RelNodeBlockPlanBuilder.TABLE_OPTIMIZER_REUSE_OPTIMIZE_BLOCK_WITH_DIGEST_ENABLED, true)
val table1 = util.tableEnv.sqlQuery("SELECT SUM(a) AS sum_a, c FROM MyTable GROUP BY c")
util.tableEnv.createTemporaryView("table1", table1)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql("INSERT INTO sink1 SELECT SUM(sum_a) AS total_sum FROM table1")
stmtSet.addInsertSql("INSERT INTO sink2 SELECT MIN(sum_a) AS total_min FROM table1")
util.verifyPlan(stmtSet)
}
}
......@@ -24,7 +24,7 @@ import org.apache.flink.table.catalog.{CatalogViewImpl, ObjectPath}
import org.apache.flink.table.planner.JHashMap
import org.apache.flink.table.planner.plan.hint.OptionsHintTest.{IS_BOUNDED, Param}
import org.apache.flink.table.planner.plan.nodes.calcite.LogicalLegacySink
import org.apache.flink.table.planner.utils.{OptionsTableSink, TableTestBase, TableTestUtil, TestingStatementSet, TestingTableEnvironment}
import org.apache.flink.table.planner.utils.{OptionsTableSink, TableTestBase, TableTestUtil, TestingStatementSet}
import org.hamcrest.Matchers._
import org.junit.Assert.{assertEquals, assertThat}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.plan.stream.sql
import org.apache.flink.api.scala._
import org.apache.flink.table.api.scala._
import org.apache.flink.table.api.{DataTypes, ExplainDetail, TableException}
import org.apache.flink.table.planner.utils.TableTestBase
import org.apache.flink.table.types.logical.LogicalType
import org.junit.Test
class TableSinkTest extends TableTestBase {
private val util = streamTestUtil()
util.addDataStream[(Int, Long, String)]("MyTable", 'a, 'b, 'c)
val STRING: LogicalType = DataTypes.STRING().getLogicalType
val LONG: LogicalType = DataTypes.BIGINT().getLogicalType
val INT: LogicalType = DataTypes.INT().getLogicalType
@Test
def testExceptionForAppendSink(): Unit = {
util.addTable(
s"""
|CREATE TABLE appendSink (
| `a` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql(
"INSERT INTO appendSink SELECT COUNT(*) AS cnt FROM MyTable GROUP BY a")
thrown.expect(classOf[TableException])
thrown.expectMessage("Table sink 'default_catalog.default_database.appendSink' doesn't " +
"support consuming update changes which is produced by node " +
"GroupAggregate(groupBy=[a], select=[a, COUNT(*) AS cnt])")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testExceptionForOverAggregate(): Unit = {
util.addTable(
s"""
|CREATE TABLE retractSink1 (
| `cnt` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
util.addTable(
s"""
|CREATE TABLE retractSink2 (
| `cnt` BIGINT,
| `total` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val table = util.tableEnv.sqlQuery("SELECT COUNT(*) AS cnt FROM MyTable GROUP BY a")
util.tableEnv.createTemporaryView("TempTable", table)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql("INSERT INTO retractSink1 SELECT * FROM TempTable")
stmtSet.addInsertSql(
"INSERT INTO retractSink2 SELECT cnt, SUM(cnt) OVER (ORDER BY PROCTIME()) FROM TempTable")
thrown.expect(classOf[TableException])
thrown.expectMessage("OverAggregate doesn't support consuming update changes " +
"which is produced by node GroupAggregate(groupBy=[a], select=[a, COUNT(*) AS cnt])")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testAppendSink(): Unit = {
util.addTable(
s"""
|CREATE TABLE appendSink (
| `a` BIGINT,
| `b` STRING
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql("INSERT INTO appendSink SELECT a + b, c FROM MyTable")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testRetractSink1(): Unit = {
util.addTable(
s"""
|CREATE TABLE retractSink (
| `a` INT,
| `cnt` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql(
"INSERT INTO retractSink SELECT a, COUNT(*) AS cnt FROM MyTable GROUP BY a")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testRetractSink2(): Unit = {
util.addTable(
s"""
|CREATE TABLE retractSink (
| `cnt` BIGINT,
| `a` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val dml =
"""
|INSERT INTO retractSink
|SELECT cnt, COUNT(a) AS a FROM (
| SELECT a, COUNT(*) AS cnt FROM MyTable GROUP BY a) t
|GROUP BY cnt
""".stripMargin
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql(dml)
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testUpsertSink(): Unit = {
util.addTable(
s"""
|CREATE TABLE upsertSink (
| `a` INT,
| `cnt` BIGINT,
| PRIMARY KEY (a) NOT ENFORCED
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql(
"INSERT INTO upsertSink SELECT a, COUNT(*) AS cnt FROM MyTable GROUP BY a")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testUpsertSinkWithFilter(): Unit = {
util.addTable(
s"""
|CREATE TABLE upsertSink (
| `a` INT,
| `cnt` BIGINT,
| PRIMARY KEY (a) NOT ENFORCED
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val sql =
"""
|INSERT INTO upsertSink
|SELECT *
|FROM (SELECT a, COUNT(*) AS cnt FROM MyTable GROUP BY a)
|WHERE cnt < 10
|""".stripMargin
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql(sql)
// with a filter after the aggregation, the aggregation and the Calc should produce UPDATE_BEFORE
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testRetractAndUpsertSink(): Unit = {
util.addTable(
s"""
|CREATE TABLE retractSink (
| `b` BIGINT,
| `cnt` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
util.addTable(
s"""
|CREATE TABLE upsertSink (
| `b` BIGINT,
| `cnt` BIGINT,
| PRIMARY KEY (b) NOT ENFORCED
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val table = util.tableEnv.sqlQuery("SELECT b, COUNT(a) AS cnt FROM MyTable GROUP BY b")
util.tableEnv.createTemporaryView("TempTable", table)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql(
"INSERT INTO retractSink SELECT b, cnt FROM TempTable WHERE b < 4")
stmtSet.addInsertSql(
"INSERT INTO upsertSink SELECT b, cnt FROM TempTable WHERE b >= 4 AND b < 6")
stmtSet.addInsertSql(
"INSERT INTO upsertSink " +
"SELECT cnt, COUNT(b) AS frequency FROM TempTable WHERE b < 4 GROUP BY cnt")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
@Test
def testAppendUpsertAndRetractSink(): Unit = {
util.addDataStream[(Int, Long, String)]("MyTable2", 'd, 'e, 'f)
util.addDataStream[(Int, Long, String)]("MyTable3", 'i, 'j, 'k)
util.addTable(
s"""
|CREATE TABLE appendSink (
| `a` INT,
| `b` BIGINT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
val table = util.tableEnv.sqlQuery(
"SELECT a, b FROM MyTable UNION ALL SELECT d, e FROM MyTable2")
util.tableEnv.createTemporaryView("TempTable", table)
val stmtSet = util.tableEnv.createStatementSet()
stmtSet.addInsertSql("INSERT INTO appendSink SELECT * FROM TempTable")
util.addTable(
s"""
|CREATE TABLE retractSink (
| `total_sum` INT
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
val table1 = util.tableEnv.sqlQuery(
"SELECT a, b FROM TempTable UNION ALL SELECT i, j FROM MyTable3")
util.tableEnv.createTemporaryView("TempTable1", table1)
stmtSet.addInsertSql("INSERT INTO retractSink SELECT SUM(a) AS total_sum FROM TempTable1")
util.addTable(
s"""
|CREATE TABLE upsertSink (
| `a` INT,
| `total_min` BIGINT,
| PRIMARY KEY (a) NOT ENFORCED
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'false'
|)
|""".stripMargin)
stmtSet.addInsertSql(
"INSERT INTO upsertSink SELECT a, MIN(b) AS total_min FROM TempTable1 GROUP BY a")
util.verifyPlan(stmtSet, ExplainDetail.CHANGELOG_MODE)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.planner.runtime.batch.table
import org.apache.flink.table.api.DataTypes
import org.apache.flink.table.api.scala._
import org.apache.flink.table.planner.factories.TestValuesTableFactory
import org.apache.flink.table.planner.runtime.utils.BatchTestBase
import org.apache.flink.table.planner.runtime.utils.TestData._
import org.apache.flink.util.ExceptionUtils
import org.junit.Assert.{assertEquals, assertTrue, fail}
import org.junit.Test
import scala.collection.JavaConversions._
class TableSinkITCase extends BatchTestBase {
@Test
def testDecimalOnOutputFormatTableSink(): Unit = {
tEnv.executeSql(
s"""
|CREATE TABLE sink (
| `c` VARCHAR(5),
| `b` DECIMAL(10, 0),
| `d` CHAR(5)
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true',
| 'runtime-sink' = 'OutputFormat'
|)
|""".stripMargin)
registerCollection("MyTable", data3, type3, "a, b, c", nullablesOfData3)
tEnv.from("MyTable")
.where('a > 20)
.select("12345", 55.cast(DataTypes.DECIMAL(10, 0)), "12345".cast(DataTypes.CHAR(5)))
.insertInto("sink")
tEnv.execute("job name")
val result = TestValuesTableFactory.getResults("sink")
val expected = Seq("12345,55,12345")
assertEquals(expected.sorted, result.sorted)
}
@Test
def testDecimalOnSinkFunctionTableSink(): Unit = {
tEnv.executeSql(
s"""
|CREATE TABLE sink (
| `c` VARCHAR(5),
| `b` DECIMAL(10, 0),
| `d` CHAR(5)
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
registerCollection("MyTable", data3, type3, "a, b, c", nullablesOfData3)
tEnv.from("MyTable")
.where('a > 20)
.select("12345", 55.cast(DataTypes.DECIMAL(10, 0)), "12345".cast(DataTypes.CHAR(5)))
.insertInto("sink")
tEnv.execute("job name")
val result = TestValuesTableFactory.getResults("sink")
val expected = Seq("12345,55,12345")
assertEquals(expected.sorted, result.sorted)
}
@Test
def testSinkWithKey(): Unit = {
tEnv.executeSql(
s"""
|CREATE TABLE testSink (
| `a` INT,
| `b` DOUBLE,
| PRIMARY KEY (a) NOT ENFORCED
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
registerCollection("MyTable", simpleData2, simpleType2, "a, b", nullableOfSimpleData2)
tEnv.from("MyTable")
.groupBy('a)
.select('a, 'b.sum())
.insertInto("testSink")
tEnv.execute("")
val result = TestValuesTableFactory.getResults("testSink")
val expected = List(
"1,0.1",
"2,0.4",
"3,1.0",
"4,2.2",
"5,3.9")
assertEquals(expected.sorted, result.sorted)
}
@Test
def testSinkWithoutKey(): Unit = {
tEnv.executeSql(
s"""
|CREATE TABLE testSink (
| `a` INT,
| `b` DOUBLE
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
registerCollection("MyTable", simpleData2, simpleType2, "a, b", nullableOfSimpleData2)
tEnv.from("MyTable")
.groupBy('a)
.select('a, 'b.sum())
.insertInto("testSink")
tEnv.execute("")
val result = TestValuesTableFactory.getResults("testSink")
val expected = List(
"1,0.1",
"2,0.4",
"3,1.0",
"4,2.2",
"5,3.9")
assertEquals(expected.sorted, result.sorted)
}
@Test
def testNotNullEnforcer(): Unit = {
val dataId = TestValuesTableFactory.registerData(nullData4)
tEnv.executeSql(
s"""
|CREATE TABLE nullable_src (
| category STRING,
| shopId INT,
| num INT
|) WITH (
| 'connector' = 'values',
| 'data-id' = '$dataId',
| 'bounded' = 'true'
|)
|""".stripMargin)
tEnv.executeSql(
s"""
|CREATE TABLE not_null_sink (
| category STRING,
| shopId INT,
| num INT NOT NULL
|) WITH (
| 'connector' = 'values',
| 'sink-insert-only' = 'true'
|)
|""".stripMargin)
tEnv.sqlUpdate("INSERT INTO not_null_sink SELECT * FROM nullable_src")
// by default this should fail, because there are null values in the source
try {
tEnv.execute("job name")
fail("Execution should fail.")
} catch {
case t: Throwable =>
val exception = ExceptionUtils.findThrowableWithMessage(
t,
"Column 'num' is NOT NULL, however, a null value is being written into it. " +
"You can set job configuration 'table.exec.sink.not-null-enforcer'='drop' " +
"to suppress this exception and drop such records silently.")
assertTrue(exception.isPresent)
}
// enable the 'drop' enforcer so that the query can run
tEnv.getConfig.getConfiguration.setString("table.exec.sink.not-null-enforcer", "drop")
tEnv.sqlUpdate("INSERT INTO not_null_sink SELECT * FROM nullable_src")
tEnv.execute("job name")
val result = TestValuesTableFactory.getResults("not_null_sink")
val expected = List("book,1,12", "book,4,11", "fruit,3,44")
assertEquals(expected.sorted, result.sorted)
}
}
......@@ -81,7 +81,7 @@ class BatchTestBase extends BatchAbstractTestBase {
@After
def after(): Unit = {
TestValuesTableFactory.clearAllRegisteredData()
TestValuesTableFactory.clearAllData()
}
/**
......
......@@ -62,7 +62,7 @@ class StreamingTestBase extends AbstractTestBase {
@After
def after(): Unit = {
StreamTestSink.clear()
TestValuesTableFactory.clearAllRegisteredData()
TestValuesTableFactory.clearAllData()
}
/**
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.runtime.connector.sink;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.data.util.DataFormatConverters.DataFormatConverter;
import javax.annotation.Nullable;
/**
* This class wraps a {@link DataFormatConverter} so it can be used in
* a {@link DynamicTableSink} as a {@link DynamicTableSink.DataStructureConverter}.
*/
public final class DataFormatConverterWrapper implements DynamicTableSink.DataStructureConverter {
private static final long serialVersionUID = 1L;
private final DataFormatConverter<Object, Object> formatConverter;
public DataFormatConverterWrapper(DataFormatConverter<Object, Object> formatConverter) {
this.formatConverter = formatConverter;
}
@Override
public void open(Context context) {
// do nothing
}
@Nullable
@Override
public Object toExternal(@Nullable Object internalStructure) {
return formatConverter.toExternal(internalStructure);
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.runtime.connector.sink;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.data.util.DataFormatConverters;
import org.apache.flink.table.types.DataType;
import static org.apache.flink.table.data.util.DataFormatConverters.getConverterForDataType;
/**
* Implementation of {@link DynamicTableSink.Context}. Currently we delegate
* {@link #createDataStructureConverter} to {@link DataFormatConverters.DataFormatConverter}.
*
* <p>In the future, we can code generate the implementation of {@link #createDataStructureConverter}
* for better performance.
*/
public class SinkRuntimeProviderContext implements DynamicTableSink.Context {
private final boolean isBounded;
public SinkRuntimeProviderContext(boolean isBounded) {
this.isBounded = isBounded;
}
@Override
public boolean isBounded() {
return isBounded;
}
@SuppressWarnings("unchecked")
@Override
public DynamicTableSink.DataStructureConverter createDataStructureConverter(DataType consumedDataType) {
DataFormatConverters.DataFormatConverter<Object, Object> converter = getConverterForDataType(consumedDataType);
return new DataFormatConverterWrapper(converter);
}
}
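As a quick illustration, a minimal Java sketch (class name and row contents are invented for this example) of obtaining a converter from this context and using it to turn an internal RowData into an external Row:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.runtime.connector.sink.SinkRuntimeProviderContext;
import org.apache.flink.table.types.DataType;
import org.apache.flink.types.Row;

public class SinkConverterExample {
    public static void main(String[] args) {
        DynamicTableSink.Context context = new SinkRuntimeProviderContext(true);

        // the data type the sink consumes: ROW<id INT, name STRING>
        DataType consumedDataType = DataTypes.ROW(
            DataTypes.FIELD("id", DataTypes.INT()),
            DataTypes.FIELD("name", DataTypes.STRING()));

        DynamicTableSink.DataStructureConverter converter =
            context.createDataStructureConverter(consumedDataType);

        // convert the internal representation into an external Row
        GenericRowData internal = GenericRowData.of(1, StringData.fromString("Hello"));
        Row external = (Row) converter.toExternal(internal);
        System.out.println(external); // 1,Hello
    }
}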
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.runtime.operators.sink;

import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator;
import org.apache.flink.streaming.api.operators.ChainingStrategy;
import org.apache.flink.streaming.api.operators.InternalTimerService;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.config.ExecutionConfigOptions;
import org.apache.flink.table.api.config.ExecutionConfigOptions.NotNullEnforcer;
import org.apache.flink.table.data.RowData;

/**
 * A {@link StreamOperator} for executing {@link SinkFunction SinkFunctions}. This operator
 * also enforces the NOT NULL constraint, i.e. it checks whether null values are written
 * into NOT NULL columns.
 */
public class SinkOperator extends AbstractUdfStreamOperator<Object, SinkFunction<RowData>>
		implements OneInputStreamOperator<RowData, Object> {

	private static final long serialVersionUID = 1L;

	private final int rowtimeFieldIndex;
	private final int[] notNullFieldIndices;
	private final String[] allFieldNames;
	private final NotNullEnforcer notNullEnforcer;
	private final boolean notNullCheck;

	private transient SimpleContext sinkContext;

	/** We listen to this ourselves because we don't have an {@link InternalTimerService}. */
	private long currentWatermark = Long.MIN_VALUE;

	public SinkOperator(
			SinkFunction<RowData> sinkFunction,
			int rowtimeFieldIndex,
			NotNullEnforcer notNullEnforcer,
			int[] notNullFieldIndices,
			String[] allFieldNames) {
		super(sinkFunction);
		this.rowtimeFieldIndex = rowtimeFieldIndex;
		this.notNullFieldIndices = notNullFieldIndices;
		this.notNullEnforcer = notNullEnforcer;
		this.notNullCheck = notNullFieldIndices.length > 0;
		this.allFieldNames = allFieldNames;
		chainingStrategy = ChainingStrategy.ALWAYS;
	}

	@Override
	public void open() throws Exception {
		super.open();
		this.sinkContext = new SimpleContext(getProcessingTimeService());
	}
	@Override
	public void processElement(StreamRecord<RowData> element) throws Exception {
		sinkContext.element = element;
		RowData row = element.getValue();
		if (notNullCheck) {
			if (failOrFilterNullValues(row)) {
				// the record violates the NOT NULL constraint, drop it silently
				return;
			}
		}
		userFunction.invoke(row, sinkContext);
	}

	/**
	 * Checks the NOT NULL columns of the given row. Depending on the configured
	 * {@link NotNullEnforcer}, a violation either fails the job with a {@link TableException}
	 * or causes the record to be dropped.
	 *
	 * @return true if the record contains null values in NOT NULL columns and should be dropped
	 */
	private boolean failOrFilterNullValues(RowData row) {
		for (int index : notNullFieldIndices) {
			if (row.isNullAt(index)) {
				if (notNullEnforcer == NotNullEnforcer.ERROR) {
					String optionKey = ExecutionConfigOptions.TABLE_EXEC_SINK_NOT_NULL_ENFORCER.key();
					throw new TableException(String.format(
						"Column '%s' is NOT NULL, however, a null value is being written into it. " +
							"You can set job configuration '%s'='drop' " +
							"to suppress this exception and drop such records silently.",
						allFieldNames[index],
						optionKey));
				} else {
					// simply drop the record
					return true;
				}
			}
		}
		return false;
	}

	@Override
	protected void reportOrForwardLatencyMarker(LatencyMarker marker) {
		// all operators are tracking latencies
		this.latencyStats.reportLatency(marker);
		// sinks don't forward latency markers
	}

	@Override
	public void processWatermark(Watermark mark) throws Exception {
		super.processWatermark(mark);
		this.currentWatermark = mark.getTimestamp();
	}
	private class SimpleContext implements SinkFunction.Context<RowData> {

		private StreamRecord<RowData> element;

		private final ProcessingTimeService processingTimeService;

		public SimpleContext(ProcessingTimeService processingTimeService) {
			this.processingTimeService = processingTimeService;
		}

		@Override
		public long currentProcessingTime() {
			return processingTimeService.getCurrentProcessingTime();
		}

		@Override
		public long currentWatermark() {
			return currentWatermark;
		}

		@Override
		public Long timestamp() {
			// a negative index indicates that the sink has no rowtime field;
			// index 0 is a valid rowtime column, so the check must be >= 0
			if (rowtimeFieldIndex >= 0) {
				return element.getValue().getLong(rowtimeFieldIndex);
			}
			return null;
		}
	}
}
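For context, a hedged usage sketch (not part of this change): from the user's point of view, the NOT NULL enforcement performed by SinkOperator is controlled through the table.exec.sink.not-null-enforcer option. The class name and the surrounding setup below are assumed for illustration.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.config.ExecutionConfigOptions;

public final class NotNullEnforcerExample {

	public static void main(String[] args) {
		TableEnvironment tEnv = TableEnvironment.create(
			EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build());

		// With the default ERROR behavior, SinkOperator throws a TableException as soon as a null
		// value is written into a NOT NULL column; 'drop' makes it skip such records silently.
		tEnv.getConfig().getConfiguration().setString(
			ExecutionConfigOptions.TABLE_EXEC_SINK_NOT_NULL_ENFORCER.key(), "drop");

		// ... register catalog tables and execute INSERT INTO statements as usual ...
	}
}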