提交 be72c247 编写于 作者: A Alexey Milovidov

Simplification of Nullable [#CLICKHOUSE-2]

上级 f5a06cee
......@@ -401,7 +401,6 @@ void Connection::sendData(const Block & block, const String & name)
size_t prev_bytes = out->count();
block.checkNestedArraysOffsets();
block_out->write(block);
maybe_compressed_out->next();
out->next();
......
......@@ -85,6 +85,12 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override;
/// Shallow enumeration of this column's direct subcolumns.
/// Invokes `callback` first on the offsets column and then on the data column.
/// Each subcolumn is handed over as the stored ColumnPtr member itself, not as a copy.
void forEachSubcolumn(ColumnCallback callback) override
{
callback(offsets);
callback(data);
}
private:
ColumnPtr data;
ColumnPtr offsets; /// Displacements can be shared across multiple columns - to implement nested data structures.
......
......@@ -243,6 +243,11 @@ public:
data->getExtremes(min, max);
}
/// Shallow enumeration of this column's direct subcolumns.
/// There is exactly one: `callback` is invoked once, on the stored `data` pointer.
void forEachSubcolumn(ColumnCallback callback) override
{
callback(data);
}
/// Not part of the common interface.
......
......@@ -68,6 +68,12 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override;
/// Shallow enumeration of this column's direct subcolumns.
/// Invokes `callback` first on the nested (values) column and then on the null map column.
/// Each subcolumn is handed over as the stored ColumnPtr member itself, not as a copy.
void forEachSubcolumn(ColumnCallback callback) override
{
callback(nested_column);
callback(null_map);
}
/// Return the column that represents values.
ColumnPtr & getNestedColumn() { return nested_column; }
const ColumnPtr & getNestedColumn() const { return nested_column; }
......
......@@ -299,5 +299,12 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const
columns[i]->getExtremes(min_backend[i], max_backend[i]);
}
/// Shallow enumeration of the tuple's element columns: invokes `callback` once per element, in order.
/// The callback gets the stored pointer by reference and may replace it (for example with a clone).
/// The tuple keeps its elements both in `columns` and inside the `data` block, so after each
/// callback the corresponding entry of `data` is re-pointed at the (possibly replaced) element;
/// otherwise the two views of the tuple would silently diverge.
void ColumnTuple::forEachSubcolumn(ColumnCallback callback)
{
    for (size_t i = 0, size = data.columns(); i < size; ++i)
    {
        callback(columns[i]);

        /// Re-assigning an unchanged pointer is a no-op, so this is safe to do unconditionally.
        data.getByPosition(i).column = columns[i];
    }
}
}
......@@ -59,6 +59,7 @@ public:
size_t byteSize() const override;
size_t allocatedBytes() const override;
ColumnPtr convertToFullColumnIfConst() const override;
void forEachSubcolumn(ColumnCallback callback) override;
const Block & getData() const { return data; }
Block & getData() { return data; }
......
......@@ -263,6 +263,11 @@ public:
/// Zero, if could be determined.
virtual size_t allocatedBytes() const = 0;
/// If the column contains subcolumns (such as Array, Nullable, etc), enumerate them.
/// Shallow: doesn't do recursive calls.
/// The callback receives each subcolumn pointer by non-const reference.
/// The default implementation does nothing, i.e. it reports no subcolumns.
using ColumnCallback = std::function<void(ColumnPtr&)>;
virtual void forEachSubcolumn(ColumnCallback) {}
virtual ~IColumn() {}
protected:
......
......@@ -2,13 +2,6 @@
#include <Core/Block.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/typeid_cast.h>
......@@ -16,6 +9,7 @@
#include <iterator>
#include <memory>
namespace DB
{
......@@ -28,58 +22,6 @@ namespace ErrorCodes
}
void Block::addDefaults(const NamesAndTypesList & required_columns)
{
/// For missing columns of nested structure, you need to create not a column of empty arrays, but a column of arrays of correct lengths.
/// First, remember the offset columns for all arrays in the block.
std::map<String, ColumnPtr> offset_columns;
for (const auto & elem : data)
{
if (const ColumnArray * array = typeid_cast<const ColumnArray *>(&*elem.column))
{
String offsets_name = DataTypeNested::extractNestedTableName(elem.name);
auto & offsets_column = offset_columns[offsets_name];
/// If for some reason there are different displacement columns for one nested structure, then we take nonempty.
if (!offsets_column || offsets_column->empty())
offsets_column = array->getOffsetsColumn();
}
}
for (const auto & requested_column : required_columns)
{
if (has(requested_column.name))
continue;
ColumnWithTypeAndName column_to_add;
column_to_add.name = requested_column.name;
column_to_add.type = requested_column.type;
String offsets_name = DataTypeNested::extractNestedTableName(column_to_add.name);
if (offset_columns.count(offsets_name))
{
ColumnPtr offsets_column = offset_columns[offsets_name];
DataTypePtr nested_type = typeid_cast<DataTypeArray &>(*column_to_add.type).getNestedType();
size_t nested_rows = offsets_column->empty() ? 0
: typeid_cast<ColumnUInt64 &>(*offsets_column).getData().back();
ColumnPtr nested_column = nested_type->createConstColumn(nested_rows, nested_type->getDefault())->convertToFullColumnIfConst();
column_to_add.column = std::make_shared<ColumnArray>(nested_column, offsets_column);
}
else
{
/** It is necessary to turn a constant column into a full column, since in part of blocks (from other parts),
* it can be full (or the interpreter may decide that it is constant everywhere).
*/
column_to_add.column = column_to_add.type->createConstColumn(rows(), column_to_add.type->getDefault())->convertToFullColumnIfConst();
}
insert(std::move(column_to_add));
}
}
Block::Block(std::initializer_list<ColumnWithTypeAndName> il) : data{il}
{
initializeIndexByName();
......@@ -386,59 +328,6 @@ NamesAndTypesList Block::getColumnsList() const
}
void Block::checkNestedArraysOffsets() const
{
/// Pointers to array columns, to check the equality of offset columns in nested data structures
using ArrayColumns = std::map<String, const ColumnArray *>;
ArrayColumns array_columns;
for (const auto & elem : data)
{
if (const ColumnArray * column_array = typeid_cast<const ColumnArray *>(elem.column.get()))
{
String name = DataTypeNested::extractNestedTableName(elem.name);
ArrayColumns::const_iterator it = array_columns.find(name);
if (array_columns.end() == it)
array_columns[name] = column_array;
else
{
if (!it->second->hasEqualOffsets(*column_array))
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
}
}
}
}
void Block::optimizeNestedArraysOffsets()
{
/// Pointers to array columns, to check the equality of offset columns in nested data structures
using ArrayColumns = std::map<String, ColumnArray *>;
ArrayColumns array_columns;
for (auto & elem : data)
{
if (ColumnArray * column_array = typeid_cast<ColumnArray *>(elem.column.get()))
{
String name = DataTypeNested::extractNestedTableName(elem.name);
ArrayColumns::const_iterator it = array_columns.find(name);
if (array_columns.end() == it)
array_columns[name] = column_array;
else
{
if (!it->second->hasEqualOffsets(*column_array))
throw Exception("Sizes of nested arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
/// make columns of arrays offsets inside one nested table point to the same place
column_array->getOffsetsColumn() = it->second->getOffsetsColumn();
}
}
}
}
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs)
{
size_t columns = lhs.columns();
......@@ -543,53 +432,17 @@ void Block::unshareColumns()
{
std::unordered_set<void*> pointers;
for (auto & elem : data)
IColumn::ColumnCallback callback = [&](ColumnPtr & subcolumn)
{
if (!pointers.insert(elem.column.get()).second)
{
elem.column = elem.column->clone();
}
else if (ColumnArray * arr = typeid_cast<ColumnArray *>(elem.column.get()))
{
ColumnPtr & offsets = arr->getOffsetsColumn();
if (!pointers.insert(offsets.get()).second)
offsets = offsets->clone();
if (!pointers.insert(subcolumn.get()).second)
subcolumn = subcolumn->clone();
subcolumn->forEachSubcolumn(callback);
};
ColumnPtr & nested = arr->getDataPtr();
if (!pointers.insert(nested.get()).second)
nested = nested->clone();
}
else if (ColumnTuple * tuple = typeid_cast<ColumnTuple *>(elem.column.get()))
{
Block & tuple_block = tuple->getData();
Columns & tuple_columns = tuple->getColumns();
size_t size = tuple_block.columns();
for (size_t i = 0; i < size; ++i)
{
if (!pointers.insert(tuple_columns[i].get()).second)
{
tuple_columns[i] = tuple_columns[i]->clone();
tuple_block.getByPosition(i).column = tuple_columns[i];
}
}
}
else if (ColumnNullable * nullable = typeid_cast<ColumnNullable *>(elem.column.get()))
{
ColumnPtr & null_map = nullable->getNullMapColumn();
if (!pointers.insert(null_map.get()).second)
null_map = null_map->clone();
ColumnPtr & nested = nullable->getNestedColumn();
if (!pointers.insert(nested.get()).second)
nested = nested->clone();
}
else if (ColumnConst * col_const = typeid_cast<ColumnConst *>(elem.column.get()))
{
ColumnPtr & nested = col_const->getDataColumnPtr();
if (!pointers.insert(nested.get()).second)
nested = nested->clone();
}
for (auto & elem : data)
{
callback(elem.column);
elem.column->forEachSubcolumn(callback);
}
}
......
#pragma once
#include <vector>
#include <list>
#include <map>
#include <initializer_list>
#include <Common/Exception.h>
#include <Core/BlockInfo.h>
#include <Core/NamesAndTypes.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/ColumnNumbers.h>
#include <Common/Exception.h>
......@@ -55,8 +53,6 @@ public:
void erase(size_t position);
/// remove the column with the specified name
void erase(const String & name);
/// Adds missing columns to the block with default values
void addDefaults(const NamesAndTypesList & required_columns);
/// References are invalidated after calling functions above.
......@@ -105,13 +101,6 @@ public:
/** Get a block with columns that have been rearranged in the order of their names. */
Block sortColumns() const;
/** Replaces the offset columns within the nested tables by one common for the table.
* Throws an exception if these offsets suddenly turn out to be different.
*/
void optimizeNestedArraysOffsets();
/** The same, only without changing the offsets. */
void checkNestedArraysOffsets() const;
void clear();
void swap(Block & other) noexcept;
......
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnArray.h>
#include <Core/Block.h>
namespace DB
{
void AddingDefaultBlockOutputStream::write(const DB::Block & block)
{
Block res = block;
......@@ -15,9 +21,60 @@ void AddingDefaultBlockOutputStream::write(const DB::Block & block)
evaluateMissingDefaults(res, *required_columns, column_defaults, context);
/// Adds not specified default values.
/// @todo this may be moved before `evaluateMissingDefaults` with passing {required_columns - explicitly-defaulted columns}
if (!only_explicit_column_defaults)
/// @todo this line may be moved before `evaluateMissingDefaults` with passing {required_columns - explicitly-defaulted columns}
res.addDefaults(*required_columns);
{
size_t rows = res.rows();
/// For missing columns of nested structure, you need to create not a column of empty arrays, but a column of arrays of correct lengths.
/// First, remember the offset columns for all arrays in the block.
std::map<String, ColumnPtr> offset_columns;
for (size_t i = 0, size = res.columns(); i < size; ++i)
{
const auto & elem = res.getByPosition(i);
if (const ColumnArray * array = typeid_cast<const ColumnArray *>(&*elem.column))
{
String offsets_name = DataTypeNested::extractNestedTableName(elem.name);
auto & offsets_column = offset_columns[offsets_name];
/// If for some reason there are different offset columns for one nested structure, then we take nonempty.
if (!offsets_column || offsets_column->empty())
offsets_column = array->getOffsetsColumn();
}
}
for (const auto & requested_column : *required_columns)
{
if (res.has(requested_column.name))
continue;
ColumnWithTypeAndName column_to_add;
column_to_add.name = requested_column.name;
column_to_add.type = requested_column.type;
String offsets_name = DataTypeNested::extractNestedTableName(column_to_add.name);
if (offset_columns.count(offsets_name))
{
ColumnPtr offsets_column = offset_columns[offsets_name];
DataTypePtr nested_type = typeid_cast<DataTypeArray &>(*column_to_add.type).getNestedType();
UInt64 nested_rows = rows ? get<UInt64>((*offsets_column)[rows - 1]) : 0;
ColumnPtr nested_column = nested_type->createConstColumn(nested_rows, nested_type->getDefault())->convertToFullColumnIfConst();
column_to_add.column = std::make_shared<ColumnArray>(nested_column, offsets_column);
}
else
{
/** It is necessary to turn a constant column into a full column, since in part of blocks (from other parts),
* it can be full (or the interpreter may decide that it is constant everywhere).
*/
column_to_add.column = column_to_add.type->createConstColumn(rows, column_to_add.type->getDefault())->convertToFullColumnIfConst();
}
res.insert(std::move(column_to_add));
}
}
output->write(res);
}
......@@ -36,4 +93,5 @@ void AddingDefaultBlockOutputStream::writeSuffix()
{
output->writeSuffix();
}
}
......@@ -122,8 +122,6 @@ Block BlockInputStreamFromRowInputStream::readImpl()
if (res.rows() == 0)
res.clear();
else
res.optimizeNestedArraysOffsets();
return res;
}
......
......@@ -157,7 +157,9 @@ DataTypePtr getLeastCommonType(const DataTypes & types)
if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
{
have_nullable = true;
nested_types.emplace_back(type_nullable->getNestedType());
if (!type_nullable->isNull())
nested_types.emplace_back(type_nullable->getNestedType());
}
else
nested_types.emplace_back(type);
......
......@@ -667,42 +667,6 @@ bool FunctionArrayElement::executeGeneric(Block & block, const ColumnNumbers & a
return true;
}
/// Handles the case where the array argument is a constant column holding an Array
/// and the index is a single Field. Writes a constant column into the result position.
/// Returns false (without touching the block) if the first argument is not such a constant column,
/// true otherwise. Throws an Exception (LOGICAL_ERROR) if the index is neither UInt64 nor Int64.
bool FunctionArrayElement::executeConstConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index,
ArrayImpl::NullMapBuilder & builder)
{
const ColumnConst * col_array = checkAndGetColumnConst<ColumnArray>(block.getByPosition(arguments[0]).column.get());
if (!col_array)
return false;
Array array = col_array->getValue<Array>();
size_t array_size = array.size();
/// Zero-based position inside `array`. Computed in unsigned arithmetic, so values that fall
/// outside the array (e.g. a UInt64 index of 0, which wraps around on `- 1`) simply fail
/// the `real_index < array_size` check below.
size_t real_index = 0;
if (index.getType() == Field::Types::UInt64)
/// One-based index counted from the beginning.
real_index = safeGet<UInt64>(index) - 1;
else if (index.getType() == Field::Types::Int64)
/// The index is added to the array size, so -1 addresses the last element.
/// NOTE(review): presumably only negative indices arrive as Int64 - a positive one lands past the end; confirm.
real_index = array_size + safeGet<Int64>(index);
else
throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR);
/// Stays default-constructed when the index is out of range.
Field value;
if (real_index < array_size)
value = array.at(real_index);
/// A null value is replaced by the default value of the result type.
if (value.isNull())
value = block.getByPosition(result).type->getDefault();
block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn(
block.rows(),
value);
/// The builder is notified only when it converts to true.
/// NOTE(review): it receives real_index even when that is out of range - confirm the builder tolerates this.
if (builder)
builder.update(real_index);
return true;
}
template <typename IndexType>
bool FunctionArrayElement::executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray<IndexType> & indices,
ArrayImpl::NullMapBuilder & builder)
......@@ -978,7 +942,6 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument
|| executeNumberConst<Int64> (block, arguments, result, index, builder)
|| executeNumberConst<Float32> (block, arguments, result, index, builder)
|| executeNumberConst<Float64> (block, arguments, result, index, builder)
|| executeConstConst (block, arguments, result, index, builder)
|| executeStringConst (block, arguments, result, index, builder)
|| executeGenericConst (block, arguments, result, index, builder)))
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
......
......@@ -152,9 +152,6 @@ private:
bool executeGeneric(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray<IndexType> & indices,
ArrayImpl::NullMapBuilder & builder);
bool executeConstConst(Block & block, const ColumnNumbers & arguments, size_t result, const Field & index,
ArrayImpl::NullMapBuilder & builder);
template <typename IndexType>
bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result, const PaddedPODArray<IndexType> & indices,
ArrayImpl::NullMapBuilder & builder);
......
......@@ -734,17 +734,9 @@ public:
/// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
bool cond_is_nullable = arguments[0]->isNullable();
bool then_is_nullable = arguments[1]->isNullable();
bool else_is_nullable = arguments[2]->isNullable();
if (cond_is_nullable || then_is_nullable || else_is_nullable)
{
if (arguments[0]->isNullable())
return makeNullableDataTypeIfNot(getReturnTypeImpl({
getNestedDataType(arguments[0]),
getNestedDataType(arguments[1]),
getNestedDataType(arguments[2])}));
}
getNestedDataType(arguments[0]), arguments[1], arguments[2]}));
if (!checkDataType<DataTypeUInt8>(arguments[0].get()))
throw Exception("Illegal type of first argument (condition) of function if. Must be UInt8.",
......
......@@ -1115,15 +1115,15 @@ using FunctionToFloat64OrZero = FunctionConvertOrZero<DataTypeFloat64, NameToFlo
class FunctionCast final : public IFunction
{
public:
FunctionCast(const Context & context) : context(context) {}
private:
using WrapperType = std::function<void(Block &, const ColumnNumbers &, size_t)>;
const Context & context;
WrapperType wrapper_function;
std::function<Monotonicity(const IDataType &, const Field &, const Field &)> monotonicity_for_range;
public:
FunctionCast(const Context & context) : context(context) {}
private:
template <typename DataType>
WrapperType createWrapper(const DataTypePtr & from_type, const DataType * const)
{
......@@ -1410,32 +1410,19 @@ private:
}
/// Actions to be taken when performing a conversion.
struct Action
struct NullableConversion
{
/// If neither the input type nor the output type is nullable or null,
/// we perform the conversion without any pre and/or processing.
static constexpr auto NONE = UInt64(0);
/// The input has a nullable type. We must extract its nested type
/// before performing any conversion.
static constexpr auto UNWRAP_NULLABLE_INPUT = UInt64(1) << 0;
/// The output has a nullable type. We must wrap the result from the
/// conversion into a ColumnNullable.
static constexpr auto WRAP_RESULT_INTO_NULLABLE = UInt64(1) << 1;
/// The input is the NULL value. Before performing any conversion,
/// we will turn it into a single UInt8 zero value.
static constexpr auto CONVERT_NULL = UInt64(1) << 2;
bool source_is_nullable = false;
bool result_is_nullable = false;
};
WrapperType prepare(const DataTypePtr & from_type, const IDataType * to_type, const uint64_t action)
WrapperType prepare(const DataTypePtr & from_type, const IDataType * to_type, const NullableConversion nullable_conversion)
{
auto wrapper = prepareImpl((action & Action::CONVERT_NULL) ?
std::make_shared<DataTypeUInt8>() :
from_type,
to_type);
auto wrapper = prepareImpl(from_type, to_type);
if (action & Action::WRAP_RESULT_INTO_NULLABLE)
if (nullable_conversion.result_is_nullable)
{
return [wrapper, action] (Block & block, const ColumnNumbers & arguments, const size_t result)
return [wrapper, nullable_conversion] (Block & block, const ColumnNumbers & arguments, const size_t result)
{
/// Create a temporary block on which to perform the operation.
auto & res = block.getByPosition(result);
......@@ -1444,17 +1431,8 @@ private:
const auto & nested_type = nullable_type.getNestedType();
Block tmp_block;
if (action & Action::UNWRAP_NULLABLE_INPUT)
if (nullable_conversion.source_is_nullable)
tmp_block = createBlockWithNestedColumns(block, arguments);
else if (action & Action::CONVERT_NULL)
{
/// The input is replaced by a trivial UInt8 column
/// which contains only one row whose value is 0.
tmp_block = block;
auto & elem = tmp_block.getByPosition(arguments[0]);
elem.column = std::make_shared<ColumnUInt8>(block.rows(), 0);
elem.type = std::make_shared<DataTypeUInt8>();
}
else
tmp_block = block;
......@@ -1467,7 +1445,7 @@ private:
/// Wrap the result into a nullable column.
ColumnPtr null_map;
if (action & Action::UNWRAP_NULLABLE_INPUT)
if (nullable_conversion.source_is_nullable)
{
/// This is a conversion from a nullable to a nullable type.
/// So we just keep the null map of the input argument.
......@@ -1475,11 +1453,6 @@ private:
const auto & nullable_col = static_cast<const ColumnNullable &>(*col);
null_map = nullable_col.getNullMapColumn();
}
else if (action & Action::CONVERT_NULL)
{
/// A NULL value has been converted to a nullable type.
null_map = std::make_shared<ColumnUInt8>(block.rows(), 1);
}
else
{
/// This is a conversion from an ordinary type to a nullable type.
......@@ -1540,8 +1513,7 @@ private:
/// but it is disabled because deserializing aggregate functions state might be unsafe.
throw Exception{
"Conversion from " + from_type->getName() + " to " + to_type->getName() +
" is not supported",
"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
......@@ -1610,37 +1582,35 @@ public:
out_return_type = DataTypeFactory::instance().get(type_col->getValue<String>());
/// Determine whether pre-processing and/or post-processing must take
/// place during conversion.
uint64_t action = Action::NONE;
const auto & from_type = arguments.front().type;
if (from_type->isNullable())
action |= Action::UNWRAP_NULLABLE_INPUT;
else if (from_type->isNull() && !out_return_type->isNull())
action |= Action::CONVERT_NULL;
/// Determine whether pre-processing and/or post-processing must take place during conversion.
const DataTypePtr & from_type = arguments.front().type;
if (out_return_type->isNullable())
action |= Action::WRAP_RESULT_INTO_NULLABLE;
NullableConversion nullable_conversion;
nullable_conversion.source_is_nullable = from_type->isNullable();
nullable_conversion.result_is_nullable = out_return_type->isNullable();
/// Check that the requested conversion is allowed.
if (!(action & Action::WRAP_RESULT_INTO_NULLABLE))
if (nullable_conversion.source_is_nullable && !nullable_conversion.result_is_nullable)
throw Exception{"Cannot convert data from a nullable type to a non-nullable type",
ErrorCodes::CANNOT_CONVERT_TYPE};
if (from_type->isNull())
{
if (action & Action::CONVERT_NULL)
throw Exception{"Cannot convert NULL into a non-nullable type",
ErrorCodes::CANNOT_CONVERT_TYPE};
else if (action & Action::UNWRAP_NULLABLE_INPUT)
throw Exception{"Cannot convert data from a nullable type to a non-nullable type",
ErrorCodes::CANNOT_CONVERT_TYPE};
wrapper_function = [](Block & block, const ColumnNumbers &, const size_t result)
{
auto & res = block.getByPosition(result);
res.column = res.type->createConstColumn(block.rows(), Null())->convertToFullColumnIfConst();
};
return;
}
DataTypePtr from_inner_type;
const IDataType * to_inner_type;
/// Create the requested conversion.
if (action & Action::WRAP_RESULT_INTO_NULLABLE)
if (nullable_conversion.result_is_nullable)
{
if (action & Action::UNWRAP_NULLABLE_INPUT)
if (nullable_conversion.source_is_nullable)
{
const auto & nullable_type = static_cast<const DataTypeNullable &>(*from_type);
from_inner_type = nullable_type.getNestedType();
......@@ -1657,7 +1627,7 @@ public:
to_inner_type = out_return_type.get();
}
wrapper_function = prepare(from_inner_type, to_inner_type, action);
wrapper_function = prepare(from_inner_type, to_inner_type, nullable_conversion);
prepareMonotonicityInformation(from_inner_type, to_inner_type);
}
......
......@@ -2,9 +2,9 @@
#include <Common/LRUCache.h>
#include <Common/SipHash.h>
#include <Common/UInt128.h>
#include <Common/ProfileEvents.h>
#include <IO/BufferWithOwnMemory.h>
#include <Interpreters/AggregationCommon.h>
namespace ProfileEvents
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册