提交 c4687b5f 编写于 作者: A Alexey Milovidov

Unification of Nested: development [#CLICKHOUSE-2].

上级 005f6b96
......@@ -250,7 +250,6 @@ namespace ErrorCodes
extern const int TABLE_IS_READ_ONLY = 242;
extern const int NOT_ENOUGH_SPACE = 243;
extern const int UNEXPECTED_ZOOKEEPER_ERROR = 244;
extern const int INVALID_NESTED_NAME = 245;
extern const int CORRUPTED_DATA = 246;
extern const int INCORRECT_MARK = 247;
extern const int INVALID_PARTITION_VALUE = 248;
......
......@@ -89,9 +89,16 @@ Names NamesAndTypesList::getNames() const
Names res;
res.reserve(size());
for (const NameAndTypePair & column : *this)
{
res.push_back(column.name);
}
return res;
}
/// Returns the types of all columns, in the same order in which the columns are stored in this list.
DataTypes NamesAndTypesList::getTypes() const
{
    DataTypes types;
    types.reserve(size());

    for (auto it = begin(); it != end(); ++it)
        types.push_back(it->type);

    return types;
}
......
......@@ -59,6 +59,7 @@ public:
size_t sizeOfDifference(const NamesAndTypesList & rhs) const;
Names getNames() const;
DataTypes getTypes() const;
/// Keep only the columns whose names are in `names`. `names` may contain names that do not match any column.
NamesAndTypesList filter(const NameSet & names) const;
......
#include <string.h>
#include <Common/typeid_cast.h>
#include <Common/StringUtils.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
......@@ -13,11 +14,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int INVALID_NESTED_NAME;
}
namespace Nested
{
......@@ -27,23 +23,48 @@ std::string concatenateName(const std::string & nested_table_name, const std::st
}
std::string extractTableName(const std::string & nested_name)
/** Name can be treated as compound if and only if both parts are simple identifiers.
*/
std::pair<std::string, std::string> splitName(const std::string & name)
{
const char * first_pos = strchr(nested_name.data(), '.');
const char * last_pos = strrchr(nested_name.data(), '.');
if (first_pos != last_pos)
throw Exception("Invalid nested column name: " + nested_name, ErrorCodes::INVALID_NESTED_NAME);
return first_pos == nullptr ? nested_name : nested_name.substr(0, first_pos - nested_name.data());
const char * begin = name.data();
const char * pos = begin;
const char * end = begin + name.size();
if (pos >= end || !isValidIdentifierBegin(*pos))
return {name, {}};
++pos;
while (pos < end && isWordCharASCII(*pos))
++pos;
if (pos >= end || *pos != '.')
return {name, {}};
const char * first_end = pos;
++pos;
const char * second_begin = pos;
if (pos >= end || !isValidIdentifierBegin(*pos))
return {name, {}};
++pos;
while (pos < end && isWordCharASCII(*pos))
++pos;
if (pos != end)
return {name, {}};
return {{ begin, first_end }, { second_begin, end }};
}
std::string extractElementName(const std::string & nested_name)
std::string extractTableName(const std::string & nested_name)
{
const char * first_pos = strchr(nested_name.data(), '.');
const char * last_pos = strrchr(nested_name.data(), '.');
if (first_pos != last_pos)
throw Exception("Invalid nested column name: " + nested_name, ErrorCodes::INVALID_NESTED_NAME);
return last_pos == nullptr ? nested_name : nested_name.substr(last_pos - nested_name.data() + 1);
auto splitted = splitName(nested_name);
return splitted.first;
}
......@@ -79,7 +100,31 @@ NamesAndTypesList flatten(const NamesAndTypesList & names_and_types)
/** Gathers Array columns with compound names (`table.element`, as decided by splitName)
  * into a single column per `table` of type Array(Tuple(...)).
  *
  * Columns that are not arrays, or whose names are not compound, are copied to the result as is,
  * in their original order. The gathered columns are appended after them, ordered by table name
  * (the iteration order of std::map). Inside each tuple, the elements keep the order
  * in which they appeared in `names_and_types`.
  */
NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
{
    NamesAndTypesList res;

    /// Table name -> (element name, element type) for every array column that belongs to that table.
    std::map<std::string, NamesAndTypesList> nested;

    for (const auto & name_type : names_and_types)
    {
        bool collected = false;

        if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get()))
        {
            auto splitted = splitName(name_type.name);
            if (!splitted.second.empty())
            {
                /// Store the type of the array elements: the array itself is re-created around the tuple below.
                nested[splitted.first].emplace_back(splitted.second, type_arr->getNestedType());
                collected = true;
            }
        }

        if (!collected)
            res.push_back(name_type);
    }

    for (const auto & name_elems : nested)
        res.emplace_back(name_elems.first, std::make_shared<DataTypeArray>(
            std::make_shared<DataTypeTuple>(name_elems.second.getTypes(), name_elems.second.getNames())));

    return res;
}
}
......
......@@ -10,10 +10,10 @@ namespace Nested
{
std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name);
std::pair<std::string, std::string> splitName(const std::string & name);
/// Returns the part of the name before the first '.', or the name unchanged if there is no dot.
std::string extractTableName(const std::string & nested_name);
/// Returns the part of the name after the last '.', or the name unchanged if there is no dot.
std::string extractElementName(const std::string & nested_name);
/// Replace each Array(Tuple(...)) column with multiple Array columns named `column_name.element_name`.
NamesAndTypesList flatten(const NamesAndTypesList & names_and_types);
......
......@@ -1912,11 +1912,10 @@ void ExpressionAnalyzer::getArrayJoinedColumns()
bool found = false;
for (const auto & column_name_type : columns)
{
String table_name = Nested::extractTableName(column_name_type.name);
String column_name = Nested::extractElementName(column_name_type.name);
if (table_name == source_name)
auto splitted = Nested::splitName(column_name_type.name);
if (splitted.first == source_name && !splitted.second.empty())
{
array_join_result_to_source[Nested::concatenateName(result_name, column_name)] = column_name_type.name;
array_join_result_to_source[Nested::concatenateName(result_name, splitted.second)] = column_name_type.name;
found = true;
break;
}
......@@ -1939,38 +1938,33 @@ void ExpressionAnalyzer::getArrayJoinedColumnsImpl(const ASTPtr & ast)
{
if (node->kind == ASTIdentifier::Column)
{
String table_name = Nested::extractTableName(node->name);
auto splitted = Nested::splitName(node->name); /// ParsedParams, Key1
if (array_join_alias_to_name.count(node->name))
{
/// ARRAY JOIN was written with an array column. Example: SELECT K1 FROM ... ARRAY JOIN ParsedParams.Key1 AS K1
array_join_result_to_source[node->name] = array_join_alias_to_name[node->name]; /// K1 -> ParsedParams.Key1
}
else if (array_join_alias_to_name.count(table_name))
else if (array_join_alias_to_name.count(splitted.first) && !splitted.second.empty())
{
/// ARRAY JOIN was written with a nested table. Example: SELECT PP.KEY1 FROM ... ARRAY JOIN ParsedParams AS PP
String nested_column = Nested::extractElementName(node->name); /// Key1
array_join_result_to_source[node->name] /// PP.Key1 -> ParsedParams.Key1
= Nested::concatenateName(array_join_alias_to_name[table_name], nested_column);
= Nested::concatenateName(array_join_alias_to_name[splitted.first], splitted.second);
}
else if (array_join_name_to_alias.count(node->name))
{
/** Example: SELECT ParsedParams.Key1 FROM ... ARRAY JOIN ParsedParams.Key1 AS PP.Key1.
* That is, the query uses the original array, replicated by itself.
*/
String nested_column = Nested::extractElementName(node->name); /// Key1
array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1
array_join_name_to_alias[node->name]] = node->name;
}
else if (array_join_name_to_alias.count(table_name))
else if (array_join_name_to_alias.count(splitted.first) && !splitted.second.empty())
{
/** Example: SELECT ParsedParams.Key1 FROM ... ARRAY JOIN ParsedParams AS PP.
*/
String nested_column = Nested::extractElementName(node->name); /// Key1
array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1
Nested::concatenateName(array_join_name_to_alias[table_name], nested_column)] = node->name;
Nested::concatenateName(array_join_name_to_alias[splitted.first], splitted.second)] = node->name;
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册