Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
oceanbase
oceanbase
提交
e6ebfb49
O
oceanbase
项目概览
oceanbase
/
oceanbase
2 年多 前同步成功
通知
262
Star
6084
Fork
1301
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
oceanbase
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
e6ebfb49
编写于
11月 11, 2025
作者:
D
doubleMocha
提交者:
ob-robot
11月 11, 2025
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support COLUMN_NAME_CASE_SENSITIVE
上级
a5334347
变更
15
隐藏空白更改
内联
并排
Showing
15 changed files
with
234 additions
and
51 deletions
+234
-51
src/objit/include/objit/common/ob_item_type.h
src/objit/include/objit/common/ob_item_type.h
+2
-0
src/share/ob_compatibility_control_feature_def.h
src/share/ob_compatibility_control_feature_def.h
+4
-0
src/share/schema/ob_schema_printer_external_table.cpp
src/share/schema/ob_schema_printer_external_table.cpp
+15
-0
src/sql/engine/cmd/ob_load_data_parser.cpp
src/sql/engine/cmd/ob_load_data_parser.cpp
+28
-0
src/sql/engine/cmd/ob_load_data_parser.h
src/sql/engine/cmd/ob_load_data_parser.h
+21
-8
src/sql/engine/table/ob_orc_table_row_iter.cpp
src/sql/engine/table/ob_orc_table_row_iter.cpp
+100
-25
src/sql/engine/table/ob_orc_table_row_iter.h
src/sql/engine/table/ob_orc_table_row_iter.h
+7
-3
src/sql/engine/table/ob_parquet_table_row_iter.cpp
src/sql/engine/table/ob_parquet_table_row_iter.cpp
+27
-13
src/sql/engine/table/ob_parquet_table_row_iter.h
src/sql/engine/table/ob_parquet_table_row_iter.h
+3
-1
src/sql/parser/non_reserved_keywords_mysql_mode.c
src/sql/parser/non_reserved_keywords_mysql_mode.c
+1
-0
src/sql/parser/sql_parser_mysql_mode.y
src/sql/parser/sql_parser_mysql_mode.y
+6
-1
src/sql/resolver/ddl/ob_create_table_resolver.cpp
src/sql/resolver/ddl/ob_create_table_resolver.cpp
+2
-0
src/sql/resolver/ddl/ob_ddl_resolver.cpp
src/sql/resolver/ddl/ob_ddl_resolver.cpp
+3
-0
src/sql/resolver/ddl/ob_ddl_resolver.h
src/sql/resolver/ddl/ob_ddl_resolver.h
+1
-0
src/sql/resolver/ob_resolver_utils.cpp
src/sql/resolver/ob_resolver_utils.cpp
+14
-0
未找到文件。
src/objit/include/objit/common/ob_item_type.h
浏览文件 @
e6ebfb49
...
...
@@ -2967,6 +2967,8 @@ typedef enum ObItemType
T_HTTP_TIMEOUT
=
4931
,
T_HTTP_KEEPLIVE_TIME
=
4932
,
T_RANDOM_PARTITION
=
4933
,
// random distribution
T_COLUMN_NAME_CASE_SENSITIVE
=
4934
,
T_MAX
//Attention: add a new type before T_MAX
}
ObItemType
;
...
...
src/share/ob_compatibility_control_feature_def.h
浏览文件 @
e6ebfb49
...
...
@@ -66,4 +66,8 @@ DEF_COMPAT_CONTROL_FEATURE(IMPLICIT_FIRST_CENTURY_YEAR,
DEF_COMPAT_CONTROL_FEATURE
(
PLSQL_CAN_TRANSFORM_SQL_TO_ASSIGN
,
"Whether can transform the PL/SQL select into from dual statement to an assignment statement"
,
MOCK_CLUSTER_VERSION_4_4_2_0
)
DEF_COMPAT_CONTROL_FEATURE
(
EXTERNAL_COLUMN_NAME_CASE_INSENSITIVE
,
"Whether column name mapping in Parquet/ORC file is case insensitive"
,
CLUSTER_VERSION_4_5_0_0
)
#endif
src/share/schema/ob_schema_printer_external_table.cpp
浏览文件 @
e6ebfb49
...
...
@@ -283,6 +283,13 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s
OB_FAIL
(
databuff_printf
(
buf
,
buf_len
,
pos
,
"
\n
COLUMN_INDEX_TYPE = '%.*s',"
,
static_cast
<
int
>
(
STRLEN
(
column_index_type
)),
column_index_type
)))
{
SHARE_SCHEMA_LOG
(
WARN
,
"fail to print column index type"
,
K
(
ret
));
}
else
if
(
orc
.
column_name_case_sensitive_
&&
OB_FAIL
(
databuff_printf
(
buf
,
buf_len
,
pos
,
"
\n
COLUMN_NAME_CASE_SENSITIVE = %s,"
,
orc
.
column_name_case_sensitive_
?
"TRUE"
:
"FALSE"
)))
{
SHARE_SCHEMA_LOG
(
WARN
,
"fail to print column name case sensitive"
,
K
(
ret
));
}
}
else
if
(
OB_SUCC
(
ret
)
&&
ObExternalFileFormat
::
PARQUET_FORMAT
==
format
.
format_type_
)
{
const
ObParquetGeneralFormat
&
parquet
=
format
.
parquet_format_
;
...
...
@@ -291,6 +298,14 @@ int ObSchemaPrinter::print_external_table_file_info(const ObTableSchema &table_s
OB_FAIL
(
databuff_printf
(
buf
,
buf_len
,
pos
,
"
\n
COLUMN_INDEX_TYPE = '%.*s',"
,
static_cast
<
int
>
(
STRLEN
(
column_index_type
)),
column_index_type
)))
{
SHARE_SCHEMA_LOG
(
WARN
,
"fail to print column index type"
,
K
(
ret
));
}
else
if
(
parquet
.
column_name_case_sensitive_
&&
OB_FAIL
(
databuff_printf
(
buf
,
buf_len
,
pos
,
"
\n
COLUMN_NAME_CASE_SENSITIVE = %s,"
,
parquet
.
column_name_case_sensitive_
?
"TRUE"
:
"FALSE"
)))
{
SHARE_SCHEMA_LOG
(
WARN
,
"fail to print column name case sensitive"
,
K
(
ret
));
}
}
if
(
OB_SUCC
(
ret
))
{
...
...
src/sql/engine/cmd/ob_load_data_parser.cpp
浏览文件 @
e6ebfb49
...
...
@@ -765,6 +765,12 @@ int ObParquetGeneralFormat::to_json_kv_string(char *buf, const int64_t buf_len,
OZ
(
databuff_printf
(
buf
,
buf_len
,
pos
,
"
\"
%s
\"
:
\"
%s
\"
"
,
OPTION_NAMES
[
idx
++
],
column_index_type_to_string
(
column_index_type_
)));
}
if
(
GET_MIN_CLUSTER_VERSION
()
>=
CLUSTER_VERSION_4_5_0_0
)
{
OZ
(
J_COMMA
());
OZ
(
databuff_printf
(
buf
,
buf_len
,
pos
,
R"("%s":%s)"
,
OPTION_NAMES
[
idx
++
],
STR_BOOL
(
column_name_case_sensitive_
)));
}
return
ret
;
}
...
...
@@ -790,6 +796,14 @@ int ObParquetGeneralFormat::load_from_json_data(json::Pair *&node, common::ObIAl
node
=
node
->
get_next
();
}
}
if
(
OB_NOT_NULL
(
node
)
&&
0
==
node
->
name_
.
case_compare
(
OPTION_NAMES
[
idx
++
]))
{
if
(
json
::
JT_TRUE
==
node
->
value_
->
get_type
())
{
column_name_case_sensitive_
=
true
;
}
else
{
column_name_case_sensitive_
=
false
;
}
node
=
node
->
get_next
();
}
return
ret
;
}
...
...
@@ -822,6 +836,12 @@ int ObOrcGeneralFormat::to_json_kv_string(char *buf, const int64_t buf_len, int6
OZ
(
databuff_printf
(
buf
,
buf_len
,
pos
,
"
\"
%s
\"
:
\"
%s
\"
"
,
OPTION_NAMES
[
idx
++
],
column_index_type_to_string
(
column_index_type_
)));
}
if
(
GET_MIN_CLUSTER_VERSION
()
>=
CLUSTER_VERSION_4_5_0_0
)
{
OZ
(
J_COMMA
());
OZ
(
databuff_printf
(
buf
,
buf_len
,
pos
,
R"("%s":%s)"
,
OPTION_NAMES
[
idx
++
],
STR_BOOL
(
column_name_case_sensitive_
)));
}
return
ret
;
}
...
...
@@ -879,6 +899,14 @@ int ObOrcGeneralFormat::load_from_json_data(json::Pair *&node, common::ObIAlloca
node
=
node
->
get_next
();
}
}
if
(
OB_NOT_NULL
(
node
)
&&
0
==
node
->
name_
.
case_compare
(
OPTION_NAMES
[
idx
++
]))
{
if
(
json
::
JT_TRUE
==
node
->
value_
->
get_type
())
{
column_name_case_sensitive_
=
true
;
}
else
{
column_name_case_sensitive_
=
false
;
}
node
=
node
->
get_next
();
}
return
ret
;
}
...
...
src/sql/engine/cmd/ob_load_data_parser.h
浏览文件 @
e6ebfb49
...
...
@@ -233,12 +233,14 @@ struct ObParquetGeneralFormat {
ObParquetGeneralFormat
()
:
row_group_size_
(
256LL
*
1024
*
1024
),
/* default 256 MB */
compress_type_index_
(
0
),
/* default UNCOMPRESSED */
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
)
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
),
column_name_case_sensitive_
(
false
)
{}
static
constexpr
const
char
*
OPTION_NAMES
[]
=
{
"ROW_GROUP_SIZE"
,
"COMPRESSION"
,
"COLUMN_INDEX_TYPE"
"COLUMN_INDEX_TYPE"
,
"COLUMN_NAME_CASE_SENSITIVE"
};
static
constexpr
const
char
*
COMPRESSION_ALGORITHMS
[]
=
{
"UNCOMPRESSED"
,
...
...
@@ -257,9 +259,13 @@ struct ObParquetGeneralFormat {
int64_t
row_group_size_
;
int64_t
compress_type_index_
;
sql
::
ColumnIndexType
column_index_type_
;
bool
column_name_case_sensitive_
;
int
to_json_kv_string
(
char
*
buf
,
const
int64_t
buf_len
,
int64_t
&
pos
)
const
;
int
load_from_json_data
(
json
::
Pair
*&
node
,
common
::
ObIAllocator
&
allocator
);
TO_STRING_KV
(
K_
(
row_group_size
),
K_
(
compress_type_index
),
K_
(
column_index_type
));
TO_STRING_KV
(
K_
(
row_group_size
),
K_
(
compress_type_index
),
K_
(
column_index_type
),
K_
(
column_name_case_sensitive
));
OB_UNIS_VERSION
(
1
);
};
...
...
@@ -270,7 +276,8 @@ struct ObOrcGeneralFormat {
compression_block_size_
(
256LL
*
1024
),
/* default 256 KB */
row_index_stride_
(
10000
),
column_use_bloom_filter_
(),
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
)
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
),
column_name_case_sensitive_
(
false
)
{}
static
constexpr
const
char
*
OPTION_NAMES
[]
=
{
"STRIPE_SIZE"
,
...
...
@@ -278,7 +285,8 @@ struct ObOrcGeneralFormat {
"COMPRESSION_BLOCK_SIZE"
,
"ROW_INDEX_STRIDE"
,
"COLUMN_USE_BLOOM_FILTER"
,
"COLUMN_INDEX_TYPE"
"COLUMN_INDEX_TYPE"
,
"COLUMN_NAME_CASE_SENSITIVE"
};
static
constexpr
const
char
*
COMPRESSION_ALGORITHMS
[]
=
{
"UNCOMPRESSED"
,
...
...
@@ -296,11 +304,16 @@ struct ObOrcGeneralFormat {
int64_t
row_index_stride_
;
common
::
ObArrayWrap
<
int64_t
>
column_use_bloom_filter_
;
sql
::
ColumnIndexType
column_index_type_
;
bool
column_name_case_sensitive_
;
int
to_json_kv_string
(
char
*
buf
,
const
int64_t
buf_len
,
int64_t
&
pos
)
const
;
int
load_from_json_data
(
json
::
Pair
*&
node
,
common
::
ObIAllocator
&
allocator
);
TO_STRING_KV
(
K
(
stripe_size_
),
K
(
compress_type_index_
),
K
(
compression_block_size_
),
K
(
row_index_stride_
),
K
(
column_use_bloom_filter_
),
K
(
column_index_type_
));
TO_STRING_KV
(
K
(
stripe_size_
),
K
(
compress_type_index_
),
K
(
compression_block_size_
),
K
(
row_index_stride_
),
K
(
column_use_bloom_filter_
),
K
(
column_index_type_
),
K
(
column_name_case_sensitive_
));
OB_UNIS_VERSION
(
1
);
};
...
...
src/sql/engine/table/ob_orc_table_row_iter.cpp
浏览文件 @
e6ebfb49
...
...
@@ -52,7 +52,14 @@ int ObOrcTableRowIterator::build_type_name_id_map(const orc::Type* type, ObIArra
OZ
(
col_names
.
push_back
(
field_name
));
ObString
path
;
OZ
(
to_dot_column_path
(
col_names
,
path
));
OZ
(
name_to_id_
.
set_refactored
(
path
,
type
->
getSubtype
(
i
)
->
getColumnId
(),
1
/*overwrite*/
));
if
(
!
is_col_name_case_sensitive_
)
{
ObString
capitalize_str
;
OZ
(
ob_simple_low_to_up
(
allocator_
,
path
,
capitalize_str
));
OZ
(
name_to_id_
.
set_refactored
(
capitalize_str
,
type
->
getSubtype
(
i
)
->
getColumnId
(),
1
/*overwrite*/
));
}
else
{
OZ
(
name_to_id_
.
set_refactored
(
path
,
type
->
getSubtype
(
i
)
->
getColumnId
(),
1
/*overwrite*/
));
}
OZ
(
build_type_name_id_map
(
type
->
getSubtype
(
i
),
col_names
));
if
(
OB_FAIL
(
ret
))
{
}
else
if
(
col_names
.
count
()
>
0
)
{
...
...
@@ -77,7 +84,13 @@ int ObOrcTableRowIterator::compute_column_id_by_index_type(int64_t index, int64_
ObDataAccessPathExtraInfo
*
data_access_info
=
static_cast
<
ObDataAccessPathExtraInfo
*>
(
file_column_exprs_
.
at
(
index
)
->
extra_info_
);
col_name
=
data_access_info
->
data_access_path_
;
OZ
(
name_to_id_
.
get_refactored
(
col_name
,
orc_col_id
));
if
(
!
is_col_name_case_sensitive_
)
{
ObString
capitalize_str
;
OZ
(
ob_simple_low_to_up
(
allocator_
,
col_name
,
capitalize_str
));
OZ
(
name_to_id_
.
get_refactored
(
capitalize_str
,
orc_col_id
));
}
else
{
OZ
(
name_to_id_
.
get_refactored
(
col_name
,
orc_col_id
));
}
break
;
}
case
sql
::
ColumnIndexType
::
POSITION
:
...
...
@@ -332,6 +345,25 @@ int ObOrcTableRowIterator::init(const storage::ObTableScanParam *scan_param)
OZ
(
init_query_flag
());
}
}
bool
insensitive_feature_enabled
=
false
;
uint64_t
compat_version
=
0
;
OZ
(
scan_param
->
op_
->
get_eval_ctx
().
exec_ctx_
.
get_my_session
()
->
get_compatibility_version
(
compat_version
));
OZ
(
ObCompatControl
::
check_feature_enable
(
compat_version
,
ObCompatFeatureType
::
EXTERNAL_COLUMN_NAME_CASE_INSENSITIVE
,
insensitive_feature_enabled
));
if
(
OB_SUCC
(
ret
))
{
if
(
insensitive_feature_enabled
)
{
is_col_name_case_sensitive_
=
scan_param
->
external_file_format_
.
orc_format_
.
column_name_case_sensitive_
;
}
else
{
is_col_name_case_sensitive_
=
true
;
}
}
return
ret
;
}
...
...
@@ -899,12 +931,52 @@ bool ObOrcTableRowIterator::is_contain_attribute_key(const orc::Type *type)
return
contains_id
;
}
/**
 * Walk an ORC type tree depth-first looking for the column whose path equals
 * `col_name`, and report its ORC column id through `type_id`.
 *
 * For a STRUCT node every field name is copied with `allocator_`, pushed onto
 * `col_names`, and turned into a path by to_dot_column_path(); that path is
 * compared against `col_name` (case-insensitively when
 * `is_col_name_case_sensitive_` is false, exactly otherwise). A non-matching
 * field is searched recursively. For any other kind of node the subtypes are
 * searched recursively without touching `col_names`.
 *
 * @param type       node to search; a null pointer is rejected through CK().
 * @param col_name   column path requested by the caller.
 * @param col_names  stack of field names from the root down to the current
 *                   node; each pushed entry is popped again unless an error
 *                   occurred.
 * @param type_id    in/out: the loops only run while this is 0, so the caller
 *                   is expected to pass 0; it is overwritten with the column
 *                   id of the first match and left untouched when nothing
 *                   matches.
 * @return OB_SUCCESS, or the first error produced by CK()/OZ().
 */
int ObOrcTableRowIterator::find_column_type_id_by_name(const orc::Type *type,
                                                       const ObString &col_name,
                                                       ObIArray<ObString> &col_names,
                                                       uint64_t &type_id)
{
  int ret = OB_SUCCESS;
  CK (type != nullptr);
  // `type` is only dereferenced when the null check above succeeded.
  const bool is_struct_node = OB_SUCC(ret) && orc::TypeKind::STRUCT == type->getKind();
  if (!is_struct_node) {
    // Non-struct node (or failed null check, in which case the loop never
    // runs): keep searching in the subtypes until something is found.
    for (size_t child_idx = 0;
         OB_SUCC(ret) && child_idx < type->getSubtypeCount() && type_id == 0;
         ++child_idx) {
      OZ (find_column_type_id_by_name(type->getSubtype(child_idx), col_name, col_names, type_id));
    }
  } else {
    for (size_t field_idx = 0;
         OB_SUCC(ret) && field_idx < type->getSubtypeCount() && type_id == 0;
         ++field_idx) {
      const std::string &orc_field_name = type->getFieldName(field_idx);
      ObString field_copy;
      ObString dotted_path;
      OZ (ob_write_string(allocator_, ObString(orc_field_name.c_str()), field_copy));
      OZ (col_names.push_back(field_copy));
      OZ (to_dot_column_path(col_names, dotted_path));

      // Pick the comparison according to the table's case-sensitivity option.
      bool path_matches = false;
      if (is_col_name_case_sensitive_) {
        path_matches = (0 == col_name.compare(dotted_path));
      } else {
        path_matches = (0 == col_name.case_compare(dotted_path));
      }

      if (!path_matches) {
        // Not this field: descend into it with the current name still pushed.
        OZ (find_column_type_id_by_name(type->getSubtype(field_idx), col_name, col_names, type_id));
      } else {
        type_id = type->getSubtype(field_idx)->getColumnId();
      }

      // Restore the name stack for the next sibling; skipped on error.
      if (OB_SUCC(ret) && col_names.count() > 0) {
        col_names.pop_back();
      }
    }
  }
  return ret;
}
int
ObOrcTableRowIterator
::
create_row_readers
()
{
int
ret
=
OB_SUCCESS
;
std
::
list
<
std
::
string
>
project_column_names
;
std
::
list
<
uint64_t
>
project_column_ids
;
std
::
list
<
std
::
string
>
eager_column_names
;
std
::
list
<
uint64_t
>
eager_column_ids
;
column_index_type_
=
scan_param_
->
external_file_format_
.
orc_format_
.
column_index_type_
;
...
...
@@ -922,21 +994,35 @@ int ObOrcTableRowIterator::create_row_readers()
if
(
OB_SUCC
(
ret
))
{
switch
(
column_index_type_
)
{
case
sql
::
ColumnIndexType
::
NAME
:
{
orc
::
RowReaderOptions
rowReaderOptions
;
all_row_reader_
=
reader_
->
createRowReader
(
rowReaderOptions
);
for
(
int64_t
i
=
0
;
OB_SUCC
(
ret
)
&&
i
<
file_column_exprs_
.
count
();
i
++
)
{
ObDataAccessPathExtraInfo
*
data_access_info
=
static_cast
<
ObDataAccessPathExtraInfo
*>
(
file_column_exprs_
.
at
(
i
)
->
extra_info_
);
ObDataAccessPathExtraInfo
*
data_access_info
=
static_cast
<
ObDataAccessPathExtraInfo
*>
(
file_column_exprs_
.
at
(
i
)
->
extra_info_
);
uint64_t
orc_col_id
=
0
;
ObArray
<
ObString
>
col_names
;
OZ
(
find_column_type_id_by_name
(
&
all_row_reader_
->
getSelectedType
(),
data_access_info
->
data_access_path_
,
col_names
,
orc_col_id
));
if
(
OB_FAIL
(
ret
))
{
}
else
if
(
orc_col_id
==
0
)
{
ret
=
OB_INVALID_EXTERNAL_FILE_COLUMN_PATH
;
LOG_USER_ERROR
(
OB_INVALID_EXTERNAL_FILE_COLUMN_PATH
,
data_access_info
->
data_access_path_
.
length
(),
data_access_info
->
data_access_path_
.
ptr
());
}
if
(
OB_SUCC
(
ret
))
{
std
::
string
col_name
(
data_access_info
->
data_access_path_
.
ptr
(),
data_access_info
->
data_access_path_
.
length
());
bool
is_project_column
=
true
;
if
(
is_eager_column_
.
count
()
>
0
&&
is_eager_column_
.
at
(
i
))
{
eager_column_
names
.
push_front
(
col_name
);
eager_column_
ids
.
push_back
(
orc_col_id
);
if
(
!
is_dup_project_
.
at
(
i
))
{
is_project_column
=
false
;
}
}
if
(
is_project_column
)
{
project_column_
names
.
push_front
(
col_name
);
project_column_
ids
.
push_front
(
orc_col_id
);
}
}
}
...
...
@@ -945,7 +1031,7 @@ int ObOrcTableRowIterator::create_row_readers()
case
sql
::
ColumnIndexType
::
POSITION
:
{
for
(
uint64_t
i
=
0
;
OB_SUCC
(
ret
)
&&
i
<
file_column_exprs_
.
count
();
i
++
)
{
bool
is_project_column
=
true
;
int64_t
column_id
=
file_column_exprs_
.
at
(
i
)
->
extra_
-
1
;
int64_t
column_id
=
file_column_exprs_
.
at
(
i
)
->
extra_
;
if
(
is_eager_column_
.
count
()
>
0
&&
is_eager_column_
.
at
(
i
))
{
eager_column_ids
.
push_back
(
column_id
);
if
(
!
is_dup_project_
.
at
(
i
))
{
...
...
@@ -1007,15 +1093,11 @@ int ObOrcTableRowIterator::create_row_readers()
}
else
{
project_reader_
.
init
(
capacity
,
project_column_ids
,
reader_
.
get
());
}
}
else
if
(
project_column_names
.
size
()
>
0
)
{
project_reader_
.
init
(
capacity
,
project_column_names
,
reader_
.
get
());
}
else
{
project_reader_
.
row_id_
=
0
;
}
if
(
sector_reader_
!=
nullptr
)
{
if
(
eager_column_names
.
size
()
>
0
)
{
sector_reader_
->
get_eager_reader
().
init
(
capacity
,
eager_column_names
,
reader_
.
get
());
}
else
if
(
eager_column_ids
.
size
()
>
0
)
{
if
(
eager_column_ids
.
size
()
>
0
)
{
if
(
is_hive_lake_table
())
{
sector_reader_
->
get_eager_reader
().
init_for_hive_table
(
capacity
,
eager_column_ids
,
reader_
.
get
());
}
else
{
...
...
@@ -2908,19 +2990,12 @@ int ObOrcTableRowIterator::DataLoader::load_double(ObEvalCtx &eval_ctx)
return
ret
;
}
template
<
typename
T
>
void
ObOrcTableRowIterator
::
OrcRowReader
::
init
(
int64_t
capacity
,
const
std
::
list
<
T
>&
include_columns
,
const
std
::
list
<
uint64_t
>&
include_columns
,
orc
::
Reader
*
reader
)
{
orc
::
RowReaderOptions
rowReaderOptions
;
if
constexpr
(
std
::
is_same_v
<
T
,
uint64_t
>
)
{
// 如果是uint64_t,使用includeTypes(传入的是type ID)
rowReaderOptions
.
includeTypes
(
include_columns
);
}
else
{
// 如果是string,使用include(传入的是字段名)
rowReaderOptions
.
include
(
include_columns
);
}
rowReaderOptions
.
includeTypes
(
include_columns
);
row_reader_
=
reader
->
createRowReader
(
rowReaderOptions
);
// create orc read batch for reuse.
orc_batch_
=
row_reader_
->
createRowBatch
(
capacity
);
...
...
src/sql/engine/table/ob_orc_table_row_iter.h
浏览文件 @
e6ebfb49
...
...
@@ -246,7 +246,7 @@ namespace sql {
ObOrcTableRowIterator
()
:
query_flag_
(
0
),
inner_sector_reader_
(
nullptr
),
sector_reader_
(
nullptr
),
bit_vector_cache_
(
NULL
),
options_
(),
file_prebuffer_
(
data_access_driver_
),
reader_metrics_
(),
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
)
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
)
,
is_col_name_case_sensitive_
(
false
)
{}
virtual
~
ObOrcTableRowIterator
()
{
...
...
@@ -330,8 +330,7 @@ private:
data_loaders_
.
reset
();
}
template
<
typename
T
>
void
init
(
int64_t
capacity
,
const
std
::
list
<
T
>&
include_columns
,
orc
::
Reader
*
reader
);
void
init
(
int64_t
capacity
,
const
std
::
list
<
uint64_t
>&
include_columns
,
orc
::
Reader
*
reader
);
void
init_for_hive_table
(
int64_t
capacity
,
const
std
::
list
<
uint64_t
>&
include_columns
,
orc
::
Reader
*
reader
);
...
...
@@ -436,6 +435,10 @@ private:
OrcRowReader
&
reader
,
ObColumnDefaultValue
*
default_value
);
int
compute_column_id_by_index_type
(
int64_t
index
,
int64_t
&
orc_col_id
);
int
to_dot_column_path
(
ObIArray
<
ObString
>
&
col_names
,
ObString
&
path
);
int
find_column_type_id_by_name
(
const
orc
::
Type
*
type
,
const
ObString
&
col_name
,
ObIArray
<
ObString
>
&
col_names
,
uint64_t
&
type_id
);
int
get_data_column_batch
(
const
orc
::
Type
*
type
,
const
orc
::
StructVectorBatch
*
root_batch
,
const
int
col_id
,
orc
::
ColumnVectorBatch
*&
batch
);
ObExternalTableAccessOptions
&
make_external_table_access_options
(
stmt
::
StmtType
stmt_type
);
...
...
@@ -518,6 +521,7 @@ private:
common
::
ObArrayWrap
<
ObFilePreBuffer
::
ColumnRangeSlices
*>
column_range_slices_
;
ObLakeTableReaderMetrics
reader_metrics_
;
sql
::
ColumnIndexType
column_index_type_
;
bool
is_col_name_case_sensitive_
;
};
}
...
...
src/sql/engine/table/ob_parquet_table_row_iter.cpp
浏览文件 @
e6ebfb49
...
...
@@ -162,6 +162,24 @@ int ObParquetTableRowIterator::init(const storage::ObTableScanParam *scan_param)
mem_attr_
));
}
bool
insensitive_feature_enabled
=
false
;
uint64_t
compat_version
=
0
;
OZ
(
scan_param
->
op_
->
get_eval_ctx
().
exec_ctx_
.
get_my_session
()
->
get_compatibility_version
(
compat_version
));
OZ
(
ObCompatControl
::
check_feature_enable
(
compat_version
,
ObCompatFeatureType
::
EXTERNAL_COLUMN_NAME_CASE_INSENSITIVE
,
insensitive_feature_enabled
));
if
(
OB_SUCC
(
ret
))
{
if
(
insensitive_feature_enabled
)
{
is_col_name_case_sensitive_
=
scan_param
->
external_file_format_
.
parquet_format_
.
column_name_case_sensitive_
;
}
else
{
is_col_name_case_sensitive_
=
true
;
}
}
return
ret
;
}
...
...
@@ -185,21 +203,17 @@ int ObParquetTableRowIterator::compute_column_id_by_index_type(int index, int &f
case
sql
::
ColumnIndexType
::
NAME
:
{
ObDataAccessPathExtraInfo
*
data_access_info
=
static_cast
<
ObDataAccessPathExtraInfo
*>
(
file_column_exprs_
.
at
(
index
)
->
extra_info_
);
file_col_id
=
-
1
;
if
(
is_collection_column
)
{
for
(
int
i
=
0
;
i
<
file_meta_
->
schema
()
->
num_columns
();
i
++
)
{
const
std
::
string
&
field_path
=
file_meta_
->
schema
()
->
GetColumnRoot
(
i
)
->
name
();
if
(
field_path
.
compare
(
0
,
field_path
.
length
(),
data_access_info
->
data_access_path_
.
ptr
(),
data_access_info
->
data_access_path_
.
length
())
==
0
)
{
file_col_id
=
i
;
break
;
}
for
(
int
i
=
0
;
i
<
file_meta_
->
schema
()
->
num_columns
();
i
++
)
{
const
std
::
string
&
field_path
=
file_meta_
->
schema
()
->
GetColumnRoot
(
i
)
->
name
();
ObString
field_path_obstr
(
field_path
.
length
(),
field_path
.
c_str
());
if
(
!
is_col_name_case_sensitive_
?
data_access_info
->
data_access_path_
.
case_compare
(
field_path_obstr
)
==
0
:
data_access_info
->
data_access_path_
.
compare
(
field_path_obstr
)
==
0
)
{
file_col_id
=
i
;
break
;
}
}
else
{
file_col_id
=
file_meta_
->
schema
()
->
ColumnIndex
(
std
::
string
(
data_access_info
->
data_access_path_
.
ptr
(),
data_access_info
->
data_access_path_
.
length
()));
}
break
;
}
case
sql
::
ColumnIndexType
::
POSITION
:
{
...
...
src/sql/engine/table/ob_parquet_table_row_iter.h
浏览文件 @
e6ebfb49
...
...
@@ -168,7 +168,8 @@ public:
stat_
(),
mode_
(
FilterCalcMode
::
DYNAMIC_EAGER_CALC
),
reader_metrics_
(),
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
)
{}
column_index_type_
(
sql
::
ColumnIndexType
::
NAME
),
is_col_name_case_sensitive_
(
false
)
{}
virtual
~
ObParquetTableRowIterator
();
int
init
(
const
storage
::
ObTableScanParam
*
scan_param
)
override
;
...
...
@@ -495,6 +496,7 @@ private:
FilterCalcMode
mode_
;
ObLakeTableReaderMetrics
reader_metrics_
;
sql
::
ColumnIndexType
column_index_type_
;
bool
is_col_name_case_sensitive_
;
};
}
...
...
src/sql/parser/non_reserved_keywords_mysql_mode.c
浏览文件 @
e6ebfb49
...
...
@@ -161,6 +161,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] =
{
"column_format"
,
COLUMN_FORMAT
},
{
"column_index_type"
,
COLUMN_INDEX_TYPE
},
{
"column_name"
,
COLUMN_NAME
},
{
"column_name_case_sensitive"
,
COLUMN_NAME_CASE_SENSITIVE
},
{
"column"
,
COLUMN
},
{
"columns"
,
COLUMNS
},
{
"column_bloom_filter"
,
COLUMN_BLOOM_FILTER
},
...
...
src/sql/parser/sql_parser_mysql_mode.y
浏览文件 @
e6ebfb49
...
...
@@ -281,7 +281,7 @@ END_P SET_VAR DELIMITER
CACHE CALIBRATION CALIBRATION_INFO CANCEL CASCADED CAST CATALOG CATALOGS CATALOG_NAME CHAIN CHANGED CHARSET CHECKSUM CHECKPOINT CHUNK CIPHER
CLASS_ORIGIN CLEAN CLEAR CLIENT CLONE CLOG CLOSE CLUSTER CLUSTERING CLUSTER_ID CLUSTER_NAME COALESCE COLUMN_BLOOM_FILTER COLUMN_STAT
CODE COLLATION COLLECT_STATISTICS_ON_CREATE COLUMN_FORMAT COLUMN_INDEX_TYPE COLUMN_NAME COLUMNS COMMENT COMMIT COMMITTED COMPACT COMPLETION COMPLETE
CODE COLLATION COLLECT_STATISTICS_ON_CREATE COLUMN_FORMAT COLUMN_INDEX_TYPE COLUMN_NAME COLUMN
_NAME_CASE_SENSITIVE COLUMN
S COMMENT COMMIT COMMITTED COMPACT COMPLETION COMPLETE
COMPRESSED COMPRESSION COMPRESSION_BLOCK_SIZE COMPRESSION_CODE COMPUTATION COMPUTE CONCURRENT CONCURRENT_LIMITING_RULE CONDENSED CONDITIONAL CONFIGS CONNECTION CONSISTENT CONSISTENT_MODE CONSTRAINT_CATALOG
CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT CONTRIBUTORS COPY COSINE COUNT CPU CREATE_TIMESTAMP CREDENTIAL
CTXCAT CTX_ID CUBE CURDATE CURRENT STACKED CURTIME CURSOR_NAME CUME_DIST CYCLE CALC_PARTITION_ID CONNECT CACHE_REFRESH_INTERVAL_SEC
...
...
@@ -9739,6 +9739,10 @@ TYPE COMP_EQ STRING_VALUE
{
malloc_non_terminal_node($$, result->malloc_pool_, T_IGNORE_LAST_EMPTY_COLUMN, 1, $3);
}
| COLUMN_NAME_CASE_SENSITIVE COMP_EQ BOOL_VALUE
{
malloc_non_terminal_node($$, result->malloc_pool_, T_COLUMN_NAME_CASE_SENSITIVE, 1, $3);
}
;
/*****************************************************************************
...
...
@@ -26475,6 +26479,7 @@ ACCESS_INFO
| COLUMN_FORMAT
| COLUMN_INDEX_TYPE
| COLUMN_NAME
| COLUMN_NAME_CASE_SENSITIVE
| COLUMN_STAT
| COLUMNS
| COMMENT
...
...
src/sql/resolver/ddl/ob_create_table_resolver.cpp
浏览文件 @
e6ebfb49
...
...
@@ -3623,8 +3623,10 @@ int ObCreateTableResolver::resolve_external_table_format_early(const ParseNode *
}
else
{
if
(
format
.
format_type_
==
ObExternalFileFormat
::
FormatType
::
ORC_FORMAT
)
{
column_index_type_
=
format
.
orc_format_
.
column_index_type_
;
column_name_case_sensitive_
=
format
.
orc_format_
.
column_name_case_sensitive_
;
}
else
if
(
format
.
format_type_
==
ObExternalFileFormat
::
FormatType
::
PARQUET_FORMAT
)
{
column_index_type_
=
format
.
parquet_format_
.
column_index_type_
;
column_name_case_sensitive_
=
format
.
parquet_format_
.
column_name_case_sensitive_
;
}
}
}
...
...
src/sql/resolver/ddl/ob_ddl_resolver.cpp
浏览文件 @
e6ebfb49
...
...
@@ -113,6 +113,7 @@ ObDDLResolver::ObDDLResolver(ObResolverParams ¶ms)
auto_increment_cache_size_(0),
external_table_format_type_(ObExternalFileFormat::INVALID_FORMAT),
column_index_type_(sql::ColumnIndexType::NAME),
column_name_case_sensitive_(false),
mocked_external_table_column_ids_(),
index_params_(),
table_organization_(ObTableOrganizationType::OB_ORGANIZATION_INVALID),
...
...
@@ -3792,8 +3793,10 @@ int ObDDLResolver::resolve_column_definition(ObColumnSchemaV2 &column,
format.format_type_ = external_table_format_type_;
if (format.format_type_ == ObExternalFileFormat::FormatType::ORC_FORMAT) {
format.orc_format_.column_index_type_ = column_index_type_;
format.orc_format_.column_name_case_sensitive_ = column_name_case_sensitive_;
} else if (format.format_type_ == ObExternalFileFormat::FormatType::PARQUET_FORMAT) {
format.parquet_format_.column_index_type_ = column_index_type_;
format.parquet_format_.column_name_case_sensitive_ = column_name_case_sensitive_;
}
ObString mock_gen_column_str;
if (OB_FAIL(format.mock_gen_column_def(column, *allocator_, mock_gen_column_str))) {
...
...
src/sql/resolver/ddl/ob_ddl_resolver.h
浏览文件 @
e6ebfb49
...
...
@@ -1171,6 +1171,7 @@ protected:
int64_t
auto_increment_cache_size_
;
ObExternalFileFormat
::
FormatType
external_table_format_type_
;
sql
::
ColumnIndexType
column_index_type_
;
bool
column_name_case_sensitive_
;
common
::
ObBitSet
<>
mocked_external_table_column_ids_
;
common
::
ObString
index_params_
;
ObTableOrganizationType
table_organization_
;
...
...
src/sql/resolver/ob_resolver_utils.cpp
浏览文件 @
e6ebfb49
...
...
@@ -10612,6 +10612,20 @@ int ObResolverUtils::resolve_file_format(const ParseNode *node, ObExternalFileFo
}
break
;
}
case
T_COLUMN_NAME_CASE_SENSITIVE
:
{
if
(
GET_MIN_CLUSTER_VERSION
()
<
CLUSTER_VERSION_4_5_0_0
)
{
ret
=
OB_NOT_SUPPORTED
;
LOG_USER_ERROR
(
OB_NOT_SUPPORTED
,
"cluster version is less than 4.5.0.0, column_name_case_sensitive"
);
}
else
if
(
format
.
format_type_
==
ObExternalFileFormat
::
PARQUET_FORMAT
)
{
format
.
parquet_format_
.
column_name_case_sensitive_
=
node
->
children_
[
0
]
->
value_
;
}
else
if
(
format
.
format_type_
==
ObExternalFileFormat
::
ORC_FORMAT
)
{
format
.
orc_format_
.
column_name_case_sensitive_
=
node
->
children_
[
0
]
->
value_
;
}
else
{
ret
=
OB_INVALID_ARGUMENT
;
LOG_WARN
(
"invalid file format option"
,
K
(
ret
),
K
(
node
->
type_
));
}
break
;
}
default:
{
ret
=
OB_INVALID_ARGUMENT
;
LOG_WARN
(
"invalid file format option"
,
K
(
ret
),
K
(
node
->
type_
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录