提交 e142f566 编写于 作者: B bf0 提交者: wangzelin.wzl

Fix LOAD DATA handling of NULL fields

上级 14588491
......@@ -1277,7 +1277,8 @@ int ObCSVParser::next_line(bool& yield_line)
if (is_escaped_flag_) {
escaped_res = escaped_char(*cur_pos_, &with_back_slash);
}
if (cur_field_end_pos_ != cur_pos_ && !is_fast_parse_) {
bool has_escaped = cur_field_end_pos_ != cur_pos_;
if (has_escaped && !is_fast_parse_) {
*cur_field_end_pos_ = escaped_res;
}
......@@ -1285,12 +1286,13 @@ int ObCSVParser::next_line(bool& yield_line)
if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) {
if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) {
handle_one_field(cur_field_end_pos_);
handle_one_field(cur_field_end_pos_, has_escaped);
field_id_++;
}
char* next_pos = cur_pos_ + 1;
cur_field_begin_pos_ = next_pos;
cur_field_end_pos_ = cur_pos_;
in_enclose_flag_ = false;
if (line_term_matched && (!formats_.is_line_term_by_counting_field_ || field_id_ == total_field_nums_)) {
if (OB_UNLIKELY(field_id_ != total_field_nums_)) {
ret = deal_with_irregular_line();
......@@ -1313,7 +1315,8 @@ int ObCSVParser::next_line(bool& yield_line)
if (!yield && is_last_buf_ && cur_pos_ == buf_end_pos_) {
if (cur_field_begin_pos_ < cur_pos_) {
// new field, terminated with an eof
handle_one_field(cur_field_end_pos_);
bool has_escaped = cur_field_end_pos_ != cur_pos_;
handle_one_field(cur_field_end_pos_, has_escaped);
field_id_++;
}
cur_field_begin_pos_ = cur_pos_;
......
......@@ -575,10 +575,10 @@ public:
}
private:
bool is_terminate_char(char cur_char, char*& cur_pos, bool& is_line_term);
bool is_enclosed_field_start(char* cur_pos, char& cur_char);
void handle_one_field(char* field_end_pos);
void deal_with_empty_field(ObString& field_str, int64_t index);
bool is_terminate_char(char cur_char, char *&cur_pos, bool &is_line_term);
bool is_enclosed_field_start(char *cur_pos, char &cur_char);
void handle_one_field(char *field_end_pos, bool has_escaped);
void deal_with_empty_field(ObString &field_str, int64_t index);
// void deal_with_field_with_escaped_chars(ObString &field_str);
int deal_with_irregular_line();
void remove_enclosed_char(char*& cur_field_end_pos);
......@@ -647,7 +647,6 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo
// when in_enclose_flag_ is true, a terminator char is valid only if an enclose char precedes it
if (static_cast<int64_t>(*pre_pos) == formats_.enclose_char_ &&
cur_field_begin_pos_ != pre_pos) { // 123---->'---->123
in_enclose_flag_ = false;
remove_enclosed_char(cur_pos);
ret_bool = true; // return true
} else {
......@@ -670,14 +669,17 @@ OB_INLINE bool ObCSVParser::is_enclosed_field_start(char* cur_pos, char& cur_cha
&& cur_pos == cur_field_begin_pos_;
}
OB_INLINE void ObCSVParser::handle_one_field(char* field_end_pos)
OB_INLINE void ObCSVParser::handle_one_field(char *field_end_pos, bool has_escaped)
{
if (OB_LIKELY(field_id_ < total_field_nums_)) {
int32_t str_len = static_cast<int32_t>(field_end_pos - cur_field_begin_pos_);
if (OB_UNLIKELY(str_len <= 0)) {
deal_with_empty_field(values_in_line_.at(field_id_), field_id_);
} else {
if (str_len == 1 && *cur_field_begin_pos_ == 'N' && cur_pos_ - cur_field_begin_pos_ == 2) {
if (!in_enclose_flag_ &&
((str_len == 1 && *cur_field_begin_pos_ == 'N' && has_escaped && cur_pos_ - cur_field_begin_pos_ == 2) ||
(formats_.enclose_char_ != INT64_MAX && !has_escaped && str_len == 4 &&
0 == MEMCMP(cur_field_begin_pos_, "NULL", 4)))) {
values_in_line_.at(field_id_).assign_ptr(&ObLoadDataUtils::NULL_VALUE_FLAG, 1);
} else {
values_in_line_.at(field_id_).assign_ptr(cur_field_begin_pos_, str_len);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册