From e142f566bd46d8bd37b51c99de85cda0dd448e7a Mon Sep 17 00:00:00 2001 From: bf0 Date: Tue, 24 Aug 2021 21:00:22 +0800 Subject: [PATCH] fix load data handle null field --- src/sql/engine/cmd/ob_load_data_impl.cpp | 9 ++++++--- src/sql/engine/cmd/ob_load_data_impl.h | 16 +++++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/sql/engine/cmd/ob_load_data_impl.cpp b/src/sql/engine/cmd/ob_load_data_impl.cpp index 291f9d59cf..b936d66720 100644 --- a/src/sql/engine/cmd/ob_load_data_impl.cpp +++ b/src/sql/engine/cmd/ob_load_data_impl.cpp @@ -1277,7 +1277,8 @@ int ObCSVParser::next_line(bool& yield_line) if (is_escaped_flag_) { escaped_res = escaped_char(*cur_pos_, &with_back_slash); } - if (cur_field_end_pos_ != cur_pos_ && !is_fast_parse_) { + bool has_escaped = cur_field_end_pos_ != cur_pos_; + if (has_escaped && !is_fast_parse_) { *cur_field_end_pos_ = escaped_res; } @@ -1285,12 +1286,13 @@ int ObCSVParser::next_line(bool& yield_line) if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) { if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) { - handle_one_field(cur_field_end_pos_); + handle_one_field(cur_field_end_pos_, has_escaped); field_id_++; } char* next_pos = cur_pos_ + 1; cur_field_begin_pos_ = next_pos; cur_field_end_pos_ = cur_pos_; + in_enclose_flag_ = false; if (line_term_matched && (!formats_.is_line_term_by_counting_field_ || field_id_ == total_field_nums_)) { if (OB_UNLIKELY(field_id_ != total_field_nums_)) { ret = deal_with_irregular_line(); @@ -1313,7 +1315,8 @@ int ObCSVParser::next_line(bool& yield_line) if (!yield && is_last_buf_ && cur_pos_ == buf_end_pos_) { if (cur_field_begin_pos_ < cur_pos_) { // new field, terminated with an eof - handle_one_field(cur_field_end_pos_); + bool has_escaped = cur_field_end_pos_ != cur_pos_; + handle_one_field(cur_field_end_pos_, has_escaped); field_id_++; } cur_field_begin_pos_ = cur_pos_; diff --git a/src/sql/engine/cmd/ob_load_data_impl.h 
b/src/sql/engine/cmd/ob_load_data_impl.h index 08a868d90a..59429fc785 100644 --- a/src/sql/engine/cmd/ob_load_data_impl.h +++ b/src/sql/engine/cmd/ob_load_data_impl.h @@ -575,10 +575,10 @@ public: } private: - bool is_terminate_char(char cur_char, char*& cur_pos, bool& is_line_term); - bool is_enclosed_field_start(char* cur_pos, char& cur_char); - void handle_one_field(char* field_end_pos); - void deal_with_empty_field(ObString& field_str, int64_t index); + bool is_terminate_char(char cur_char, char *&cur_pos, bool &is_line_term); + bool is_enclosed_field_start(char *cur_pos, char &cur_char); + void handle_one_field(char *field_end_pos, bool has_escaped); + void deal_with_empty_field(ObString &field_str, int64_t index); // void deal_with_field_with_escaped_chars(ObString &field_str); int deal_with_irregular_line(); void remove_enclosed_char(char*& cur_field_end_pos); @@ -647,7 +647,6 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo // with in_enclose_flag_ = true, a term char is valid only if an enclosed char before it if (static_cast<int64_t>(*pre_pos) == formats_.enclose_char_ && cur_field_begin_pos_ != pre_pos) { // 123---->'---->123 - in_enclose_flag_ = false; remove_enclosed_char(cur_pos); ret_bool = true; // return true } else { @@ -670,14 +669,17 @@ OB_INLINE bool ObCSVParser::is_enclosed_field_start(char* cur_pos, char& cur_cha && cur_pos == cur_field_begin_pos_; } -OB_INLINE void ObCSVParser::handle_one_field(char* field_end_pos) +OB_INLINE void ObCSVParser::handle_one_field(char *field_end_pos, bool has_escaped) { if (OB_LIKELY(field_id_ < total_field_nums_)) { int32_t str_len = static_cast<int32_t>(field_end_pos - cur_field_begin_pos_); if (OB_UNLIKELY(str_len <= 0)) { deal_with_empty_field(values_in_line_.at(field_id_), field_id_); } else { - if (str_len == 1 && *cur_field_begin_pos_ == 'N' && cur_pos_ - cur_field_begin_pos_ == 2) { + if (!in_enclose_flag_ && + ((str_len == 1 && *cur_field_begin_pos_ == 'N' && has_escaped && cur_pos_ - 
cur_field_begin_pos_ == 2) || + (formats_.enclose_char_ != INT64_MAX && !has_escaped && str_len == 4 && + 0 == MEMCMP(cur_field_begin_pos_, "NULL", 4)))) { values_in_line_.at(field_id_).assign_ptr(&ObLoadDataUtils::NULL_VALUE_FLAG, 1); } else { values_in_line_.at(field_id_).assign_ptr(cur_field_begin_pos_, str_len); -- GitLab