diff --git a/src/sql/engine/cmd/ob_load_data_impl.cpp b/src/sql/engine/cmd/ob_load_data_impl.cpp index b936d66720bbfccfb888011579f1accead61e476..f57c6c68cd189bd6cd98c9173d287f7aa54e4772 100644 --- a/src/sql/engine/cmd/ob_load_data_impl.cpp +++ b/src/sql/engine/cmd/ob_load_data_impl.cpp @@ -1264,29 +1264,29 @@ int ObCSVParser::next_line(bool& yield_line) bool yield = false; int with_back_slash = 0; - for (; !yield && cur_pos_ != buf_end_pos_; ++cur_pos_) { - + for (; !yield && cur_pos_ != buf_end_pos_; ++cur_pos_, ++cur_field_end_pos_) { + bool line_term_matched = false; if (*cur_pos_ == formats_.enclose_char_ && !in_enclose_flag_ && cur_pos_ == cur_field_begin_pos_) { in_enclose_flag_ = true; - } - - if (!is_escaped_flag_ && *cur_pos_ == formats_.escape_char_) { - is_escaped_flag_ = true; - } else { - char escaped_res = *cur_pos_; - if (is_escaped_flag_) { - escaped_res = escaped_char(*cur_pos_, &with_back_slash); + last_end_enclosed_ = NULL; + } else if ((*cur_pos_ == formats_.escape_char_ && formats_.escape_char_ != formats_.enclose_char_) || + (in_enclose_flag_ && formats_.enclose_char_ == *cur_pos_ && cur_pos_ < buf_end_pos_ && + formats_.enclose_char_ == *(cur_pos_ + 1))) { + if (cur_pos_ < buf_end_pos_) { + cur_pos_++; + if (!is_fast_parse_) { + *cur_field_end_pos_ = escaped_char(*cur_pos_, &with_back_slash); + } } - bool has_escaped = cur_field_end_pos_ != cur_pos_; - if (has_escaped && !is_fast_parse_) { - *cur_field_end_pos_ = escaped_res; + } else { + if (cur_field_end_pos_ != cur_pos_ && !is_fast_parse_) { + *cur_field_end_pos_ = *cur_pos_; } - - bool line_term_matched = false; - - if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) { + if (formats_.enclose_char_ == *cur_pos_) { + last_end_enclosed_ = cur_field_end_pos_; + } else if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) { if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) { - handle_one_field(cur_field_end_pos_, has_escaped); + handle_one_field(cur_field_end_pos_, cur_field_end_pos_ != cur_pos_); field_id_++; } char* next_pos = cur_pos_ + 1; @@ -1303,12 +1303,6 @@ int ObCSVParser::next_line(bool& yield_line) cur_line_begin_pos_ = next_pos; } } - - if (is_escaped_flag_) { - is_escaped_flag_ = false; - } - - ++cur_field_end_pos_; } } diff --git a/src/sql/engine/cmd/ob_load_data_impl.h b/src/sql/engine/cmd/ob_load_data_impl.h index 59429fc785066c2c601218bde19778ce977f42f4..b8e57caa96a9015ef25325388a04804087969bcf 100644 --- a/src/sql/engine/cmd/ob_load_data_impl.h +++ b/src/sql/engine/cmd/ob_load_data_impl.h @@ -542,6 +542,7 @@ public: cur_line_begin_pos_ = NULL; buf_begin_pos_ = NULL; buf_end_pos_ = NULL; + last_end_enclosed_ = NULL; field_id_ = 0; in_enclose_flag_ = false; is_escaped_flag_ = false; @@ -590,12 +591,13 @@ private: common::ObBitSet<> string_type_column_; // parsing state variables bool is_last_buf_; - char* cur_pos_; - char* cur_field_begin_pos_; - char* cur_field_end_pos_; - char* cur_line_begin_pos_; - char* buf_begin_pos_; - char* buf_end_pos_; + char *cur_pos_; + char *cur_field_begin_pos_; + char *cur_field_end_pos_; + char *cur_line_begin_pos_; + char *buf_begin_pos_; + char *buf_end_pos_; + char *last_end_enclosed_; int64_t field_id_; bool in_enclose_flag_; bool is_escaped_flag_; @@ -643,10 +645,8 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo if (!in_enclose_flag_) { ret_bool = true; // return true } else { - char* pre_pos = cur_pos - 1; // with in_enclose_flag_ = true, a term char is valid only if an enclosed char before it - if (static_cast(*pre_pos) == formats_.enclose_char_ && - cur_field_begin_pos_ != pre_pos) { // 123---->'---->123 + if (last_end_enclosed_ == cur_pos - 1) { remove_enclosed_char(cur_pos); ret_bool = true; // return true } else { diff --git a/src/sql/resolver/cmd/ob_load_data_resolver.cpp b/src/sql/resolver/cmd/ob_load_data_resolver.cpp index 5ba0899f51277c264664647ea09fb020b6d7c665..c3e177a48e2178bd1566d2790516a64033650f92 100644 --- a/src/sql/resolver/cmd/ob_load_data_resolver.cpp +++ b/src/sql/resolver/cmd/ob_load_data_resolver.cpp @@ -534,12 +534,14 @@ int ObLoadDataResolver::validate_stmt(ObLoadDataStmt* stmt) escape_char = (data_struct_in_file.field_escaped_str_.empty() ? INT64_MAX : static_cast(data_struct_in_file.field_escaped_str_[0])); + /* if (OB_SUCC(ret)) { if (escape_char != ObDataInFileStruct::DEFAULT_FIELD_ESCAPED_CHAR) { ret = OB_WRONG_FIELD_TERMINATORS; LOG_USER_ERROR(OB_WRONG_FIELD_TERMINATORS); } } + */ if (OB_SUCC(ret)) { const char* is_ambiguous_field_sep = strchr("ntrb0ZN", static_cast(field_sep_char)); const char* is_unsafe_field_sep = strchr(".0123456789e+-", static_cast(field_sep_char));