提交 7be8c365 编写于 作者: B bf0 提交者: wangzelin.wzl

Fix LOAD DATA CSV parser bug (escape and enclose character handling)

上级 ca421cf8
......@@ -1264,29 +1264,29 @@ int ObCSVParser::next_line(bool& yield_line)
bool yield = false;
int with_back_slash = 0;
for (; !yield && cur_pos_ != buf_end_pos_; ++cur_pos_) {
for (; !yield && cur_pos_ != buf_end_pos_; ++cur_pos_, ++cur_field_end_pos_) {
bool line_term_matched = false;
if (*cur_pos_ == formats_.enclose_char_ && !in_enclose_flag_ && cur_pos_ == cur_field_begin_pos_) {
in_enclose_flag_ = true;
}
if (!is_escaped_flag_ && *cur_pos_ == formats_.escape_char_) {
is_escaped_flag_ = true;
} else {
char escaped_res = *cur_pos_;
if (is_escaped_flag_) {
escaped_res = escaped_char(*cur_pos_, &with_back_slash);
last_end_enclosed_ = NULL;
} else if ((*cur_pos_ == formats_.escape_char_ && formats_.escape_char_ != formats_.enclose_char_) ||
(in_enclose_flag_ && formats_.enclose_char_ == *cur_pos_ && cur_pos_ < buf_end_pos_ &&
formats_.enclose_char_ == *(cur_pos_ + 1))) {
if (cur_pos_ < buf_end_pos_) {
cur_pos_++;
if (!is_fast_parse_) {
*cur_field_end_pos_ = escaped_char(*cur_pos_, &with_back_slash);
}
}
bool has_escaped = cur_field_end_pos_ != cur_pos_;
if (has_escaped && !is_fast_parse_) {
*cur_field_end_pos_ = escaped_res;
} else {
if (cur_field_end_pos_ != cur_pos_ && !is_fast_parse_) {
*cur_field_end_pos_ = *cur_pos_;
}
bool line_term_matched = false;
if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) {
if (formats_.enclose_char_ == *cur_pos_) {
last_end_enclosed_ = cur_field_end_pos_;
} else if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) {
if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) {
handle_one_field(cur_field_end_pos_, has_escaped);
handle_one_field(cur_field_end_pos_, cur_field_end_pos_ != cur_pos_);
field_id_++;
}
char* next_pos = cur_pos_ + 1;
......@@ -1303,12 +1303,6 @@ int ObCSVParser::next_line(bool& yield_line)
cur_line_begin_pos_ = next_pos;
}
}
if (is_escaped_flag_) {
is_escaped_flag_ = false;
}
++cur_field_end_pos_;
}
}
......
......@@ -542,6 +542,7 @@ public:
cur_line_begin_pos_ = NULL;
buf_begin_pos_ = NULL;
buf_end_pos_ = NULL;
last_end_enclosed_ = NULL;
field_id_ = 0;
in_enclose_flag_ = false;
is_escaped_flag_ = false;
......@@ -590,12 +591,13 @@ private:
common::ObBitSet<> string_type_column_;
// parsing state variables
bool is_last_buf_;
char* cur_pos_;
char* cur_field_begin_pos_;
char* cur_field_end_pos_;
char* cur_line_begin_pos_;
char* buf_begin_pos_;
char* buf_end_pos_;
char *cur_pos_;
char *cur_field_begin_pos_;
char *cur_field_end_pos_;
char *cur_line_begin_pos_;
char *buf_begin_pos_;
char *buf_end_pos_;
char *last_end_enclosed_;
int64_t field_id_;
bool in_enclose_flag_;
bool is_escaped_flag_;
......@@ -643,10 +645,8 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo
if (!in_enclose_flag_) {
ret_bool = true; // return true
} else {
char* pre_pos = cur_pos - 1;
// when in_enclose_flag_ is true, a terminator char is valid only if an enclose char immediately precedes it
if (static_cast<int64_t>(*pre_pos) == formats_.enclose_char_ &&
cur_field_begin_pos_ != pre_pos) { // 123---->'---->123
if (last_end_enclosed_ == cur_pos - 1) {
remove_enclosed_char(cur_pos);
ret_bool = true; // return true
} else {
......
......@@ -534,12 +534,14 @@ int ObLoadDataResolver::validate_stmt(ObLoadDataStmt* stmt)
escape_char = (data_struct_in_file.field_escaped_str_.empty()
? INT64_MAX
: static_cast<int64_t>(data_struct_in_file.field_escaped_str_[0]));
/*
if (OB_SUCC(ret)) {
if (escape_char != ObDataInFileStruct::DEFAULT_FIELD_ESCAPED_CHAR) {
ret = OB_WRONG_FIELD_TERMINATORS;
LOG_USER_ERROR(OB_WRONG_FIELD_TERMINATORS);
}
}
*/
if (OB_SUCC(ret)) {
const char* is_ambiguous_field_sep = strchr("ntrb0ZN", static_cast<int>(field_sep_char));
const char* is_unsafe_field_sep = strchr(".0123456789e+-", static_cast<int>(field_sep_char));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册