提交 e142f566 编写于 作者: B bf0 提交者: wangzelin.wzl

Fix LOAD DATA handling of NULL fields

上级 14588491
...@@ -1277,7 +1277,8 @@ int ObCSVParser::next_line(bool& yield_line) ...@@ -1277,7 +1277,8 @@ int ObCSVParser::next_line(bool& yield_line)
if (is_escaped_flag_) { if (is_escaped_flag_) {
escaped_res = escaped_char(*cur_pos_, &with_back_slash); escaped_res = escaped_char(*cur_pos_, &with_back_slash);
} }
if (cur_field_end_pos_ != cur_pos_ && !is_fast_parse_) { bool has_escaped = cur_field_end_pos_ != cur_pos_;
if (has_escaped && !is_fast_parse_) {
*cur_field_end_pos_ = escaped_res; *cur_field_end_pos_ = escaped_res;
} }
...@@ -1285,12 +1286,13 @@ int ObCSVParser::next_line(bool& yield_line) ...@@ -1285,12 +1286,13 @@ int ObCSVParser::next_line(bool& yield_line)
if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) { if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) {
if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) { if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) {
handle_one_field(cur_field_end_pos_); handle_one_field(cur_field_end_pos_, has_escaped);
field_id_++; field_id_++;
} }
char* next_pos = cur_pos_ + 1; char* next_pos = cur_pos_ + 1;
cur_field_begin_pos_ = next_pos; cur_field_begin_pos_ = next_pos;
cur_field_end_pos_ = cur_pos_; cur_field_end_pos_ = cur_pos_;
in_enclose_flag_ = false;
if (line_term_matched && (!formats_.is_line_term_by_counting_field_ || field_id_ == total_field_nums_)) { if (line_term_matched && (!formats_.is_line_term_by_counting_field_ || field_id_ == total_field_nums_)) {
if (OB_UNLIKELY(field_id_ != total_field_nums_)) { if (OB_UNLIKELY(field_id_ != total_field_nums_)) {
ret = deal_with_irregular_line(); ret = deal_with_irregular_line();
...@@ -1313,7 +1315,8 @@ int ObCSVParser::next_line(bool& yield_line) ...@@ -1313,7 +1315,8 @@ int ObCSVParser::next_line(bool& yield_line)
if (!yield && is_last_buf_ && cur_pos_ == buf_end_pos_) { if (!yield && is_last_buf_ && cur_pos_ == buf_end_pos_) {
if (cur_field_begin_pos_ < cur_pos_) { if (cur_field_begin_pos_ < cur_pos_) {
// new field, terminated with an eof // new field, terminated with an eof
handle_one_field(cur_field_end_pos_); bool has_escaped = cur_field_end_pos_ != cur_pos_;
handle_one_field(cur_field_end_pos_, has_escaped);
field_id_++; field_id_++;
} }
cur_field_begin_pos_ = cur_pos_; cur_field_begin_pos_ = cur_pos_;
......
...@@ -575,10 +575,10 @@ public: ...@@ -575,10 +575,10 @@ public:
} }
private: private:
bool is_terminate_char(char cur_char, char*& cur_pos, bool& is_line_term); bool is_terminate_char(char cur_char, char *&cur_pos, bool &is_line_term);
bool is_enclosed_field_start(char* cur_pos, char& cur_char); bool is_enclosed_field_start(char *cur_pos, char &cur_char);
void handle_one_field(char* field_end_pos); void handle_one_field(char *field_end_pos, bool has_escaped);
void deal_with_empty_field(ObString& field_str, int64_t index); void deal_with_empty_field(ObString &field_str, int64_t index);
// void deal_with_field_with_escaped_chars(ObString &field_str); // void deal_with_field_with_escaped_chars(ObString &field_str);
int deal_with_irregular_line(); int deal_with_irregular_line();
void remove_enclosed_char(char*& cur_field_end_pos); void remove_enclosed_char(char*& cur_field_end_pos);
...@@ -647,7 +647,6 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo ...@@ -647,7 +647,6 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo
// with in_enclose_flag_ = true, a term char is valid only if an enclosed char before it // with in_enclose_flag_ = true, a term char is valid only if an enclosed char before it
if (static_cast<int64_t>(*pre_pos) == formats_.enclose_char_ && if (static_cast<int64_t>(*pre_pos) == formats_.enclose_char_ &&
cur_field_begin_pos_ != pre_pos) { // 123---->'---->123 cur_field_begin_pos_ != pre_pos) { // 123---->'---->123
in_enclose_flag_ = false;
remove_enclosed_char(cur_pos); remove_enclosed_char(cur_pos);
ret_bool = true; // return true ret_bool = true; // return true
} else { } else {
...@@ -670,14 +669,17 @@ OB_INLINE bool ObCSVParser::is_enclosed_field_start(char* cur_pos, char& cur_cha ...@@ -670,14 +669,17 @@ OB_INLINE bool ObCSVParser::is_enclosed_field_start(char* cur_pos, char& cur_cha
&& cur_pos == cur_field_begin_pos_; && cur_pos == cur_field_begin_pos_;
} }
OB_INLINE void ObCSVParser::handle_one_field(char* field_end_pos) OB_INLINE void ObCSVParser::handle_one_field(char *field_end_pos, bool has_escaped)
{ {
if (OB_LIKELY(field_id_ < total_field_nums_)) { if (OB_LIKELY(field_id_ < total_field_nums_)) {
int32_t str_len = static_cast<int32_t>(field_end_pos - cur_field_begin_pos_); int32_t str_len = static_cast<int32_t>(field_end_pos - cur_field_begin_pos_);
if (OB_UNLIKELY(str_len <= 0)) { if (OB_UNLIKELY(str_len <= 0)) {
deal_with_empty_field(values_in_line_.at(field_id_), field_id_); deal_with_empty_field(values_in_line_.at(field_id_), field_id_);
} else { } else {
if (str_len == 1 && *cur_field_begin_pos_ == 'N' && cur_pos_ - cur_field_begin_pos_ == 2) { if (!in_enclose_flag_ &&
((str_len == 1 && *cur_field_begin_pos_ == 'N' && has_escaped && cur_pos_ - cur_field_begin_pos_ == 2) ||
(formats_.enclose_char_ != INT64_MAX && !has_escaped && str_len == 4 &&
0 == MEMCMP(cur_field_begin_pos_, "NULL", 4)))) {
values_in_line_.at(field_id_).assign_ptr(&ObLoadDataUtils::NULL_VALUE_FLAG, 1); values_in_line_.at(field_id_).assign_ptr(&ObLoadDataUtils::NULL_VALUE_FLAG, 1);
} else { } else {
values_in_line_.at(field_id_).assign_ptr(cur_field_begin_pos_, str_len); values_in_line_.at(field_id_).assign_ptr(cur_field_begin_pos_, str_len);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册