未验证 提交 cb2beb47 编写于 作者: N Nikita Mikhaylov 提交者: GitHub

Merge pull request #20516 from nikitamikhaylov/more-checks-filesegmentation-engine

More checks in fileSegmentationEngine 
...@@ -6,6 +6,7 @@ namespace DB ...@@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int INCORRECT_DATA; extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
} }
std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
...@@ -28,7 +29,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D ...@@ -28,7 +29,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
if (quotes) if (quotes)
{ {
pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end()); pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
if (pos == in.buffer().end()) if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue; continue;
if (*pos == '\\') if (*pos == '\\')
{ {
...@@ -45,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D ...@@ -45,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
else else
{ {
pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end()); pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end());
if (pos == in.buffer().end()) if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue; continue;
if (*pos == '{') else if (*pos == '{')
{ {
++balance; ++balance;
++pos; ++pos;
......
...@@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator ...@@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator
char * m_data = nullptr; char * m_data = nullptr;
size_t alignment = 0; size_t alignment = 0;
Memory() {} Memory() = default;
/// If alignment != 0, then allocate memory aligned to specified value. /// If alignment != 0, then allocate memory aligned to specified value.
Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_) explicit Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
{ {
alloc(); alloc();
} }
...@@ -140,7 +140,7 @@ protected: ...@@ -140,7 +140,7 @@ protected:
Memory<> memory; Memory<> memory;
public: public:
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership. /// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0) explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
: Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment) : Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment)
{ {
Base::set(existing_memory ? existing_memory : memory.data(), size); Base::set(existing_memory ? existing_memory : memory.data(), size);
......
...@@ -15,6 +15,7 @@ namespace ErrorCodes ...@@ -15,6 +15,7 @@ namespace ErrorCodes
{ {
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA; extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
} }
...@@ -436,9 +437,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB ...@@ -436,9 +437,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
if (quotes) if (quotes)
{ {
pos = find_first_symbols<'"'>(pos, in.buffer().end()); pos = find_first_symbols<'"'>(pos, in.buffer().end());
if (pos == in.buffer().end()) if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue; continue;
if (*pos == '"') else if (*pos == '"')
{ {
++pos; ++pos;
if (loadAtPosition(in, memory, pos) && *pos == '"') if (loadAtPosition(in, memory, pos) && *pos == '"')
...@@ -450,9 +453,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB ...@@ -450,9 +453,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
else else
{ {
pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end());
if (pos == in.buffer().end()) if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue; continue;
if (*pos == '"') else if (*pos == '"')
{ {
quotes = true; quotes = true;
++pos; ++pos;
......
...@@ -11,6 +11,7 @@ namespace ErrorCodes ...@@ -11,6 +11,7 @@ namespace ErrorCodes
{ {
extern const int INCORRECT_DATA; extern const int INCORRECT_DATA;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
} }
RegexpRowInputFormat::RegexpRowInputFormat( RegexpRowInputFormat::RegexpRowInputFormat(
...@@ -182,7 +183,9 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in, ...@@ -182,7 +183,9 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in,
while (loadAtPosition(in, memory, pos) && need_more_data) while (loadAtPosition(in, memory, pos) && need_more_data)
{ {
pos = find_first_symbols<'\n', '\r'>(pos, in.buffer().end()); pos = find_first_symbols<'\n', '\r'>(pos, in.buffer().end());
if (pos == in.buffer().end()) if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue; continue;
// Support DOS-style newline ("\r\n") // Support DOS-style newline ("\r\n")
......
...@@ -15,6 +15,7 @@ namespace DB ...@@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int INCORRECT_DATA; extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
} }
...@@ -433,10 +434,11 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer ...@@ -433,10 +434,11 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
{ {
pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end());
if (pos == in.buffer().end()) if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue; continue;
if (*pos == '\\') else if (*pos == '\\')
{ {
++pos; ++pos;
if (loadAtPosition(in, memory, pos)) if (loadAtPosition(in, memory, pos))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册