未验证 提交 cb2beb47 编写于 作者: N Nikita Mikhaylov 提交者: GitHub

Merge pull request #20516 from nikitamikhaylov/more-checks-filesegmentation-engine

More checks in fileSegmentationEngine 
......@@ -6,6 +6,7 @@ namespace DB
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size)
......@@ -28,7 +29,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
if (quotes)
{
pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
if (pos == in.buffer().end())
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
if (*pos == '\\')
{
......@@ -45,9 +48,11 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
else
{
pos = find_first_symbols<'{', '}', '\\', '"'>(pos, in.buffer().end());
if (pos == in.buffer().end())
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
if (*pos == '{')
else if (*pos == '{')
{
++balance;
++pos;
......
......@@ -35,10 +35,10 @@ struct Memory : boost::noncopyable, Allocator
char * m_data = nullptr;
size_t alignment = 0;
Memory() {}
Memory() = default;
/// If alignment != 0, then allocate memory aligned to specified value.
Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
explicit Memory(size_t size_, size_t alignment_ = 0) : m_capacity(size_), m_size(m_capacity), alignment(alignment_)
{
alloc();
}
......@@ -140,7 +140,7 @@ protected:
Memory<> memory;
public:
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
explicit BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
: Base(nullptr, 0), memory(existing_memory ? 0 : size, alignment)
{
Base::set(existing_memory ? existing_memory : memory.data(), size);
......
......@@ -15,6 +15,7 @@ namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
......@@ -436,9 +437,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
if (quotes)
{
pos = find_first_symbols<'"'>(pos, in.buffer().end());
if (pos == in.buffer().end())
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
if (*pos == '"')
else if (*pos == '"')
{
++pos;
if (loadAtPosition(in, memory, pos) && *pos == '"')
......@@ -450,9 +453,11 @@ static std::pair<bool, size_t> fileSegmentationEngineCSVImpl(ReadBuffer & in, DB
else
{
pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end());
if (pos == in.buffer().end())
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
if (*pos == '"')
else if (*pos == '"')
{
quotes = true;
++pos;
......
......@@ -11,6 +11,7 @@ namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
RegexpRowInputFormat::RegexpRowInputFormat(
......@@ -182,7 +183,9 @@ static std::pair<bool, size_t> fileSegmentationEngineRegexpImpl(ReadBuffer & in,
while (loadAtPosition(in, memory, pos) && need_more_data)
{
pos = find_first_symbols<'\n', '\r'>(pos, in.buffer().end());
if (pos == in.buffer().end())
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
// Support DOS-style newline ("\r\n")
......
......@@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
......@@ -433,10 +434,11 @@ static std::pair<bool, size_t> fileSegmentationEngineTabSeparatedImpl(ReadBuffer
{
pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end());
if (pos == in.buffer().end())
if (pos > in.buffer().end())
throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR);
else if (pos == in.buffer().end())
continue;
if (*pos == '\\')
else if (*pos == '\\')
{
++pos;
if (loadAtPosition(in, memory, pos))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册