提交 f80fb09b 编写于 作者: D Danila Kutenin

Hyperscan better usage of scratch space and error handling

上级 11997ed7
......@@ -421,6 +421,7 @@ namespace ErrorCodes
extern const int UNKNOWN_PROTOBUF_FORMAT = 444;
extern const int CANNOT_MPROTECT = 445;
extern const int FUNCTION_NOT_ALLOWED = 446;
extern const int HYPERSCAN_CANNOT_SCAN_TEXT = 447;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;
......
......@@ -40,6 +40,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int TOO_MANY_BYTES;
extern const int NOT_IMPLEMENTED;
extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
}
/// Is the LIKE expression reduced to finding a substring in a string?
......@@ -289,10 +290,10 @@ struct MultiMatchAnyImpl
#if USE_HYPERSCAN
const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex, MultiSearchDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_alloc_scratch(hyperscan_regex->get(), &scratch);
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
if (err != HS_SUCCESS)
throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
MultiRegexps::ScratchPtr smart_scratch(scratch);
......@@ -316,14 +317,16 @@ struct MultiMatchAnyImpl
if (length > std::numeric_limits<UInt32>::max())
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
res[i] = 0;
hs_scan(
hyperscan_regex->get(),
err = hs_scan(
hyperscan_regex->getDB(),
reinterpret_cast<const char *>(haystack_data.data()) + offset,
length,
0,
smart_scratch.get(),
on_match,
&res[i]);
if (err != HS_SUCCESS)
throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
offset = haystack_offsets[i];
}
#else
......
......@@ -84,8 +84,20 @@ namespace MultiRegexps
};
using CompilerError = std::unique_ptr<hs_compile_error_t, HyperscanDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
using ScratchPtr = std::unique_ptr<hs_scratch_t, DB::MultiRegexps::HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
using Regexps = std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>;
using ScratchPtr = std::unique_ptr<hs_scratch_t, HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
using DataBasePtr = std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>;
class Regexps
{
public:
Regexps(hs_database_t * db_, hs_scratch_t * scratch_) : db{db_}, scratch{scratch_} {}
hs_database_t * getDB() const { return db.get(); };
hs_scratch_t * getScratch() const { return scratch.get(); };
private:
DataBasePtr db;
ScratchPtr scratch;
};
using Pool = ObjectPoolMap<Regexps, std::pair<std::vector<String>, std::optional<UInt32>>>;
......@@ -183,7 +195,13 @@ namespace MultiRegexps
ProfileEvents::increment(ProfileEvents::RegexpCreated);
return new Regexps{db};
hs_scratch_t * scratch = nullptr;
err = hs_alloc_scratch(db, &scratch);
if (err != HS_SUCCESS)
throw Exception("Could not allocate scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
return new Regexps{db, scratch};
});
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册