diff --git a/dbms/include/DB/Functions/FunctionsStringSearch.h b/dbms/include/DB/Functions/FunctionsStringSearch.h index db777563f6a7be1d692b7bcd9c06abddceff5e76..8fa104efae243ed4003dc36eed5e6bc0e9e842a8 100644 --- a/dbms/include/DB/Functions/FunctionsStringSearch.h +++ b/dbms/include/DB/Functions/FunctionsStringSearch.h @@ -358,16 +358,6 @@ struct ExtractImpl prev_offset = cur_offset; } } - - static void constant(const std::string & data, const std::string & pattern, std::string & res) - { - std::vector vdata(data.begin(), data.end()); - ColumnArray::Offsets_t offsets(1, data.size()); - std::vector res_vdata; - ColumnArray::Offsets_t res_offsets; - vector(vdata, offsets, pattern, res_vdata, res_offsets); - res = std::string(res_vdata.begin(), res_vdata.end() - 1); - } }; @@ -487,8 +477,14 @@ public: } else if (const ColumnConstString * col = dynamic_cast(&*column)) { - std::string res; - Impl::constant(col->getData(), col_needle->getData(), res); + /* Constant haystack: copy it together with its terminating zero byte into a one-row byte buffer (single offset = buffer size), run Impl::vector on it, then build the constant result without the trailing zero byte. */ const std::string & data = col->getData(); + std::vector vdata(data.c_str(), data.c_str() + data.size() + 1); + ColumnArray::Offsets_t offsets(1, vdata.size()); + std::vector res_vdata; + ColumnArray::Offsets_t res_offsets; + Impl::vector(vdata, offsets, col_needle->getData(), res_vdata, res_offsets); + + std::string res = std::string(res_vdata.begin(), res_vdata.end() - 1); ColumnConstString * col_res = new ColumnConstString(col->size(), res); block.getByPosition(result).column = col_res; diff --git a/dbms/include/DB/Functions/FunctionsURL.h b/dbms/include/DB/Functions/FunctionsURL.h index 5e6fcad4251e3f335d0c356d729b95370dbee792..99ec73c96e3830acb9ba73aee8abf070a3fdfeee 100644 --- a/dbms/include/DB/Functions/FunctionsURL.h +++ b/dbms/include/DB/Functions/FunctionsURL.h @@ -4,6 +4,7 @@ #include #include #include +#include "FunctionsStringSearch.h" namespace DB @@ -256,6 +257,78 @@ struct ExtractWWW }; +struct ExtractURLParameterImpl +{ + /* For every row i of data (bytes from the previous offset up to offsets[i]), finds the value that follows pattern= either directly after the first question mark or after a later ampersand, and appends it to res_data followed by a zero byte; res_offsets[i] receives the end position of that row in res_data. Rows without a match yield only the zero byte. Uses C string functions, so each row is presumably zero-terminated - verify against callers. */ static void vector(const std::vector & data, + const ColumnArray::Offsets_t & 
offsets, + std::string pattern, + std::vector & res_data, ColumnArray::Offsets_t & res_offsets) + { + res_data.reserve(data.size() / 5); + res_offsets.resize(offsets.size()); + + /* Search key for the first parameter: the name followed by an equals sign. */ pattern += '='; + const char * param_str = pattern.c_str(); + size_t param_len = pattern.size(); + + /* Search key for any later parameter: ampersand, name, equals sign. */ std::string and_pattern = '&' + pattern; + const char * and_param_str = and_pattern.c_str(); + size_t and_param_len = and_pattern.size(); + + size_t prev_offset = 0; + size_t res_offset = 0; + + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t cur_offset = offsets[i]; + + const char * pos = NULL; + + /* One-pass block; break serves as an early exit and leaves pos as set so far. */ do + { + const char * str = reinterpret_cast(&data[prev_offset]); + + const char * begin = strchr(str, '?'); + /* No question mark in the row: no query string, pos stays NULL. */ if (begin == NULL) + break; + ++begin; + + if (!strncmp(begin, param_str, param_len)) + { + pos = begin + param_len; + break; + } + + /* NOTE(review): strstr scans to the end of the row, so an ampersand-prefixed match located after a hash fragment marker is also accepted - confirm this is intended. */ pos = strstr(begin, and_param_str); + if (pos != NULL) + pos += and_param_len; + } while (false); + + if (pos != NULL) + { + /* The value ends at the next ampersand or hash sign, or at the end of the row. */ const char * end = strpbrk(pos, "&#"); + if (end == NULL) + end = pos + strlen(pos); + + res_data.resize(res_offset + (end - pos) + 1); + memcpy(&res_data[res_offset], pos, end - pos); + res_offset += end - pos; + } + else + { + /* No match: make room for the terminating zero only. */ res_data.resize(res_offset + 1); + } + + res_data[res_offset] = 0; + ++res_offset; + res_offsets[i] = res_offset; + + prev_offset = cur_offset; + } + } +}; + + /** Extract a piece of a string using Extractor. 
*/ template @@ -372,19 +445,22 @@ struct NameCutQueryString { static const char * get() { return "cutQueryStrin struct NameCutFragment { static const char * get() { return "cutFragment"; } }; struct NameCutQueryStringAndFragment { static const char * get() { return "cutQueryStringAndFragment"; } }; -typedef FunctionStringToString, NameProtocol> FunctionProtocol; +struct NameExtractURLParameter { static const char * get() { return "extractURLParameter"; } }; + +typedef FunctionStringToString, NameProtocol> FunctionProtocol; typedef FunctionStringToString >, NameDomain> FunctionDomain; typedef FunctionStringToString >, NameDomainWithoutWWW> FunctionDomainWithoutWWW; typedef FunctionStringToString, NameTopLevelDomain> FunctionTopLevelDomain; -typedef FunctionStringToString, NamePath> FunctionPath; +typedef FunctionStringToString, NamePath> FunctionPath; typedef FunctionStringToString >, NameQueryString> FunctionQueryString; typedef FunctionStringToString >, NameFragment> FunctionFragment; typedef FunctionStringToString >, NameQueryStringAndFragment> FunctionQueryStringAndFragment; typedef FunctionStringToString, NameCutWWW> FunctionCutWWW; typedef FunctionStringToString >, NameCutQueryString> FunctionCutQueryString; -typedef FunctionStringToString >, NameCutFragment> FunctionCutFragment; +typedef FunctionStringToString >, NameCutFragment> FunctionCutFragment; typedef FunctionStringToString >, NameCutQueryStringAndFragment> FunctionCutQueryStringAndFragment; +typedef FunctionsStringSearchToString FunctionExtractURLParameter; } diff --git a/dbms/src/Functions/FunctionFactory.cpp b/dbms/src/Functions/FunctionFactory.cpp index 355004da0717225b10ac25a2bd431f402c6b3ba8..daa33da9ddda63c4189ef26d5984b0e5a7e91021 100644 --- a/dbms/src/Functions/FunctionFactory.cpp +++ b/dbms/src/Functions/FunctionFactory.cpp @@ -150,6 +150,7 @@ FunctionPtr FunctionFactory::get( else if (name == "cutQueryString") return new FunctionCutQueryString; else if (name == "cutFragment") return new 
FunctionCutFragment; else if (name == "cutQueryStringAndFragment") return new FunctionCutQueryStringAndFragment; + else if (name == "extractURLParameter") return new FunctionExtractURLParameter; else if (name == "visibleWidth") return new FunctionVisibleWidth; else if (name == "toTypeName") return new FunctionToTypeName;