提交 ea734b6b 编写于 作者: A Alexey Milovidov

Removed compatibility layer with OLAPServer [#METR-2944].

上级 c6da35b7
......@@ -16,14 +16,10 @@ else()
endif()
add_library (dbms
src/Server/OLAPAttributesMetadata.h
src/Server/InterserverIOHTTPHandler.h
src/Server/OLAPHTTPHandler.h
src/Server/OLAPQueryConverter.h
src/Server/Server.h
src/Server/TCPHandler.h
src/Server/HTTPHandler.h
src/Server/OLAPQueryParser.h
src/Server/MetricsTransmitter.h
src/Server/UsersConfigReloader.h
src/Server/StatusFile.h
......
......@@ -67,7 +67,6 @@ public:
{
TCP = 1,
HTTP = 2,
OLAP_HTTP = 3,
};
enum class HTTPMethod
......
......@@ -3,9 +3,6 @@ add_executable(clickhouse-server
HTTPHandler.cpp
TCPHandler.cpp
InterserverIOHTTPHandler.cpp
OLAPHTTPHandler.cpp
OLAPQueryParser.cpp
OLAPQueryConverter.cpp
MetricsTransmitter.cpp
UsersConfigReloader.cpp
StatusFile.cpp
......
#pragma once
#include <math.h> // log2()
#include <string.h> // memcpy(), memset()
#include <openssl/md5.h>
#include <boost/algorithm/string.hpp>
#include <Poco/StringTokenizer.h>
#include <Poco/ByteOrder.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/ReadHelpers.h>
#include <common/DateLUT.h>
#include <DB/Dictionaries/Embedded/RegionsHierarchy.h>
#include <DB/Dictionaries/Embedded/TechDataHierarchy.h>
/// The code is mostly taken from OLAP-server. It is needed here only for parsing attribute values.
namespace DB
{
namespace OLAP
{
using BinaryData = Int64;
/** Describes the type of an attribute. */
struct IAttributeMetadata
{
    /// Converts the textual value taken from a query into its binary representation.
    virtual BinaryData parse(const std::string & s) const = 0;

    virtual ~IAttributeMetadata() = default;
};
/// Stub attribute: its value is always zero, which makes it suitable as the argument of the count aggregate function.
struct DummyAttribute : public IAttributeMetadata
{
    /// The input text is ignored.
    BinaryData parse(const std::string & /*s*/) const
    {
        const BinaryData always_zero = 0;
        return always_zero;
    }
};
/// Base class for attributes that are plain UInt8, UInt16, UInt32 or UInt64 (there are many of those).
struct AttributeUIntBase : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number and converts it to BinaryData.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 unsigned_value = DB::parse<UInt64>(s);
        return static_cast<BinaryData>(unsigned_value);
    }
};
/// Base class for attributes that are Int8, Int16, Int32 or Int64.
struct AttributeIntBase : public IAttributeMetadata
{
    /// Reads the text as a signed 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const Int64 signed_value = DB::parse<Int64>(s);
        return signed_value;
    }
};
/** Base classes for attributes derived from time (unix timestamp, 4 bytes). */
struct AttributeDateTimeBase : public IAttributeMetadata
{
    /// Parses "YYYY-MM-DD hh:mm:ss" and passes the fields to mktime(); a negative mktime() result is replaced by 0.
    /// NOTE(review): the sscanf() result is not checked, so fields missing from the input stay zero.
    BinaryData parse(const std::string & s) const
    {
        int year = 0;
        int month = 0;
        int day = 0;
        int hour = 0;
        int minute = 0;
        int second = 0;
        sscanf(s.c_str(), "%04d-%02d-%02d %02d:%02d:%02d",
            &year, &month, &day, &hour, &minute, &second);

        struct tm calendar = {};
        calendar.tm_year = year - 1900;  /// struct tm counts years from 1900
        calendar.tm_mon = month - 1;     /// struct tm counts months from 0
        calendar.tm_mday = day;
        calendar.tm_hour = hour;
        calendar.tm_min = minute;
        calendar.tm_sec = second;
        calendar.tm_isdst = -1;          /// let mktime() decide whether DST is in effect

        const time_t timestamp = mktime(&calendar);
        if (timestamp < 0)
            return 0;
        return timestamp;
    }
};
/// Base class for attributes holding a calendar date.
struct AttributeDateBase : public IAttributeMetadata
{
    /// Parses "YYYY-MM-DD" and passes the fields (time of day left at zero) to mktime();
    /// a negative mktime() result is replaced by 0.
    /// NOTE(review): the sscanf() result is not checked, so fields missing from the input stay zero.
    BinaryData parse(const std::string & s) const
    {
        int year = 0;
        int month = 0;
        int day = 0;
        sscanf(s.c_str(), "%04d-%02d-%02d",
            &year, &month, &day);

        struct tm calendar = {};
        calendar.tm_year = year - 1900;  /// struct tm counts years from 1900
        calendar.tm_mon = month - 1;     /// struct tm counts months from 0
        calendar.tm_mday = day;
        calendar.tm_isdst = -1;          /// let mktime() decide whether DST is in effect

        const time_t timestamp = mktime(&calendar);
        if (timestamp < 0)
            return 0;
        return timestamp;
    }
};
/// Base class for attributes holding a time of day.
struct AttributeTimeBase : public IAttributeMetadata
{
    /// Parses "hh:mm:ss" and passes the fields to mktime(); a negative mktime() result is replaced by 0.
    /// NOTE(review): the date fields are left zeroed (tm_year = 0, tm_mon = 0, tm_mday = 0), so the value
    /// returned by mktime() may well be negative and the result may collapse to 0 - confirm the intent.
    BinaryData parse(const std::string & s) const
    {
        int hour = 0;
        int minute = 0;
        int second = 0;
        sscanf(s.c_str(), "%02d:%02d:%02d",
            &hour, &minute, &second);

        struct tm calendar = {};
        calendar.tm_hour = hour;
        calendar.tm_min = minute;
        calendar.tm_sec = second;

        const time_t timestamp = mktime(&calendar);
        if (timestamp < 0)
            return 0;
        return timestamp;
    }
};
/// Base types for individual date/time components: all of them are parsed as plain unsigned integers,
/// except the week, which is parsed as a date string.
using AttributeYearBase = AttributeUIntBase;
using AttributeMonthBase = AttributeUIntBase;
using AttributeDayOfWeekBase = AttributeUIntBase;
using AttributeDayOfMonthBase = AttributeUIntBase;
using AttributeWeekBase = AttributeDateBase;
using AttributeHourBase = AttributeUIntBase;
using AttributeMinuteBase = AttributeUIntBase;
using AttributeSecondBase = AttributeUIntBase;
/// Base class for attributes whose value is a short string packed into the bytes of a BinaryData.
struct AttributeShortStringBase : public IAttributeMetadata
{
    /// Copies at most sizeof(BinaryData) leading bytes of `s` into the result, in memory order.
    /// Shorter strings are padded with zero bytes, longer ones are truncated.
    BinaryData parse(const std::string & s) const
    {
        /// The bytes are copied into a local of the right type instead of dereferencing a casted
        /// character pointer: the string buffer is not guaranteed to be aligned for BinaryData,
        /// and such a read violates the strict aliasing rule.
        BinaryData packed = 0;
        const size_t bytes_to_copy = s.size() < sizeof(packed) ? s.size() : sizeof(packed);
        memcpy(&packed, s.data(), bytes_to_copy);
        return packed;
    }
};
/** Attributes related to the visit start time.
  * NOTE(review): VisitStartDateTime is declared twice below; the second declaration repeats the same alias.
  */
using VisitStartDateTime = AttributeDateTimeBase;
using VisitStartDateTimeRoundedToMinute = AttributeDateTimeBase;
using VisitStartDateTimeRoundedToHour = AttributeDateTimeBase;
using VisitStartDateTime = AttributeDateTimeBase;
using VisitStartDate = AttributeDateBase;
using VisitStartDateRoundedToMonth = AttributeDateBase;
using VisitStartWeek = AttributeWeekBase;
using VisitStartTime = AttributeTimeBase;
using VisitStartTimeRoundedToMinute = AttributeTimeBase;
using VisitStartYear = AttributeYearBase;
using VisitStartMonth = AttributeMonthBase;
using VisitStartDayOfWeek = AttributeDayOfWeekBase;
using VisitStartDayOfMonth = AttributeDayOfMonthBase;
using VisitStartHour = AttributeHourBase;
using VisitStartMinute = AttributeMinuteBase;
using VisitStartSecond = AttributeSecondBase;
/** Attributes related to the start time of the first visit. */
using FirstVisitDateTime = AttributeDateTimeBase;
using FirstVisitDate = AttributeDateBase;
using FirstVisitWeek = AttributeWeekBase;
using FirstVisitTime = AttributeTimeBase;
using FirstVisitYear = AttributeYearBase;
using FirstVisitMonth = AttributeMonthBase;
using FirstVisitDayOfWeek = AttributeDayOfWeekBase;
using FirstVisitDayOfMonth = AttributeDayOfMonthBase;
using FirstVisitHour = AttributeHourBase;
using FirstVisitMinute = AttributeMinuteBase;
using FirstVisitSecond = AttributeSecondBase;
/** Attributes related to the start time of the penultimate visit. */
using PredLastVisitDate = AttributeDateBase;
using PredLastVisitWeek = AttributeWeekBase;
using PredLastVisitYear = AttributeYearBase;
using PredLastVisitMonth = AttributeMonthBase;
using PredLastVisitDayOfWeek = AttributeDayOfWeekBase;
using PredLastVisitDayOfMonth = AttributeDayOfMonthBase;
/** Attributes related to the time on the visitor's computer. */
using ClientDateTime = AttributeDateTimeBase;
using ClientTime = AttributeTimeBase;
using ClientTimeHour = AttributeHourBase;
using ClientTimeMinute = AttributeMinuteBase;
using ClientTimeSecond = AttributeSecondBase;
/** Base class for attributes for which a hash is stored. */
struct AttributeHashBase : public IAttributeMetadata
{
    /// Computes the MD5 digest of `s`, takes its first 8 bytes as a 64-bit integer
    /// and returns that integer with the byte order flipped.
    BinaryData parse(const std::string & s) const
    {
        unsigned char digest[16];

        MD5_CTX ctx;
        MD5_Init(&ctx);
        MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(s.data()), s.size());
        MD5_Final(digest, &ctx);

        /// The leading digest bytes are copied out with memcpy rather than read through
        /// a different member of a union, which is not a well-defined way to reinterpret bytes in C++.
        Poco::UInt64 leading_bytes = 0;
        memcpy(&leading_bytes, digest, sizeof(leading_bytes));

        return Poco::ByteOrder::flipBytes(leading_bytes);
    }
};
/// Hash-valued attributes (URLs, referers, search phrases).
using EndURLHash = AttributeHashBase;
using RefererHash = AttributeHashBase;
using SearchPhraseHash = AttributeHashBase;
using RefererDomainHash = AttributeHashBase;
using StartURLHash = AttributeHashBase;
using StartURLDomainHash = AttributeHashBase;
/// Region, traffic source and user newness attributes, parsed as plain integers.
using RegionID = AttributeUIntBase;
using RegionCity = AttributeUIntBase;
using RegionArea = AttributeUIntBase;
using RegionCountry = AttributeUIntBase;
using TraficSourceID = AttributeIntBase;
using CorrectedTraficSourceID = AttributeIntBase;
using CorrectedSearchEngineID = AttributeUIntBase;
using IsNewUser = AttributeUIntBase;
using UserNewness = AttributeUIntBase;
struct UserNewnessInterval : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 interval = DB::parse<UInt64>(s);
        return interval;
    }
};
using UserReturnTime = AttributeUIntBase;
struct UserReturnTimeInterval : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 interval = DB::parse<UInt64>(s);
        return interval;
    }
};
using UserVisitsPeriod = AttributeUIntBase;
struct UserVisitsPeriodInterval : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 interval = DB::parse<UInt64>(s);
        return interval;
    }
};
/// Visit, user and demographic attributes that are parsed as plain integers
/// (plus the UserID creation date/time, parsed as date/time strings).
using VisitTime = AttributeUIntBase;
using VisitTimeInterval = AttributeUIntBase;
using PageViews = AttributeUIntBase;
using PageViewsInterval = AttributeUIntBase;
using Bounce = AttributeUIntBase;
using BouncePrecise = AttributeUIntBase;
using IsYandex = AttributeUIntBase;
using UserID = AttributeUIntBase;
using UserIDCreateDateTime = AttributeDateTimeBase;
using UserIDCreateDate = AttributeDateBase;
using UserIDAge = AttributeIntBase;
using UserIDAgeInterval = AttributeIntBase;
using TotalVisits = AttributeUIntBase;
using TotalVisitsInterval = AttributeUIntBase;
using Age = AttributeUIntBase;
using AgeInterval = AttributeUIntBase;
using Sex = AttributeUIntBase;
using Income = AttributeUIntBase;
using AdvEngineID = AttributeUIntBase;
struct DotNet : public IAttributeMetadata
{
    /// Parses a dot-separated version: the first component goes to bits 8 and up,
    /// the second one (if present) is added to it. Further components are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, ".");
        const size_t components = tokenizer.count();

        if (components == 0)
            return 0;

        const UInt64 major_part = DB::parse<UInt64>(tokenizer[0]) << 8;
        if (components == 1)
            return major_part;

        return major_part + DB::parse<UInt64>(tokenizer[1]);
    }
};
struct DotNetMajor : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 major_version = DB::parse<UInt64>(s);
        return major_version;
    }
};
struct Flash : public IAttributeMetadata
{
    /// Parses a dot-separated version: the first component goes to bits 8 and up,
    /// the second one (if present) is added to it. Further components are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, ".");
        const size_t components = tokenizer.count();

        if (components == 0)
            return 0;

        const UInt64 major_part = DB::parse<UInt64>(tokenizer[0]) << 8;
        if (components == 1)
            return major_part;

        return major_part + DB::parse<UInt64>(tokenizer[1]);
    }
};
struct FlashExists : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 flag = DB::parse<UInt64>(s);
        return flag;
    }
};
struct FlashMajor : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 major_version = DB::parse<UInt64>(s);
        return major_version;
    }
};
struct Silverlight : public IAttributeMetadata
{
    /// Parses a dot-separated version of up to four components and ORs them together
    /// at bit offsets 56, 48, 16 and 0 respectively. Components after the fourth are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, ".");

        const int bit_offsets[4] = {56, 48, 16, 0};
        const size_t components = tokenizer.count() < 4 ? tokenizer.count() : 4;

        UInt64 packed = 0;
        for (size_t i = 0; i < components; ++i)
            packed |= DB::parse<UInt64>(tokenizer[i]) << bit_offsets[i];

        return packed;
    }
};
/// Browser capability and mobile device attributes: plain unsigned integers,
/// a hash of the phone model and the phone model as a short packed string.
using SilverlightMajor = AttributeUIntBase;
using Hits = AttributeUIntBase;
using HitsInterval = AttributeUIntBase;
using JavaEnable = AttributeUIntBase;
using CookieEnable = AttributeUIntBase;
using JavascriptEnable = AttributeUIntBase;
using IsMobile = AttributeUIntBase;
using MobilePhoneID = AttributeUIntBase;
using MobilePhoneModelHash = AttributeHashBase;
using MobilePhoneModel = AttributeShortStringBase;
struct BrowserLanguage : public IAttributeMetadata
{
    /// Copies at most the two leading bytes of `s` into a UInt16, in memory order.
    /// A shorter string is padded with zero bytes, a longer one is truncated.
    BinaryData parse(const std::string & s) const
    {
        /// The bytes are copied into a local of the right type instead of dereferencing a casted
        /// character pointer: the string buffer is not guaranteed to be aligned for UInt16,
        /// and such a read violates the strict aliasing rule.
        UInt16 packed = 0;
        const size_t bytes_to_copy = s.size() < sizeof(packed) ? s.size() : sizeof(packed);
        memcpy(&packed, s.data(), bytes_to_copy);
        return packed;
    }
};
/// The browser country is packed the same way as the browser language (two leading bytes).
using BrowserCountry = BrowserLanguage;
/// Short strings packed into the bytes of a BinaryData.
using TopLevelDomain = AttributeShortStringBase;
using URLScheme = AttributeShortStringBase;
/// Plain integer attributes.
using IPNetworkID = AttributeUIntBase;
using ClientTimeZone = AttributeIntBase;
using OSID = AttributeUIntBase;
using OSMostAncestor = AttributeUIntBase;
struct ClientIP : public IAttributeMetadata
{
    /// Parses a dot-separated address of up to four components and sums them
    /// after shifting by 24, 16, 8 and 0 bits respectively. Components after the fourth are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, ".");

        const int bit_offsets[4] = {24, 16, 8, 0};
        const size_t components = tokenizer.count() < 4 ? tokenizer.count() : 4;

        UInt64 packed = 0;
        for (size_t i = 0; i < components; ++i)
            packed += DB::parse<UInt64>(tokenizer[i]) << bit_offsets[i];

        return packed;
    }
};
struct Resolution : public IAttributeMetadata
{
    /// Parses up to three 'x'-separated numbers and sums them after shifting
    /// by 24, 8 and 0 bits respectively. Components after the third are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, "x");

        const int bit_offsets[3] = {24, 8, 0};
        const size_t components = tokenizer.count() < 3 ? tokenizer.count() : 3;

        UInt64 packed = 0;
        for (size_t i = 0; i < components; ++i)
            packed += DB::parse<UInt64>(tokenizer[i]) << bit_offsets[i];

        return packed;
    }
};
struct ResolutionWidthHeight : public IAttributeMetadata
{
    /// Parses "<first>x<second>": the first number goes to bits 16 and up,
    /// the second one (if present) is added to it. Further components are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, "x");
        const size_t components = tokenizer.count();

        if (components == 0)
            return 0;

        const UInt64 high_part = DB::parse<UInt64>(tokenizer[0]) << 16;
        if (components == 1)
            return high_part;

        return high_part + DB::parse<UInt64>(tokenizer[1]);
    }
};
struct ResolutionWidth : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 width = DB::parse<UInt64>(s);
        return width;
    }
};
struct ResolutionHeight : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 height = DB::parse<UInt64>(s);
        return height;
    }
};
using ResolutionWidthInterval = ResolutionWidth;
using ResolutionHeightInterval = ResolutionHeight;
struct ResolutionColor : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 color = DB::parse<UInt64>(s);
        return color;
    }
};
struct WindowClientArea : public IAttributeMetadata
{
    /// Parses "<first>x<second>": the first number goes to bits 16 and up,
    /// the second one (if present) is added to it. Further components are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, "x");
        const size_t components = tokenizer.count();

        if (components == 0)
            return 0;

        const UInt64 high_part = DB::parse<UInt64>(tokenizer[0]) << 16;
        if (components == 1)
            return high_part;

        return high_part + DB::parse<UInt64>(tokenizer[1]);
    }
};
/// Window client area attributes and their "Interval" variants, which are parsed the same way as the base attribute.
using WindowClientAreaInterval = WindowClientArea;
using WindowClientWidth = AttributeUIntBase;
using WindowClientWidthInterval = WindowClientWidth;
using WindowClientHeight = AttributeUIntBase;
using WindowClientHeightInterval = WindowClientHeight;
/// Search engine and counter code version attributes, parsed as plain unsigned integers.
using SearchEngineID = AttributeUIntBase;
using SearchEngineMostAncestor = AttributeUIntBase;
using CodeVersion = AttributeUIntBase;
/// The string has the form "10 7.5b", where the first number is the UserAgentID and the rest is the version.
struct UserAgent : public IAttributeMetadata
{
    /// Splits `s` on spaces and dots and packs the tokens:
    ///  token 0 - a number, shifted left by 24 bits;
    ///  token 1 - a number, shifted left by 16 bits;
    ///  token 2 - its second character shifted left by 8 bits plus its first character.
    /// Missing tokens and missing characters contribute nothing. Tokens after the third are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, " .");
        const size_t token_count = tokenizer.count();

        if (token_count == 0)
            return 0;

        UInt64 packed = DB::parse<UInt64>(tokenizer[0]) << 24;

        if (token_count >= 2)
            packed += DB::parse<UInt64>(tokenizer[1]) << 16;

        if (token_count >= 3)
        {
            /// The token may be empty (e.g. two delimiters in a row), so its characters
            /// are only read after checking the length; an absent character counts as zero.
            const std::string & minor = tokenizer[2];
            const char first_char = minor.empty() ? '\0' : minor[0];
            const char second_char = minor.size() > 1 ? minor[1] : '\0';

            packed += static_cast<UInt32>(second_char) << 8;
            packed += first_char;
        }

        return packed;
    }
};
struct UserAgentVersion : public IAttributeMetadata
{
    /// Splits `s` on dots and packs the tokens:
    ///  token 0 - a number, shifted left by 16 bits;
    ///  token 1 - its second character shifted left by 8 bits plus its first character.
    /// Missing tokens and missing characters contribute nothing. Tokens after the second are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, ".");
        const size_t token_count = tokenizer.count();

        if (token_count == 0)
            return 0;

        UInt64 packed = DB::parse<UInt64>(tokenizer[0]) << 16;

        if (token_count >= 2)
        {
            /// The token may be empty (e.g. two dots in a row), so its characters
            /// are only read after checking the length; an absent character counts as zero.
            const std::string & minor = tokenizer[1];
            const char first_char = minor.empty() ? '\0' : minor[0];
            const char second_char = minor.size() > 1 ? minor[1] : '\0';

            packed += static_cast<UInt32>(second_char) << 8;
            packed += first_char;
        }

        return packed;
    }
};
struct UserAgentMajor : public IAttributeMetadata
{
    /// Parses two space-separated numbers: the first goes to bits 8 and up,
    /// the second one (if present) is added to it. Further tokens are ignored.
    BinaryData parse(const std::string & s) const
    {
        Poco::StringTokenizer tokenizer(s, " ");
        const size_t components = tokenizer.count();

        if (components == 0)
            return 0;

        const UInt64 high_part = DB::parse<UInt64>(tokenizer[0]) << 8;
        if (components == 1)
            return high_part;

        return high_part + DB::parse<UInt64>(tokenizer[1]);
    }
};
struct UserAgentID : public IAttributeMetadata
{
    /// Reads the text as an unsigned 64-bit number.
    BinaryData parse(const std::string & s) const
    {
        const UInt64 identifier = DB::parse<UInt64>(s);
        return identifier;
    }
};
/// Click attributes: integers, plus a hash of the click URL.
using ClickGoodEvent = AttributeIntBase;
using ClickPriorityID = AttributeIntBase;
using ClickBannerID = AttributeIntBase;
using ClickPageID = AttributeIntBase;
using ClickPlaceID = AttributeIntBase;
using ClickTypeID = AttributeIntBase;
using ClickResourceID = AttributeIntBase;
using ClickDomainID = AttributeUIntBase;
using ClickCost = AttributeUIntBase;
using ClickURLHash = AttributeHashBase;
using ClickOrderID = AttributeUIntBase;
/// Goal and conversion attributes, parsed as signed integers.
using GoalReachesAny = AttributeIntBase;
using GoalReachesDepth = AttributeIntBase;
using GoalReachesURL = AttributeIntBase;
using ConvertedAny = AttributeIntBase;
using ConvertedDepth = AttributeIntBase;
using ConvertedURL = AttributeIntBase;
using GoalReaches = AttributeIntBase;
using Converted = AttributeIntBase;
/// Counter and visit identifiers, parsed as unsigned integers.
using CounterID = AttributeUIntBase;
using VisitID = AttributeUIntBase;
struct Interests : public IAttributeMetadata
{
    /// Parses a comma-separated list of numbers and ORs together those that are
    /// a single bit in the range 0x1 .. 0x2000; every other number is ignored.
    /// An empty string gives 0.
    BinaryData parse(const std::string & s) const
    {
        if (s.empty())
            return 0;

        using namespace boost::algorithm;
        using SplitIterator = split_iterator<std::string::const_iterator>;

        BinaryData mask = 0;
        const SplitIterator tokens_end;

        /// Consecutive commas are treated as one separator.
        for (SplitIterator token = make_split_iterator(s, token_finder(is_any_of(","), token_compress_on));
            token != tokens_end; ++token)
        {
            /// The parsed number is deliberately narrowed to 16 bits before the check.
            const UInt16 interest = DB::parse<UInt64>(boost::copy_range<std::string>(*token));

            const bool is_single_bit = interest != 0 && (interest & (interest - 1)) == 0;
            if (is_single_bit && interest <= 0x2000)
                mask |= interest;
        }

        return mask;
    }
};
/// Individual interest flags, parsed as plain unsigned integers.
using HasInterestPhoto = AttributeUIntBase;
using HasInterestMoviePremieres = AttributeUIntBase;
using HasInterestTourism = AttributeUIntBase;
using HasInterestFamilyAndChildren = AttributeUIntBase;
using HasInterestFinance = AttributeUIntBase;
using HasInterestB2B = AttributeUIntBase;
using HasInterestCars = AttributeUIntBase;
using HasInterestMobileAndInternetCommunications = AttributeUIntBase;
using HasInterestBuilding = AttributeUIntBase;
using HasInterestCulinary = AttributeUIntBase;
using HasInterestSoftware = AttributeUIntBase;
using HasInterestEstate = AttributeUIntBase;
using HasInterestHealthyLifestyle = AttributeUIntBase;
using HasInterestLiterature = AttributeUIntBase;
/// Hashes of Openstat / UTM / "from" tracking labels.
using OpenstatServiceNameHash = AttributeHashBase;
using OpenstatCampaignIDHash = AttributeHashBase;
using OpenstatAdIDHash = AttributeHashBase;
using OpenstatSourceIDHash = AttributeHashBase;
using UTMSourceHash = AttributeHashBase;
using UTMMediumHash = AttributeHashBase;
using UTMCampaignHash = AttributeHashBase;
using UTMContentHash = AttributeHashBase;
using UTMTermHash = AttributeHashBase;
using FromHash = AttributeHashBase;
/// Plain unsigned integer attributes.
using CLID = AttributeUIntBase;
using SocialSourceNetworkID = AttributeUIntBase;
/** Information about attribute types: maps an attribute name to the object that parses its values. */
using AttributeMetadatas = std::map<std::string, Poco::SharedPtr<IAttributeMetadata>>;
/// Builds and returns a new name -> parser map on every call.
/// Each parser object is allocated here and stored in the map as a Poco::SharedPtr.
inline AttributeMetadatas GetOLAPAttributeMetadata()
{
return
{
{"DummyAttribute", new DummyAttribute},
{"VisitStartDateTime", new VisitStartDateTime},
{"VisitStartDateTimeRoundedToMinute", new VisitStartDateTimeRoundedToMinute},
{"VisitStartDateTimeRoundedToHour", new VisitStartDateTimeRoundedToHour},
{"VisitStartDate", new VisitStartDate},
{"VisitStartDateRoundedToMonth", new VisitStartDateRoundedToMonth},
{"VisitStartTime", new VisitStartTime},
{"VisitStartTimeRoundedToMinute", new VisitStartTimeRoundedToMinute},
{"VisitStartYear", new VisitStartYear},
{"VisitStartMonth", new VisitStartMonth},
{"VisitStartDayOfWeek", new VisitStartDayOfWeek},
{"VisitStartDayOfMonth", new VisitStartDayOfMonth},
{"VisitStartHour", new VisitStartHour},
{"VisitStartMinute", new VisitStartMinute},
{"VisitStartSecond", new VisitStartSecond},
{"VisitStartWeek", new VisitStartWeek},
{"FirstVisitDateTime", new FirstVisitDateTime},
{"FirstVisitDate", new FirstVisitDate},
{"FirstVisitTime", new FirstVisitTime},
{"FirstVisitYear", new FirstVisitYear},
{"FirstVisitMonth", new FirstVisitMonth},
{"FirstVisitDayOfWeek", new FirstVisitDayOfWeek},
{"FirstVisitDayOfMonth", new FirstVisitDayOfMonth},
{"FirstVisitHour", new FirstVisitHour},
{"FirstVisitMinute", new FirstVisitMinute},
{"FirstVisitSecond", new FirstVisitSecond},
{"FirstVisitWeek", new FirstVisitWeek},
{"PredLastVisitDate", new PredLastVisitDate},
{"PredLastVisitYear", new PredLastVisitYear},
{"PredLastVisitMonth", new PredLastVisitMonth},
{"PredLastVisitDayOfWeek", new PredLastVisitDayOfWeek},
{"PredLastVisitDayOfMonth", new PredLastVisitDayOfMonth},
{"PredLastVisitWeek", new PredLastVisitWeek},
{"RegionID", new RegionID},
{"RegionCity", new RegionCity},
{"RegionArea", new RegionArea},
{"RegionCountry", new RegionCountry},
{"TraficSourceID", new TraficSourceID},
{"UserNewness", new UserNewness},
{"UserNewnessInterval", new UserNewnessInterval},
{"UserReturnTime", new UserReturnTime},
{"UserReturnTimeInterval", new UserReturnTimeInterval},
{"UserVisitsPeriod", new UserVisitsPeriod},
{"UserVisitsPeriodInterval",new UserVisitsPeriodInterval},
{"VisitTime", new VisitTime},
{"VisitTimeInterval", new VisitTimeInterval},
{"PageViews", new PageViews},
{"PageViewsInterval", new PageViewsInterval},
{"UserID", new UserID},
{"TotalVisits", new TotalVisits},
{"TotalVisitsInterval", new TotalVisitsInterval},
{"Age", new Age},
{"AgeInterval", new AgeInterval},
{"Sex", new Sex},
{"Income", new Income},
{"AdvEngineID", new AdvEngineID},
{"DotNet", new DotNet},
{"DotNetMajor", new DotNetMajor},
{"EndURLHash", new EndURLHash},
{"Flash", new Flash},
{"FlashMajor", new FlashMajor},
{"FlashExists", new FlashExists},
{"Hits", new Hits},
{"HitsInterval", new HitsInterval},
{"JavaEnable", new JavaEnable},
{"OSID", new OSID},
{"ClientIP", new ClientIP},
{"RefererHash", new RefererHash},
{"RefererDomainHash", new RefererDomainHash},
{"Resolution", new Resolution},
{"ResolutionWidthHeight", new ResolutionWidthHeight},
{"ResolutionWidth", new ResolutionWidth},
{"ResolutionHeight", new ResolutionHeight},
{"ResolutionWidthInterval", new ResolutionWidthInterval},
{"ResolutionHeightInterval",new ResolutionHeightInterval},
{"ResolutionColor", new ResolutionColor},
{"CookieEnable", new CookieEnable},
{"JavascriptEnable", new JavascriptEnable},
{"IsMobile", new IsMobile},
{"MobilePhoneID", new MobilePhoneID},
{"MobilePhoneModel", new MobilePhoneModel},
{"MobilePhoneModelHash", new MobilePhoneModelHash},
{"IPNetworkID", new IPNetworkID},
{"WindowClientArea", new WindowClientArea},
{"WindowClientWidth", new WindowClientWidth},
{"WindowClientHeight", new WindowClientHeight},
{"WindowClientAreaInterval",new WindowClientAreaInterval},
{"WindowClientWidthInterval",new WindowClientWidthInterval},
{"WindowClientHeightInterval",new WindowClientHeightInterval},
{"ClientTimeZone", new ClientTimeZone},
{"ClientDateTime", new ClientDateTime},
{"ClientTime", new ClientTime},
{"ClientTimeHour", new ClientTimeHour},
{"ClientTimeMinute", new ClientTimeMinute},
{"ClientTimeSecond", new ClientTimeSecond},
{"Silverlight", new Silverlight},
{"SilverlightMajor", new SilverlightMajor},
{"SearchEngineID", new SearchEngineID},
{"SearchPhraseHash", new SearchPhraseHash},
{"StartURLHash", new StartURLHash},
{"StartURLDomainHash", new StartURLDomainHash},
{"UserAgent", new UserAgent},
{"UserAgentVersion", new UserAgentVersion},
{"UserAgentMajor", new UserAgentMajor},
{"UserAgentID", new UserAgentID},
{"ClickGoodEvent", new ClickGoodEvent},
{"ClickPriorityID", new ClickPriorityID},
{"ClickBannerID", new ClickBannerID},
{"ClickPageID", new ClickPageID},
{"ClickPlaceID", new ClickPlaceID},
{"ClickTypeID", new ClickTypeID},
{"ClickResourceID", new ClickResourceID},
{"ClickDomainID", new ClickDomainID},
{"ClickCost", new ClickCost},
{"ClickURLHash", new ClickURLHash},
{"ClickOrderID", new ClickOrderID},
{"GoalReaches", new GoalReaches},
{"GoalReachesAny", new GoalReachesAny},
{"GoalReachesDepth", new GoalReachesDepth},
{"GoalReachesURL", new GoalReachesURL},
{"Converted", new Converted},
{"ConvertedAny", new ConvertedAny},
{"ConvertedDepth", new ConvertedDepth},
{"ConvertedURL", new ConvertedURL},
{"Bounce", new Bounce},
{"BouncePrecise", new BouncePrecise},
{"IsNewUser", new IsNewUser},
{"CodeVersion", new CodeVersion},
{"CounterID", new CounterID},
{"VisitID", new VisitID},
{"IsYandex", new IsYandex},
{"TopLevelDomain", new TopLevelDomain},
{"URLScheme", new URLScheme},
{"UserIDCreateDateTime", new UserIDCreateDateTime},
{"UserIDCreateDate", new UserIDCreateDate},
{"UserIDAge", new UserIDAge},
{"UserIDAgeInterval", new UserIDAgeInterval},
{"OSMostAncestor", new OSMostAncestor},
{"SearchEngineMostAncestor",new SearchEngineMostAncestor},
{"BrowserLanguage", new BrowserLanguage},
{"BrowserCountry", new BrowserCountry},
{"Interests", new Interests},
{"HasInterestPhoto", new HasInterestPhoto},
{"HasInterestMoviePremieres", new HasInterestMoviePremieres},
{"HasInterestMobileAndInternetCommunications", new HasInterestMobileAndInternetCommunications},
{"HasInterestFinance", new HasInterestFinance},
{"HasInterestFamilyAndChildren", new HasInterestFamilyAndChildren},
{"HasInterestCars", new HasInterestCars},
{"HasInterestB2B", new HasInterestB2B},
{"HasInterestTourism", new HasInterestTourism},
{"HasInterestBuilding", new HasInterestBuilding},
{"HasInterestCulinary", new HasInterestCulinary},
{"HasInterestSoftware", new HasInterestSoftware},
{"HasInterestEstate", new HasInterestEstate},
{"HasInterestHealthyLifestyle", new HasInterestHealthyLifestyle},
{"HasInterestLiterature", new HasInterestLiterature},
{"OpenstatServiceNameHash",new OpenstatServiceNameHash},
{"OpenstatCampaignIDHash", new OpenstatCampaignIDHash},
{"OpenstatAdIDHash", new OpenstatAdIDHash},
{"OpenstatSourceIDHash", new OpenstatSourceIDHash},
{"UTMSourceHash", new UTMSourceHash},
{"UTMMediumHash", new UTMMediumHash},
{"UTMCampaignHash", new UTMCampaignHash},
{"UTMContentHash", new UTMContentHash},
{"UTMTermHash", new UTMTermHash},
{"FromHash", new FromHash},
{"CLID", new CLID},
{"SocialSourceNetworkID", new SocialSourceNetworkID},
{"CorrectedTraficSourceID", new CorrectedTraficSourceID},
{"CorrectedSearchEngineID", new CorrectedSearchEngineID},
};
}
}
}
#include <Poco/Net/HTTPBasicCredentials.h>
#include <DB/Interpreters/executeQuery.h>
#include <DB/Interpreters/Quota.h>
#include <DB/IO/WriteBufferFromHTTPServerResponse.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/ReadBufferFromIStream.h>
#include <DB/IO/ReadBufferFromString.h>
#include "OLAPQueryParser.h"
#include "OLAPQueryConverter.h"
#include "OLAPHTTPHandler.h"
#include <DB/Common/Stopwatch.h>
#include <iomanip>
namespace DB
{
/// Error codes reported by handleRequest() for exceptions that do not carry a DB error code of their own.
/// They are only declared here; the values are defined elsewhere.
namespace ErrorCodes
{
extern const int POCO_EXCEPTION;
extern const int STD_EXCEPTION;
extern const int UNKNOWN_EXCEPTION;
}
/// Reads an OLAP-server query from the request body, converts it to a ClickHouse query,
/// executes it and writes the result into the HTTP response. Exceptions propagate to the caller.
void OLAPHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
{
    HTMLForm params(request);

    /// The whole body is read into memory first, so that it can be both logged and parsed.
    std::ostringstream body_collector;
    body_collector << request.stream().rdbuf();
    std::string body = body_collector.str();

    LOG_TRACE(log, "Request URI: " << request.getURI());
    LOG_TRACE(log, "Request body: " << body);

    std::istringstream body_stream(body);

    BlockInputStreamPtr query_plan;

    /// The user name and password can be passed both in URL parameters and via HTTP Basic authentication
    /// (neither way is secure). Basic authentication, when present, takes precedence.
    std::string user = params.get("user", "default");
    std::string password = params.get("password", "");
    std::string quota_key = params.get("quota_key", "");

    if (request.hasCredentials())
    {
        Poco::Net::HTTPBasicCredentials basic_auth(request);
        user = basic_auth.getUsername();
        password = basic_auth.getPassword();
    }

    /// A per-request copy of the global context, configured for this user and interface.
    Context context = *server.global_context;
    context.setGlobalContext(*server.global_context);
    context.setSetting("profile", profile);
    context.setUser(user, password, request.clientAddress().host(), quota_key);
    context.setInterface(Context::Interface::OLAP_HTTP);
    context.setHTTPMethod(Context::HTTPMethod::POST);

    OLAP::QueryParseResult parsed_query = server.olap_parser->parse(body_stream);

    std::string converted_query;
    server.olap_converter->OLAPServerQueryToClickHouse(parsed_query, context, converted_query);

    LOG_TRACE(log, "Converted query: " << converted_query);

    ReadBufferFromString in(converted_query);
    WriteBufferFromHTTPServerResponse out(response);

    auto set_content_type = [&response] (const String & content_type)
    {
        response.setContentType(content_type);
    };

    Stopwatch watch;
    executeQuery(in, out, context, query_plan, set_content_type);
    watch.stop();

    /// If there was no exception and nothing has been sent yet, the HTTP headers with code 200 are sent.
    out.finalize();
}
/// Entry point for one HTTP request: runs processQuery() and turns any exception into
/// a 500 response (if nothing has been sent yet) plus an error log record.
void OLAPHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
{
    /// Needed for keep-alive to work.
    const bool is_http_1_1 = request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1;
    if (is_http_1_1)
        response.setChunkedTransferEncoding(true);

    /// Common tail of every exception handler below.
    auto report_failure = [this, &response] (const std::string & message)
    {
        response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);

        /// The body can only be written if the response has not been started yet.
        if (!response.sent())
            response.send() << message << std::endl;

        LOG_ERROR(log, message);
    };

    try
    {
        processQuery(request, response);
        LOG_INFO(log, "Done processing query");
    }
    catch (Exception & e)
    {
        std::stringstream description;
        description << "Code: " << e.code()
            << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what();
        report_failure(description.str());
    }
    catch (Poco::Exception & e)
    {
        std::stringstream description;
        description << "Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
            << ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what();
        report_failure(description.str());
    }
    catch (std::exception & e)
    {
        std::stringstream description;
        description << "Code: " << ErrorCodes::STD_EXCEPTION << ". " << e.what();
        report_failure(description.str());
    }
    catch (...)
    {
        std::stringstream description;
        description << "Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ". Unknown exception.";
        report_failure(description.str());
    }
}
}
#pragma once
#include "Server.h"
#include <DB/Common/CurrentMetrics.h>
namespace DB
{
/// Handler of HTTP requests in the OLAP-server format.
class OLAPHTTPHandler : public Poco::Net::HTTPRequestHandler
{
public:
/// Stores a reference to the server, obtains the "OLAPHTTPHandler" logger and reads
/// the settings profile name from the "olap_compatibility.profile" key of the application config.
OLAPHTTPHandler(Server & server_)
: server(server_),
log(&Logger::get("OLAPHTTPHandler")),
profile(Poco::Util::Application::instance().config().getString("olap_compatibility.profile"))
{
}
/// Processes one request; exceptions are converted into an HTTP 500 response and an error log record.
void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response);
private:
Server & server;
Logger * log;
/// Name of the settings profile applied to the query context (the "profile" setting).
const String profile;
/// Held for the lifetime of the handler.
/// NOTE(review): presumably keeps the HTTPConnection metric incremented while the handler exists - confirm against CurrentMetrics.
CurrentMetrics::Increment metric_increment{CurrentMetrics::HTTPConnection};
/// Parses, converts and executes the query; may throw.
void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response);
};
}
#include "OLAPQueryConverter.h"
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/WriteBufferFromString.h>
namespace DB
{
/// Error codes used by the query converter. They are only declared here; the values are defined elsewhere.
namespace ErrorCodes
{
extern const int UNSUPPORTED_PARAMETER;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNKNOWN_RELATION;
}
namespace OLAP
{
/// Reads both table names from the "olap_compatibility.table_for_single_counter" and
/// "olap_compatibility.table_for_all_counters" configuration keys and delegates to the constructor below.
QueryConverter::QueryConverter(Poco::Util::AbstractConfiguration & config)
: QueryConverter(
config.getString("olap_compatibility.table_for_single_counter"),
config.getString("olap_compatibility.table_for_all_counters"))
{
}
/// Stores the two table names, fills the attribute conversion maps
/// and loads the attribute parsers returned by GetOLAPAttributeMetadata().
QueryConverter::QueryConverter(const String & table_for_single_counter, const String & table_for_all_counters)
: table_for_single_counter(table_for_single_counter), table_for_all_counters(table_for_all_counters)
{
fillFormattedAttributeMap();
fillNumericAttributeMap();
fillFormattingAggregatedAttributeMap();
attribute_metadatas = GetOLAPAttributeMetadata();
}
/// Returns the longest prefix of `s` that consists only of ASCII letters (a-z, A-Z).
static std::string firstWord(std::string s)
{
    const auto is_ascii_letter = [] (char c)
    {
        const bool is_lower = c >= 'a' && c <= 'z';
        const bool is_upper = c >= 'A' && c <= 'Z';
        return is_lower || is_upper;
    };

    size_t word_length = 0;
    while (word_length < s.length() && is_ascii_letter(s[word_length]))
        ++word_length;

    /// Cut everything starting from the first character that is not a letter.
    s.resize(word_length);
    return s;
}
void QueryConverter::OLAPServerQueryToClickHouse(const QueryParseResult & query, Context & inout_context, std::string & out_query) const
{
/// Пустая строка, или строка вида ", 'ua'".
std::string regions_point_of_view_formatted;
if (!query.regions_point_of_view.empty())
{
std::stringstream tmp;
tmp << ", " << mysqlxx::quote << query.regions_point_of_view;
regions_point_of_view_formatted = tmp.str();
}
/// Проверим, умеем ли мы выполнять такой запрос.
if (query.format != FORMAT_TAB)
throw Exception("Only tab-separated output format is supported", ErrorCodes::UNSUPPORTED_PARAMETER);
/// Учтем некоторые настройки (далеко не все).
Settings new_settings = inout_context.getSettings();
if (query.concurrency != 0)
new_settings.max_threads = query.concurrency;
if (query.max_execution_time != 0)
new_settings.limits.max_execution_time = Poco::Timespan(query.max_execution_time, 0);
if (query.max_result_size != 0)
new_settings.limits.max_rows_to_group_by = query.max_result_size;
if (query.has_overflow_mode)
{
switch (query.overflow_mode)
{
case OLAP::OVERFLOW_MODE_THROW:
new_settings.limits.group_by_overflow_mode = DB::OverflowMode::THROW;
break;
case OLAP::OVERFLOW_MODE_BREAK:
new_settings.limits.group_by_overflow_mode = DB::OverflowMode::BREAK;
break;
case OLAP::OVERFLOW_MODE_ANY:
new_settings.limits.group_by_overflow_mode = DB::OverflowMode::ANY;
break;
}
}
inout_context.setSettings(new_settings);
/// Составим запрос.
out_query = "SELECT ";
std::vector<std::string> selected_expressions;
/// Что выбирать: ключи агрегации и агрегированные значения.
for (size_t i = 0; i < query.key_attributes.size(); ++i)
{
const QueryParseResult::KeyAttribute & key = query.key_attributes[i];
std::string s = convertAttributeFormatted(key.attribute, key.parameter, regions_point_of_view_formatted);
if (i > 0)
out_query += ", ";
out_query += s + " AS _" + firstWord(key.attribute) + (key.parameter ? "_" + toString(key.parameter) : "");
selected_expressions.push_back(s);
}
for (size_t i = 0; i < query.aggregates.size(); ++i)
{
const QueryParseResult::Aggregate & aggregate = query.aggregates[i];
std::string s = convertAggregateFunction(aggregate.attribute, aggregate.parameter, aggregate.function, query, regions_point_of_view_formatted);
if (query.key_attributes.size() + i > 0)
out_query += ", ";
out_query += s + " AS _" + firstWord(aggregate.function) + "_" + firstWord(aggregate.attribute) + (aggregate.parameter ? "_" + toString(aggregate.parameter) : "");
selected_expressions.push_back(s);
}
/// Из какой таблицы.
out_query += " FROM " + getTableName(query.CounterID, query.local);
/// Добавляем сэмплирование.
if (query.sample != 1)
out_query += " SAMPLE " + toString(query.sample);
/// Условия.
out_query += " WHERE ";
/// Диапазон дат.
out_query += convertDateRange(query.date_first, query.date_last);
/// Счетчик.
if (query.CounterID != 0)
out_query += " AND " + convertCounterID(query.CounterID);
/// Произвольные условия.
for (size_t i = 0; i < query.where_conditions.size(); ++i)
{
const QueryParseResult::WhereCondition & condition = query.where_conditions[i];
out_query += " AND " + convertCondition(
condition.attribute, condition.parameter, condition.relation, condition.rhs, regions_point_of_view_formatted);
}
/// Группировка.
if (!query.key_attributes.empty())
{
out_query += " GROUP BY ";
for (size_t i = 0; i < query.key_attributes.size(); ++i)
{
if (i > 0)
out_query += ", ";
out_query += selected_expressions[i];
}
}
/// Условие для групп.
out_query += " " + getHavingSection();
/// Сортировка.
if (!query.sort_columns.empty())
{
out_query += " ORDER BY ";
for (size_t i = 0; i < query.sort_columns.size(); ++i)
{
const QueryParseResult::SortColumn & column = query.sort_columns[i];
if (i > 0)
out_query += ", ";
out_query += selected_expressions[column.index - 1];
out_query += " " + convertSortDirection(column.direction);
}
}
/// Ограничение на количество выводимых строк.
if (query.limit != 0)
out_query += " LIMIT " + toString(query.limit);
}
/** Builds the expression for an attribute in a form suitable for output and for grouping.
  *
  * Lookup order:
  *  1. a dedicated expression from formatted_attribute_map;
  *  2. region attributes: the numeric expression with regions_point_of_view_formatted substituted;
  *  3. the numeric expression, wrapped into the formatting expression if the attribute has one.
  *
  * Throws Exception(UNKNOWN_IDENTIFIER) if the attribute is not known.
  */
std::string QueryConverter::convertAttributeFormatted(const std::string & attribute, unsigned parameter,
    const std::string & regions_point_of_view_formatted) const
{
    const auto formatted = formatted_attribute_map.find(attribute);
    if (formatted != formatted_attribute_map.end())
        return Poco::format(formatted->second, parameter);

    /** For region attributes the expression contains a %s substitution,
      * where regions_point_of_view_formatted has to be placed.
      */
    if (regions_attributes_set.count(attribute))
        return Poco::format(numeric_attribute_map.at(attribute), regions_point_of_view_formatted);

    const auto numeric = numeric_attribute_map.find(attribute);
    if (numeric == numeric_attribute_map.end())
        throw Exception("Unknown attribute: " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER);

    const std::string numeric_expression = Poco::format(numeric->second, parameter);

    const auto formatting = formatting_aggregated_attribute_map.find(attribute);
    if (formatting == formatting_aggregated_attribute_map.end())
        return numeric_expression;

    return Poco::format(formatting->second, std::string("(") + numeric_expression + ")");
}
/** Builds the numeric expression for an attribute: the form used in conditions and aggregate functions.
  * Throws Exception(UNKNOWN_IDENTIFIER) if the attribute is not known.
  */
std::string QueryConverter::convertAttributeNumeric(const std::string & attribute, unsigned parameter,
    const std::string & regions_point_of_view_formatted) const
{
    /** For region attributes the expression contains a %s substitution,
      * where regions_point_of_view_formatted has to be placed.
      */
    const bool is_region_attribute = regions_attributes_set.count(attribute) != 0;
    if (is_region_attribute)
        return Poco::format(numeric_attribute_map.at(attribute), regions_point_of_view_formatted);

    const auto numeric = numeric_attribute_map.find(attribute);
    if (numeric == numeric_attribute_map.end())
        throw Exception("Unknown attribute: " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER);

    return Poco::format(numeric->second, parameter);
}
/// Returns true if `str` begins with `prefix`. An empty prefix matches any string.
static bool StartsWith(const std::string & str, const std::string & prefix)
{
    if (str.length() < prefix.length())
        return false;

    /// Compare in place instead of materializing a substring.
    return 0 == str.compare(0, prefix.length(), prefix);
}
std::string QueryConverter::convertAggregateFunction(const std::string & attribute, unsigned parameter, const std::string & name,
const QueryParseResult & query, const std::string & regions_point_of_view_formatted) const
{
bool float_value = false;
/// если включено сэмплирование, то надо умножить агрегатные функции на 1./sample
if (name == "count")
{
if (query.sample != 1)
{
float_value = true;
return "sum(Sign)*" + toString(1./query.sample);
}
else
return "sum(Sign)";
}
std::string numeric = convertAttributeNumeric(attribute, parameter, regions_point_of_view_formatted);
if (name == "uniq" ||
name == "uniq_sort" ||
name == "uniq_hash" ||
name == "uniq_approx" ||
name == "sequental_uniq" ||
StartsWith(name, "uniq_approx"))
return "uniq(" + numeric + ")";
if (name == "uniq_state")
return "uniqState(" + numeric + ")";
if (name == "uniq_hll12")
return "uniqHLL12(" + numeric + ")";
if (name == "uniq_hll12_state")
return "uniqHLL12State(" + numeric + ")";
if (name == "count_non_zero")
{
if (query.sample != 1)
{
float_value = true;
return "sum((" + numeric + ") == 0 ? toInt64(0) : toInt64(Sign)) * " + toString(1/query.sample);
}
else
return "sum((" + numeric + ") == 0 ? toInt64(0) : toInt64(Sign))";
}
if (name == "count_non_minus_one")
{
if (query.sample != 1)
{
float_value = true;
return "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(Sign)) * " + toString(1/query.sample);
}
else
return "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(Sign))";
}
bool trivial_format;
std::string format;
if (formatting_aggregated_attribute_map.count(attribute))
{
format = formatting_aggregated_attribute_map.at(attribute);
trivial_format = false;
}
else
{
format = "%s";
trivial_format = true;
}
std::string s;
if (name == "sum")
{
if (query.sample != 1)
{
s = "sum((" + numeric + ") * Sign) * " + toString(1/query.sample);
float_value = true;
}
else
s = "sum((" + numeric + ") * Sign)";
}
if (name == "sum_non_minus_one")
{
if (query.sample != 1)
{
s = "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(" + numeric + ") * Sign) * " + toString(1/query.sample);
float_value = true;
}
else
s = "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(" + numeric + ") * Sign)";
}
if (name == "avg")
{
s = "sum((" + numeric + ") * Sign) / sum(Sign)";
float_value = true;
}
if (name == "avg_non_zero")
{
s = "sum((" + numeric + ") * Sign) / sum((" + numeric + ") == 0 ? toInt64(0) : toInt64(Sign))";
float_value = true;
}
if (name == "avg_non_minus_one")
{
s = "sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(" + numeric + ") * Sign) / sum((" + numeric + ") == -1 ? toInt64(0) : toInt64(Sign))";
float_value = true;
}
if (name == "min")
s = "min(" + numeric + ")";
if (name == "max")
s = "max(" + numeric + ")";
/// Если агрегатная функция возвращает дробное число, и атрибут имеет нетривиальное форматирование, после агрегации приведем дробное число к целому.
bool need_cast = !trivial_format && float_value;
return Poco::format(format, std::string() + (need_cast ? "toInt64" : "") + "(" + s + ")");
}
/// Parses the right-hand side `value` of a condition with the parser registered for `attribute`
/// and returns the parsed value as text.
/// Throws Exception(UNKNOWN_IDENTIFIER) if no parser is registered for the attribute.
std::string QueryConverter::convertConstant(const std::string & attribute, const std::string & value) const
{
    if (attribute_metadatas.count(attribute))
        return toString(attribute_metadatas.at(attribute)->parse(value));

    throw Exception("Unknown attribute " + attribute, ErrorCodes::UNKNOWN_IDENTIFIER);
}
/** Builds one WHERE condition.
  *
  * attribute, parameter - left-hand side attribute.
  * name                 - OLAP-server relation name ("equals", "region_in", ...).
  * rhs                  - right-hand side constant as written in the request.
  *
  * Both sides are converted before the relation name is examined, so errors about
  * an unknown attribute are reported before an unknown relation.
  * Throws Exception(UNKNOWN_RELATION) if the relation name is not recognized.
  */
std::string QueryConverter::convertCondition(
    const std::string & attribute,
    unsigned parameter,
    const std::string & name,
    const std::string & rhs,
    const std::string & regions_point_of_view_formatted) const
{
    const std::string value = convertAttributeNumeric(attribute, parameter, regions_point_of_view_formatted);
    const std::string constant = convertConstant(attribute, rhs);

    /// Plain comparisons: "(value) <op> constant".
    struct Comparison
    {
        const char * relation;
        const char * operation;
    };

    static const Comparison comparisons[] =
    {
        {"equals", " == "},
        {"not_equals", " != "},
        {"less", " < "},
        {"greater", " > "},
        {"less_or_equals", " <= "},
        {"greater_or_equals", " >= "},
    };

    for (const Comparison & comparison : comparisons)
        if (name == comparison.relation)
            return "(" + value + ")" + comparison.operation + constant;

    /// Hierarchy membership: regions, operating systems, search engines.
    if (name == "region_in" || name == "region_not_in")
    {
        const std::string call = "regionIn(" + value + ", toUInt32(" + constant + ")" + regions_point_of_view_formatted + ")";
        return name == "region_in" ? call : "NOT " + call;
    }

    if (name == "os_in" || name == "os_not_in")
    {
        const std::string call = "OSIn(" + value + ", " + constant + ")";
        return name == "os_in" ? call : "NOT " + call;
    }

    if (name == "se_in" || name == "se_not_in")
    {
        const std::string call = "SEIn(toUInt8(" + value + "), toUInt8(" + constant + "))";
        return name == "se_in" ? call : "NOT " + call;
    }

    /// Interests are checked with a bitwise AND of the value and the constant.
    const std::string masked = "bitwiseAnd(" + value + ", " + constant + ")";

    if (name == "interest_has_all_from")
        return masked + " == " + constant;
    if (name == "interest_not_has_all_from")
        return masked + " != " + constant;
    if (name == "interest_has_any_from")
        return masked + " != 0";
    if (name == "interest_not_has_any_from")
        return masked + " == 0";

    throw Exception("Unknown relation " + name, ErrorCodes::UNKNOWN_RELATION);
}
/// Maps the OLAP-server sort direction to SQL: "descending" gives DESC, anything else gives ASC.
std::string QueryConverter::convertSortDirection(const std::string & direction) const
{
    return direction == "descending" ? "DESC" : "ASC";
}
std::string QueryConverter::convertDateRange(time_t date_first, time_t date_last) const
{
std::string first_str;
std::string last_str;
{
WriteBufferFromString first_buf(first_str);
WriteBufferFromString last_buf(last_str);
writeDateText(DateLUT::instance().toDayNum(date_first), first_buf);
writeDateText(DateLUT::instance().toDayNum(date_last), last_buf);
}
return "StartDate >= toDate('" + first_str + "') AND StartDate <= toDate('" + last_str + "')";
}
/// Builds the condition that restricts the query to a single counter.
std::string QueryConverter::convertCounterID(CounterID_t CounterID) const
{
    std::string condition = "CounterID == ";
    condition += toString(CounterID);
    return condition;
}
/// Chooses the table to query: the all-counters table is used only when
/// no counter is specified (CounterID == 0) and the query is not local.
std::string QueryConverter::getTableName(CounterID_t CounterID, bool local) const
{
    if (CounterID != 0 || local)
        return table_for_single_counter;

    return table_for_all_counters;
}
/// Returns the fixed HAVING clause that keeps only groups whose sum of Sign is positive.
std::string QueryConverter::getHavingSection() const
{
    static const char having_section[] = "HAVING sum(Sign) > 0";
    return having_section;
}
/** Fills numeric_attribute_map: OLAP-server attribute name -> format string of the
  * ClickHouse expression that yields the numeric value of the attribute.
  *
  * The values are format strings passed to Poco::format by convertAttributeNumeric
  * and convertAttributeFormatted:
  *  - %s in the region attributes (RegionCity, RegionArea, RegionCountry) receives
  *    regions_point_of_view_formatted;
  *  - %u (GoalReaches, Converted) receives the attribute parameter;
  *  - %% is presumably an escaped literal percent sign - confirm against Poco::format.
  */
void QueryConverter::fillNumericAttributeMap()
{
/// Local shorthand for a map insertion; undefined at the end of the function.
#define M(a, b) numeric_attribute_map[a] = b;
M("DummyAttribute", "0")
/// Visit start date and time.
M("VisitStartDateTime", "toInt32(StartTime)")
M("VisitStartDateTimeRoundedToMinute", "toInt32(toStartOfMinute(StartTime))")
M("VisitStartDateTimeRoundedToHour", "toInt32(toStartOfHour(StartTime))")
M("VisitStartDate", "toInt32(toDateTime(StartDate))")
M("VisitStartDateRoundedToMonth", "toInt32(toDateTime(toStartOfMonth(StartDate)))")
M("VisitStartWeek", "toInt32(toDateTime(toMonday(StartDate)))")
M("VisitStartTime", "toInt32(toTime(StartTime))")
M("VisitStartTimeRoundedToMinute", "toInt32(toStartOfMinute(toTime(StartTime)))")
M("VisitStartYear", "toYear(StartDate)")
M("VisitStartMonth", "toMonth(StartDate)")
M("VisitStartDayOfWeek", "toDayOfWeek(StartDate)")
M("VisitStartDayOfMonth", "toDayOfMonth(StartDate)")
M("VisitStartHour", "toHour(StartTime)")
M("VisitStartMinute", "toMinute(StartTime)")
M("VisitStartSecond", "toSecond(StartTime)")
/// First visit date and time.
M("FirstVisitDateTime", "toInt32(FirstVisit)")
M("FirstVisitDate", "toInt32(toDateTime(toDate(FirstVisit)))")
M("FirstVisitWeek", "toInt32(toDateTime(toMonday(FirstVisit)))")
M("FirstVisitTime", "toInt32(toTime(FirstVisit))")
M("FirstVisitYear", "toYear(FirstVisit)")
M("FirstVisitMonth", "toMonth(FirstVisit)")
M("FirstVisitDayOfWeek", "toDayOfWeek(FirstVisit)")
M("FirstVisitDayOfMonth", "toDayOfMonth(FirstVisit)")
M("FirstVisitHour", "toHour(FirstVisit)")
M("FirstVisitMinute", "toMinute(FirstVisit)")
M("FirstVisitSecond", "toSecond(FirstVisit)")
/// Date of the visit before the last one.
M("PredLastVisitDate", "toInt32(toDateTime(PredLastVisit))")
M("PredLastVisitWeek", "toInt32(toDateTime(toMonday(PredLastVisit)))")
M("PredLastVisitYear", "toYear(PredLastVisit)")
M("PredLastVisitMonth", "toMonth(PredLastVisit)")
M("PredLastVisitDayOfWeek","toDayOfWeek(PredLastVisit)")
M("PredLastVisitDayOfMonth","toDayOfMonth(PredLastVisit)")
/// Client-side time.
M("ClientDateTime", "toInt32(ClientEventTime)")
M("ClientTime", "toInt32(toTime(ClientEventTime))")
M("ClientTimeHour", "toHour(ClientEventTime)")
M("ClientTimeMinute", "toMinute(ClientEventTime)")
M("ClientTimeSecond", "toSecond(ClientEventTime)")
M("SearchPhraseHash", "SearchPhraseHash")
M("RefererDomainHash", "RefererDomainHash")
M("StartURLHash", "NormalizedStartURLHash")
M("StartURLDomainHash", "StartURLDomainHash")
/// Regions: %s is replaced with regions_point_of_view_formatted.
M("RegionID", "RegionID")
M("RegionCity", "regionToCity(RegionID%s)")
M("RegionArea", "regionToArea(RegionID%s)")
M("RegionCountry", "regionToCountry(RegionID%s)")
M("TraficSourceID", "TraficSourceID")
M("IsNewUser", "intDiv(toUInt32(FirstVisit), 1800) == intDiv(toUInt32(StartTime), 1800)")
M("UserNewness", "intDiv(toUInt64(StartTime)-toUInt64(FirstVisit), 86400)")
M("UserNewnessInterval", "roundToExp2(intDiv(toUInt64(StartTime)-toUInt64(FirstVisit), 86400))")
M("UserReturnTime", "toInt32(toDate(StartTime))-toInt32(PredLastVisit)")
M("UserReturnTimeInterval","roundToExp2(toInt32(toDate(StartTime))-toInt32(PredLastVisit))")
M("UserVisitsPeriod", "(TotalVisits <= 1 ? toUInt16(0) : toUInt16((toInt64(StartTime)-toInt64(FirstVisit)) / (86400 * (TotalVisits - 1))))")
M("UserVisitsPeriodInterval","(TotalVisits <= 1 ? toUInt16(0) : roundToExp2(toUInt16((toInt64(StartTime)-toInt64(FirstVisit)) / (86400 * (TotalVisits - 1)))))")
M("VisitTime", "Duration")
M("VisitTimeInterval", "roundDuration(Duration)")
M("PageViews", "PageViews")
M("PageViewsInterval", "roundToExp2(PageViews)")
M("Bounce", "PageViews <= 1")
M("BouncePrecise", "IsBounce")
M("IsYandex", "IsYandex")
M("UserID", "UserID")
/// NOTE(review): UserIDCreateDateTime and UserIDCreateDate map to the same expression;
/// they differ only in the formatting applied by fillFormattingAggregatedAttributeMap.
M("UserIDCreateDateTime", "(UserID > 10000000000000000000 OR UserID %% 10000000000 > 2000000000 OR UserID %% 10000000000 < 1000000000 ? toUInt64(0) : UserID %% 10000000000)")
M("UserIDCreateDate", "(UserID > 10000000000000000000 OR UserID %% 10000000000 > 2000000000 OR UserID %% 10000000000 < 1000000000 ? toUInt64(0) : UserID %% 10000000000)")
M("UserIDAge", "(UserID > 10000000000000000000 OR UserID %% 10000000000 < 1000000000 OR UserID %% 10000000000 > toUInt64(StartTime) ? toInt64(-1) : intDiv(toInt64(StartTime) - UserID %% 10000000000, 86400))")
M("UserIDAgeInterval", "(UserID > 10000000000000000000 OR UserID %% 10000000000 < 1000000000 OR UserID %% 10000000000 > toUInt64(StartTime) ? toInt64(-1) : toInt64(roundToExp2(intDiv(toUInt64(StartTime) - UserID %% 10000000000, 86400))))")
M("TotalVisits", "TotalVisits")
M("TotalVisitsInterval", "roundToExp2(TotalVisits)")
M("Age", "Age")
M("AgeInterval", "roundAge(Age)")
M("Sex", "Sex")
M("Income", "Income")
M("AdvEngineID", "AdvEngineID")
/// Version-like attributes are packed into a single number from their components.
M("DotNet", "NetMajor * 256 + NetMinor")
M("DotNetMajor", "NetMajor")
M("Flash", "FlashMajor * 256 + FlashMinor")
M("FlashExists", "FlashMajor > 0")
M("FlashMajor", "FlashMajor")
M("Silverlight", "SilverlightVersion1 * 72057594037927936 + SilverlightVersion2 * 281474976710656 + SilverlightVersion3 * 65536 + SilverlightVersion4")
M("SilverlightMajor", "SilverlightVersion1")
M("Hits", "Hits")
M("HitsInterval", "roundToExp2(Hits)")
M("JavaEnable", "JavaEnable")
M("CookieEnable", "CookieEnable")
M("JavascriptEnable", "JavascriptEnable")
M("IsMobile", "IsMobile")
M("MobilePhoneID", "MobilePhone")
M("MobilePhoneModelHash", "halfMD5(MobilePhoneModel)")
M("MobilePhoneModel", "reinterpretAsUInt64(MobilePhoneModel)")
M("BrowserLanguage", "BrowserLanguage")
M("BrowserCountry", "BrowserCountry")
M("TopLevelDomain", "TopLevelDomain")
M("URLScheme", "URLScheme")
M("IPNetworkID", "IPNetworkID")
M("ClientTimeZone", "ClientTimeZone")
M("OSID", "OS")
M("OSMostAncestor", "OSToRoot(OS)")
M("ClientIP", "ClientIP")
/// Screen resolution and browser window size, packed into single numbers.
M("Resolution", "ResolutionWidth * 16777216 + ResolutionHeight * 256 + ResolutionDepth")
M("ResolutionWidthHeight","ResolutionWidth * 65536 + ResolutionHeight")
M("ResolutionWidth", "ResolutionWidth")
M("ResolutionHeight", "ResolutionHeight")
M("ResolutionWidthInterval","intDiv(ResolutionWidth, 100) * 100")
M("ResolutionHeightInterval","intDiv(ResolutionHeight, 100) * 100")
M("ResolutionColor", "ResolutionDepth")
M("WindowClientArea", "WindowClientWidth * 65536 + WindowClientHeight")
M("WindowClientAreaInterval","intDiv(WindowClientWidth, 100) * 6553600 + intDiv(WindowClientHeight, 100) * 100")
M("WindowClientWidth", "WindowClientWidth")
M("WindowClientWidthInterval","intDiv(WindowClientWidth, 100) * 100")
M("WindowClientHeight", "WindowClientHeight")
M("WindowClientHeightInterval","intDiv(WindowClientHeight, 100) * 100")
M("SearchEngineID", "SearchEngineID")
M("SearchEngineMostAncestor", "SEToRoot(toUInt8(SearchEngineID))")
M("CodeVersion", "CodeVersion")
M("UserAgent", "UserAgent * 16777216 + UserAgentMajor * 65536 + UserAgentMinor")
M("UserAgentVersion", "UserAgentMajor * 65536 + UserAgentMinor")
M("UserAgentMajor", "UserAgent * 256 + UserAgentMajor")
M("UserAgentID", "UserAgent")
/// Click attributes.
M("ClickGoodEvent", "ClickGoodEvent")
M("ClickPriorityID", "ClickPriorityID")
M("ClickBannerID", "ClickBannerID")
M("ClickPageID", "ClickPageID")
M("ClickPlaceID", "ClickPlaceID")
M("ClickTypeID", "ClickTypeID")
M("ClickResourceID", "ClickResourceID")
M("ClickDomainID", "ClickDomainID")
M("ClickCost", "ClickCost")
M("ClickURLHash", "ClickURLHash")
M("ClickOrderID", "ClickOrderID")
/// Goals: %u is replaced with the attribute parameter.
M("GoalReachesAny", "GoalReachesAny")
M("GoalReachesDepth", "GoalReachesDepth")
M("GoalReachesURL", "GoalReachesURL")
M("ConvertedAny", "(GoalReachesAny > 1 ? toInt32(1) : GoalReachesAny)")
M("ConvertedDepth", "(GoalReachesDepth > 1 ? toInt32(1) : GoalReachesDepth)")
M("ConvertedURL", "(GoalReachesURL > 1 ? toInt32(1) : GoalReachesURL)")
M("GoalReaches", "countEqual(Goals.ID, toUInt32(%u))")
M("Converted", "has(Goals.ID, toUInt32(%u))")
M("CounterID", "CounterID")
M("VisitID", "VisitID")
/// Interests: each HasInterest* attribute extracts one bit of Interests.
M("Interests", "Interests")
M("HasInterestPhoto", "modulo(intDiv(Interests, 128), 2)")
M("HasInterestMoviePremieres","modulo(intDiv(Interests, 64), 2)")
M("HasInterestTourism", "modulo(intDiv(Interests, 32), 2)")
M("HasInterestFamilyAndChildren","modulo(intDiv(Interests, 16), 2)")
M("HasInterestFinance", "modulo(intDiv(Interests, 8), 2)")
M("HasInterestB2B", "modulo(intDiv(Interests, 4), 2)")
M("HasInterestCars", "modulo(intDiv(Interests, 2), 2)")
M("HasInterestMobileAndInternetCommunications","modulo(Interests, 2)")
M("HasInterestBuilding", "modulo(intDiv(Interests, 256), 2)")
M("HasInterestCulinary", "modulo(intDiv(Interests, 512), 2)")
M("OpenstatServiceNameHash","OpenstatServiceNameHash")
M("OpenstatCampaignIDHash","OpenstatCampaignIDHash")
M("OpenstatAdIDHash", "OpenstatAdIDHash")
M("OpenstatSourceIDHash", "OpenstatSourceIDHash")
M("UTMSourceHash", "UTMSourceHash")
M("UTMMediumHash", "UTMMediumHash")
M("UTMCampaignHash", "UTMCampaignHash")
M("UTMContentHash", "UTMContentHash")
M("UTMTermHash", "UTMTermHash")
M("FromHash", "FromHash")
M("CLID", "CLID")
M("SocialSourceNetworkID","SocialSourceNetworkID")
/// where 26 is Yandex (db_dumps/SearchEngines).
M("CorrectedTraficSourceID", "(IsYandex AND SEIn(toUInt8(SearchEngineID), 26)) ? -1 : TraficSourceID")
M("CorrectedSearchEngineID", "(IsYandex AND SEIn(toUInt8(SearchEngineID), 26)) ? 0 : toUInt8(SearchEngineID)")
#undef M
}
/** Fills formatted_attribute_map: OLAP-server attribute name -> ClickHouse expression
  * that yields the attribute directly in its displayed form.
  *
  * convertAttributeFormatted consults this map first; attributes absent from it
  * fall back to the numeric expression.
  * The values are passed to Poco::format as format strings together with the attribute parameter.
  */
void QueryConverter::fillFormattedAttributeMap()
{
/// Local shorthand for a map insertion; undefined at the end of the function.
#define M(a, b) formatted_attribute_map[a] = b;
/// Dates and times.
M("VisitStartDateTime", "StartTime")
M("VisitStartDate", "StartDate")
M("VisitStartWeek", "toMonday(StartDate)")
M("VisitStartTime", "substring(toString(toTime(StartTime)), 12, 8)")
M("VisitStartDateTimeRoundedToMinute", "toStartOfMinute(StartTime)")
M("VisitStartDateTimeRoundedToHour", "toStartOfHour(StartTime)")
M("VisitStartDateRoundedToMonth", "toDateTime(toStartOfMonth(StartDate))")
M("VisitStartTimeRoundedToMinute", "substring(toString(toStartOfMinute(toTime(StartTime))), 12, 8)")
M("FirstVisitDateTime", "FirstVisit")
M("FirstVisitDate", "toDate(FirstVisit)")
M("FirstVisitWeek", "toMonday(FirstVisit)")
M("FirstVisitTime", "substring(toString(FirstVisit), 12, 8)")
M("PredLastVisitDate", "PredLastVisit")
M("PredLastVisitWeek", "toMonday(PredLastVisit)")
M("ClientDateTime", "ClientEventTime")
M("ClientTime", "substring(toString(ClientEventTime), 12, 8)")
/// Versions, rendered as their components joined with '.'.
M("DotNet", "concat(concat(toString(NetMajor), '.'), toString(NetMinor))")
M("Flash", "concat(concat(toString(FlashMajor),'.'),toString(FlashMinor))")
M("Silverlight", "concat(concat(concat(concat(concat(concat(toString(SilverlightVersion1), '.'), toString(SilverlightVersion2)), '.'), toString(SilverlightVersion3)), '.'), toString(SilverlightVersion4))")
M("MobilePhoneModel", "MobilePhoneModel")
M("ClientIP", "IPv4NumToString(ClientIP)")
/// Sizes, rendered as their components joined with 'x'.
M("Resolution", "concat(concat(concat(concat(toString(ResolutionWidth),'x'),toString(ResolutionHeight)),'x'),toString(ResolutionDepth))")
M("ResolutionWidthHeight","concat(concat(toString(ResolutionWidth),'x'),toString(ResolutionHeight))")
M("WindowClientArea", "concat(concat(toString(WindowClientWidth),'x'),toString(WindowClientHeight))")
/// User agent: the minor version is appended only when it is non-zero.
M("UserAgent", "concat(concat(concat(toString(UserAgent), ' '), toString(UserAgentMajor)), UserAgentMinor == 0 ? '' : concat('.', reinterpretAsString(UserAgentMinor)))")
M("UserAgentVersion", "concat(toString(UserAgentMajor), UserAgentMinor == 0 ? '' : concat('.', reinterpretAsString(UserAgentMinor)))")
M("UserAgentMajor", "concat(concat(toString(UserAgent), ' '), toString(UserAgentMajor))")
#undef M
}
/** Fills formatting_aggregated_attribute_map: OLAP-server attribute name -> format string
  * that turns a numeric value of the attribute into its displayed form.
  *
  * Used by convertAggregateFunction to format the result of an aggregate and by
  * convertAttributeFormatted for attributes that have no dedicated formatted expression.
  * The single argument passed to Poco::format is the parenthesized numeric expression;
  * %s and %[0]s both refer to it (%[0]s is used where it is needed more than once).
  * %% is presumably an escaped literal percent sign - confirm against Poco::format.
  */
void QueryConverter::fillFormattingAggregatedAttributeMap()
{
/// Local shorthand for a map insertion; undefined at the end of the function.
#define M(a, b) formatting_aggregated_attribute_map[a] = b;
/// Formats shared by several attributes.
std::string todate = "toDate(toDateTime(%s))";
std::string todatetime = "toDateTime(%s)";
std::string cuttime = "substring(toString(toDateTime(%s)), 12, 8)";
std::string tostring = "reinterpretAsString(%s)";
M("VisitStartDateTime", todatetime)
M("VisitStartDate", todate)
M("VisitStartWeek", todate)
M("VisitStartTime", cuttime)
M("VisitStartDateTimeRoundedToMinute", todatetime)
M("VisitStartDateTimeRoundedToHour", todatetime)
M("VisitStartDateRoundedToMonth", todate)
M("VisitStartTimeRoundedToMinute", cuttime)
M("FirstVisitDateTime", todatetime)
M("FirstVisitDate", todate)
M("FirstVisitWeek", todate)
M("FirstVisitTime", cuttime)
M("PredLastVisitDate", todate)
M("PredLastVisitWeek", todate)
M("ClientDateTime", todatetime)
M("ClientTime", cuttime)
M("UserIDCreateDateTime", todatetime)
M("UserIDCreateDate", todate)
/// The formats below unpack the numbers built by the corresponding numeric expressions.
M("DotNet", "concat(concat(toString(intDiv(toUInt32(%[0]s), 256)), '.'), toString(modulo(toUInt32(%[0]s), 256)))")
M("Flash", "concat(concat(toString(intDiv(toUInt32(%[0]s), 256)), '.'), toString(modulo(toUInt32(%[0]s), 256)))")
M("Silverlight", "concat(concat(concat(concat(concat(concat(toString(intDiv(toUInt64(%[0]s), 72057594037927936)), '.'), toString(modulo(intDiv(toUInt64(%[0]s), 281474976710656), 256))), '.'), toString(modulo(intDiv(toUInt64(%[0]s), 65536), 4294967296))), '.'), toString(modulo(toUInt64(%[0]s), 65536)))")
M("MobilePhoneModel", tostring)
M("BrowserLanguage", tostring)
M("BrowserCountry", tostring)
M("TopLevelDomain", tostring)
M("URLScheme", tostring)
M("ClientIP", "IPv4NumToString(%[0]s)")
M("Resolution", "concat(concat(concat(concat(toString(intDiv(toUInt64(%[0]s), 16777216)),'x'),toString(intDiv(toUInt64(%[0]s), 256) %% 65536)),'x'),toString(toUInt64(%[0]s) %% 256))")
M("ResolutionWidthHeight","concat(concat(toString(intDiv(toUInt64(%[0]s), 65536)),'x'),toString(toUInt64(%[0]s) %% 65536))")
M("WindowClientArea", "concat(concat(toString(intDiv(toUInt64(%[0]s), 65536)),'x'),toString(toUInt64(%[0]s) %% 65536))")
M("UserAgent", "concat(concat(concat(toString(intDiv(toUInt32(%[0]s), 16777216)), ' '), toString(intDiv(toUInt32(%[0]s), 65536) %% 256)), (toUInt32(%[0]s) %% 65536) == 0 ? '' : concat('.', reinterpretAsString(toUInt32(%[0]s) %% 65536)))")
M("UserAgentVersion", "concat(toString(intDiv(toUInt32(%[0]s), 65536)), (toUInt32(%[0]s) %% 65536) == 0 ? '' : concat('.', reinterpretAsString(toUInt32(%[0]s) %% 65536)))")
M("UserAgentMajor", "concat(concat(toString(intDiv(toUInt32(%[0]s), 256)), ' '), toString(toUInt32(%[0]s) %% 256))")
M("Interests", "bitmaskToList(%s)")
#undef M
}
}
}
#pragma once
#include "OLAPQueryParser.h"
#include <DB/Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
#include "OLAPAttributesMetadata.h"
namespace DB
{
namespace OLAP
{
/// Converts a parsed XML query in OLAP-server format into an SQL-like query for clickhouse.
class QueryConverter
{
public:
/// Reads the table names from the "olap_compatibility" section of the configuration.
QueryConverter(Poco::Util::AbstractConfiguration & config);
/// Takes the table names explicitly.
QueryConverter(const String & table_for_single_counter, const String & table_for_all_counters);
/// Produces a clickhouse query and settings from a query in OLAP-server format.
void OLAPServerQueryToClickHouse(const QueryParseResult & query, Context & inout_context, std::string & out_query) const;
private:
/// Attribute value suitable for output in the response and for grouping by it.
std::string convertAttributeFormatted(const std::string & attribute, unsigned parameter, const std::string & regions_point_of_view_formatted) const;
/// Numeric attribute value suitable for substitution into conditions, aggregate functions and sort keys.
std::string convertAttributeNumeric(const std::string & attribute, unsigned parameter, const std::string & regions_point_of_view_formatted) const;
/// <aggregates><aggregate> => SELECT x
std::string convertAggregateFunction(const std::string & attribute, unsigned parameter, const std::string & function,
const QueryParseResult & query, const std::string & regions_point_of_view_formatted) const;
/// <where><condition><rhs> => SELECT ... where F(A, x)
std::string convertConstant(const std::string & attribute, const std::string & value) const;
/// <where><condition> => SELECT ... WHERE x
std::string convertCondition(
const std::string & attribute,
unsigned parameter,
const std::string & relation,
const std::string & rhs,
const std::string & regions_point_of_view_formatted) const;
/// ASC or DESC
std::string convertSortDirection(const std::string & direction) const;
/// <dates> => SELECT ... WHERE x
std::string convertDateRange(time_t date_first, time_t date_last) const;
/// <counter_id> => SELECT ... WHERE x
std::string convertCounterID(CounterID_t CounterID) const;
/// Table to select from, chosen by the counter and the `local` flag.
std::string getTableName(CounterID_t CounterID, bool local) const;
/// HAVING clause appended to every generated query.
std::string getHavingSection() const;
/// Fill the attribute maps declared below; called from the constructor.
void fillFormattedAttributeMap();
void fillNumericAttributeMap();
void fillFormattingAggregatedAttributeMap();
std::string table_for_single_counter;
std::string table_for_all_counters;
/// Format string for convertAttributeNumeric. Present for every attribute.
std::map<std::string, std::string> numeric_attribute_map;
/// Format string for obtaining the displayed value from an aggregated numeric value.
std::map<std::string, std::string> formatting_aggregated_attribute_map;
/// Format string for convertAttributeFormatted.
std::map<std::string, std::string> formatted_attribute_map;
/// List of region attributes that require the regions_point_of_view parameter to be passed.
std::set<std::string> regions_attributes_set =
{
"RegionCity",
"RegionArea",
"RegionCountry",
};
/// Parsers of attribute values.
AttributeMetadatas attribute_metadatas;
};
}
}
#include <common/DateLUT.h>
#include <Poco/DateTimeParser.h>
#include <Poco/AutoPtr.h>
#include "OLAPQueryParser.h"
#include <DB/Common/Exception.h>
#include <DB/IO/ReadHelpers.h>
#include <DB/IO/WriteHelpers.h>
namespace DB
{
/// Error codes thrown by the OLAP query parser below.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int NOT_FOUND_NODE;
extern const int FOUND_MORE_THAN_ONE_NODE;
extern const int SYNTAX_ERROR;
extern const int UNKNOWN_FORMAT;
extern const int FIRST_DATE_IS_BIGGER_THAN_LAST_DATE;
extern const int UNKNOWN_OVERFLOW_MODE;
extern const int NOT_FOUND_FUNCTION_ELEMENT_FOR_AGGREGATE;
extern const int NOT_FOUND_RELATION_ELEMENT_FOR_CONDITION;
extern const int NOT_FOUND_RHS_ELEMENT_FOR_CONDITION;
extern const int NO_ATTRIBUTES_LISTED;
extern const int UNKNOWN_DIRECTION_OF_SORTING;
extern const int INDEX_OF_COLUMN_IN_SORT_CLAUSE_IS_OUT_OF_RANGE;
}
namespace OLAP
{
/** Returns the inner text of the only element named `tag_name` in the document.
  * Throws Exception(NOT_FOUND_NODE) if there is no such element and
  * Exception(FOUND_MORE_THAN_ONE_NODE) if there are several.
  */
static std::string getValueOfOneTextElement(Poco::XML::Document * document, const std::string & tag_name)
{
    Poco::AutoPtr<Poco::XML::NodeList> matching_nodes = document->getElementsByTagName(tag_name);
    const auto matches_count = matching_nodes->length();

    if (1 == matches_count)
        return matching_nodes->item(0)->innerText();

    if (0 == matches_count)
        throw Exception(std::string("Not found node ") + tag_name, ErrorCodes::NOT_FOUND_NODE);

    throw Exception(std::string("Found more than one node ") + tag_name, ErrorCodes::FOUND_MORE_THAN_ONE_NODE);
}
/** Splits an attribute specification into the attribute name and its unsigned parameter
  * using parse_attribute_with_parameter_regexp.
  * The regexp has to match and produce exactly three groups (whole match, name, parameter);
  * otherwise Exception(SYNTAX_ERROR) is thrown.
  */
QueryParser::AttributeWithParameter QueryParser::parseAttributeWithParameter(const std::string & s)
{
    Poco::RegularExpression::MatchVec groups;

    const bool matched = parse_attribute_with_parameter_regexp.match(s, 0, groups);
    if (!matched || groups.size() != 3)
        throw Exception(std::string("Invalid attribute syntax: ") + s, ErrorCodes::SYNTAX_ERROR);

    AttributeWithParameter res;
    res.first = s.substr(groups[1].offset, groups[1].length);
    res.second = DB::parse<unsigned>(s.substr(groups[2].offset, groups[2].length));
    return res;
}
/** Parses an XML query in the OLAP-server format read from the stream `s`.
  *
  * The following parts of the document are read:
  *  - <format>: "xml" (default), "tab" or "bin";
  *  - <counter_id>: optional, 0 if absent;
  *  - <first> / <last>: the requested date range (both are read unconditionally);
  *  - <settings>: optional limits and flags (max_result_size, max_execution_time, cut_date_last,
  *    cut_dates_for_goals, overflow_mode, concurrency, max_threads_per_counter, local, sample,
  *    regions_point_of_view);
  *  - <limit>: optional maximum number of rows to output;
  *  - <attribute>: classified by the name of its parent element ("keys", "aggregate" or "condition");
  *  - <sort>: optional list of <column> elements with <index> and <direction>.
  *
  * The parsed document itself is kept in the `query` field of the result.
  *
  * Throws Exception on a duplicated <format>, <settings> or <limit> element, an unknown format,
  * overflow mode or sort direction, a sample value outside (0, 1], a first date greater than the last date,
  * an aggregate without <function>, a condition without <relation> or <rhs>, an empty list of key attributes,
  * or a sort column index that is out of range.
  */
QueryParseResult QueryParser::parse(std::istream & s)
{
    QueryParseResult result;

    Poco::XML::DOMParser parser;
    Poco::XML::InputSource source(s);

    result.max_result_size = 0;
    result.max_execution_time = 0;
    result.sample = 1.0;

    result.query = parser.parse(&source);

    std::string format_element_name("format");
    std::string CounterID_element_name("counter_id");
    std::string date_first_element_name("first");
    std::string date_last_element_name("last");

    /// Output format: XML unless the query says otherwise.
    result.format = FORMAT_XML;
    Poco::AutoPtr<Poco::XML::NodeList> node_list = result.query->getElementsByTagName(format_element_name);
    if (node_list->length() > 1)
        throw Exception(std::string("Found more than one node ") + format_element_name,
            ErrorCodes::FOUND_MORE_THAN_ONE_NODE);

    if (node_list->length() == 1)
    {
        const auto format_text = node_list->item(0)->innerText();

        if (format_text == "xml")
            result.format = FORMAT_XML;
        else if (format_text == "tab")
            result.format = FORMAT_TAB;
        else if (format_text == "bin")
            result.format = FORMAT_BIN;
        else
            throw Exception(std::string("Unknown format: ") + format_text,
                ErrorCodes::UNKNOWN_FORMAT);
    }

    /// The node list returned by getElementsByTagName must be released by the caller,
    /// so it is held in an AutoPtr rather than used as a temporary raw pointer.
    result.CounterID = 0;
    Poco::AutoPtr<Poco::XML::NodeList> counter_id_nodes = result.query->getElementsByTagName(CounterID_element_name);
    if (counter_id_nodes->length() > 0)
        result.CounterID = DB::parse<unsigned>(getValueOfOneTextElement(result.query, CounterID_element_name));

    const auto & date_lut = DateLUT::instance();

    int time_zone_diff = 0;
    result.date_first = date_lut.toDate(Poco::DateTimeParser::parse(
        getValueOfOneTextElement(result.query, date_first_element_name), time_zone_diff).timestamp().epochTime());
    result.date_last = date_lut.toDate(Poco::DateTimeParser::parse(
        getValueOfOneTextElement(result.query, date_last_element_name), time_zone_diff).timestamp().epochTime());

    if (result.date_first > result.date_last)
        throw Exception("First date is bigger than last date.", ErrorCodes::FIRST_DATE_IS_BIGGER_THAN_LAST_DATE);

    /// Number of days in the requested range, both ends inclusive.
    result.days = 1 + date_lut.toDayNum(result.date_last) - date_lut.toDayNum(result.date_first);

    result.cut_date_last = false;
    result.cut_dates_for_goals = false;
    result.concurrency = 0;
    result.max_threads_per_counter = 0;
    result.limit = 0;
    result.local = false;

    Poco::AutoPtr<Poco::XML::NodeList> settings_nodes = result.query->getElementsByTagName("settings");
    if (settings_nodes->length() > 1)
        throw Exception(std::string("Found more than one node settings"), ErrorCodes::FOUND_MORE_THAN_ONE_NODE);

    if (settings_nodes->length() == 1)
    {
        Poco::AutoPtr<Poco::XML::NodeList> settings_child_nodes = settings_nodes->item(0)->childNodes();
        for (unsigned i = 0; i < settings_child_nodes->length(); i++)
        {
            const auto setting = settings_child_nodes->item(i);
            const auto & setting_name = setting->nodeName();

            if (setting_name == "max_result_size")
            {
                /// Set an additional local limit on the maximum size of the result.
                result.max_result_size = DB::parse<unsigned>(setting->innerText());
            }
            else if (setting_name == "max_execution_time")
            {
                /// Set an additional local limit on the maximum query execution time.
                result.max_execution_time = DB::parse<unsigned>(setting->innerText());
            }
            else if (setting_name == "cut_date_last")
            {
                /** Cut the requested period down to the maximum date for which there is data,
                  * instead of reporting an error if the end of the period is greater than that date.
                  */
                result.cut_date_last = true;
            }
            else if (setting_name == "cut_dates_for_goals")
            {
                /// If a goal did not exist on some day, skip that day.
                result.cut_dates_for_goals = true;
            }
            else if (setting_name == "overflow_mode")
            {
                /// Determines what to do if the number of rows exceeds max_result_size.
                std::string overflow_mode_str = setting->innerText();

                if (overflow_mode_str == "throw")
                    result.overflow_mode = OVERFLOW_MODE_THROW;
                else if (overflow_mode_str == "break")
                    result.overflow_mode = OVERFLOW_MODE_BREAK;
                else if (overflow_mode_str == "any")
                    result.overflow_mode = OVERFLOW_MODE_ANY;
                else
                    throw Exception(std::string("Unknown overflow mode: ") + overflow_mode_str,
                        ErrorCodes::UNKNOWN_OVERFLOW_MODE);

                result.has_overflow_mode = true;
            }
            else if (setting_name == "concurrency")
            {
                /// Set the number of threads for processing the query.
                result.concurrency = DB::parse<unsigned>(setting->innerText());
            }
            else if (setting_name == "max_threads_per_counter")
            {
                /** Set a local limit on the maximum number of queries being processed.
                  * It may be greater than the default limit.
                  */
                result.max_threads_per_counter = DB::parse<unsigned>(setting->innerText());
            }
            else if (setting_name == "local")
            {
                result.local = true;
            }
            else if (setting_name == "sample")
            {
                result.sample = DB::parse<Float32>(setting->innerText());
                if (result.sample <= 0 || result.sample > 1.)
                    throw Exception(std::string("Wrong sample = ") + DB::toString(result.sample) + ". Sampling must be in range (0, 1]");
            }
            else if (setting_name == "regions_point_of_view")
            {
                result.regions_point_of_view = setting->innerText();
            }
        }
    }

    Poco::AutoPtr<Poco::XML::NodeList> limit_nodes = result.query->getElementsByTagName("limit");
    if (limit_nodes->length() > 1)
        throw Exception(std::string("Found more than one node limit"), ErrorCodes::FOUND_MORE_THAN_ONE_NODE);

    if (limit_nodes->length() == 1)
        result.limit = DB::parse<unsigned>(limit_nodes->item(0)->innerText());

    LOG_DEBUG(log, "CounterID: " << result.CounterID
        << ", dates: " << LocalDate(result.date_first) << " - " << LocalDate(result.date_last));

    /// Collect the attributes; the role of each one is determined by the name of its parent element.
    Poco::AutoPtr<Poco::XML::NodeList> attributes = result.query->getElementsByTagName("attribute");
    for (unsigned i = 0; i < attributes->length(); i++)
    {
        const auto attribute_node = attributes->item(i);
        std::string attribute_string = attribute_node->innerText();

        /// Without a parenthesized parameter, the whole text is the name and the parameter is 0.
        AttributeWithParameter attr_with_param(attribute_string, 0);
        if (attribute_string.find('(') != std::string::npos)
            attr_with_param = parseAttributeWithParameter(attribute_string);

        const std::string & attribute_name = attr_with_param.first;
        const unsigned attribute_param = attr_with_param.second;

        const auto parent_node = attribute_node->parentNode();
        const auto & parent_name = parent_node->nodeName();

        if (parent_name == "keys")
        {
            QueryParseResult::KeyAttribute key_attribute;
            key_attribute.attribute = attribute_name;
            key_attribute.parameter = attribute_param;
            result.key_attributes.push_back(key_attribute);
        }

        if (parent_name == "aggregate")
        {
            Poco::AutoPtr<Poco::XML::NodeList> aggregate_nodes = parent_node->childNodes();

            /// Only the first 'function' sibling is used.
            bool function_found = false;
            for (unsigned j = 0; j < aggregate_nodes->length(); j++)
            {
                if (aggregate_nodes->item(j)->nodeName() == "function")
                {
                    QueryParseResult::Aggregate aggregate;
                    aggregate.attribute = attribute_name;
                    aggregate.parameter = attribute_param;
                    aggregate.function = aggregate_nodes->item(j)->innerText();
                    result.aggregates.push_back(aggregate);
                    function_found = true;
                    break;
                }
            }

            if (!function_found)
                throw Exception(std::string("Not found 'function' element for aggregate with attribute ") + attribute_name,
                    ErrorCodes::NOT_FOUND_FUNCTION_ELEMENT_FOR_AGGREGATE);
        }

        if (parent_name == "condition")
        {
            Poco::AutoPtr<Poco::XML::NodeList> condition_nodes = parent_node->childNodes();

            QueryParseResult::WhereCondition condition;
            condition.attribute = attribute_name;
            condition.parameter = attribute_param;

            /// Only the first 'relation' sibling is used.
            bool relation_found = false;
            for (unsigned j = 0; j < condition_nodes->length(); j++)
            {
                if (condition_nodes->item(j)->nodeName() == "relation")
                {
                    condition.relation = condition_nodes->item(j)->innerText();
                    relation_found = true;
                    break;
                }
            }

            if (!relation_found)
                throw Exception(std::string("Not found 'relation' element for condition with attribute ") + attribute_name,
                    ErrorCodes::NOT_FOUND_RELATION_ELEMENT_FOR_CONDITION);

            /// Only the first 'rhs' sibling is used.
            bool rhs_found = false;
            for (unsigned j = 0; j < condition_nodes->length(); j++)
            {
                if (condition_nodes->item(j)->nodeName() == "rhs")
                {
                    condition.rhs = condition_nodes->item(j)->innerText();
                    rhs_found = true;
                    break;
                }
            }

            if (!rhs_found)
                throw Exception(std::string("Not found 'rhs' element for condition with attribute ") + attribute_name,
                    ErrorCodes::NOT_FOUND_RHS_ELEMENT_FOR_CONDITION);

            result.where_conditions.push_back(condition);
        }
    }

    if (result.key_attributes.size() == 0)
        throw Exception("No attributes listed.", ErrorCodes::NO_ATTRIBUTES_LISTED);

    /// Read the sort clause; only the first 'sort' element is considered.
    Poco::AutoPtr<Poco::XML::NodeList> sort_nodes = result.query->getElementsByTagName("sort");
    if (sort_nodes->length() >= 1)
    {
        Poco::AutoPtr<Poco::XML::NodeList> column_nodes = sort_nodes->item(0)->childNodes();
        for (unsigned i = 0; i < column_nodes->length(); i++)
        {
            if (column_nodes->item(i)->nodeName() != "column")
                continue;

            /// Value-initialized so that 'index' is never indeterminate when the 'index' element is missing.
            /// NOTE(review): a column without 'index' is still accepted (with index 0) - confirm whether it should be rejected.
            QueryParseResult::SortColumn column{};
            column.direction = "ascending";

            Poco::AutoPtr<Poco::XML::NodeList> index_direction_nodes = column_nodes->item(i)->childNodes();
            for (unsigned j = 0; j < index_direction_nodes->length(); j++)
            {
                const auto child = index_direction_nodes->item(j);
                const auto & child_name = child->nodeName();

                if (child_name == "index")
                {
                    /// Indexes are 1-based and refer to the key attributes followed by the aggregates.
                    column.index = DB::parse<unsigned>(child->innerText());
                    if (column.index < 1 || column.index > result.key_attributes.size() + result.aggregates.size())
                        throw Exception("Index of column in sort clause is out of range.",
                            ErrorCodes::INDEX_OF_COLUMN_IN_SORT_CLAUSE_IS_OUT_OF_RANGE);
                }

                if (child_name == "direction")
                {
                    column.direction = child->innerText();
                    if (column.direction != "ascending" && column.direction != "descending")
                        throw Exception("Unknown direction of sorting.",
                            ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING);
                }
            }

            result.sort_columns.push_back(column);
        }
    }

    return result;
}
}
}
#pragma once
#include <Poco/DOM/DOMParser.h>
#include <Poco/DOM/DOMWriter.h>
#include <Poco/DOM/Document.h>
#include <Poco/DOM/Element.h>
#include <Poco/DOM/Text.h>
#include <Poco/DOM/NodeList.h>
#include <Poco/SAX/InputSource.h>
#include <Poco/RegularExpression.h>
#include <Poco/AutoPtr.h>
#include <common/logger_useful.h>
#include <common/Common.h>
namespace DB
{
namespace OLAP
{
/// Output format of the query result.
enum Format
{
FORMAT_XML,
FORMAT_TAB,
FORMAT_BIN
};
/// What to do if the size of the result is greater than max_result_size.
enum OverflowMode
{
OVERFLOW_MODE_THROW, /// stop executing the query and return an error (the default)
OVERFLOW_MODE_BREAK, /// return what has been calculated before the overflow
OVERFLOW_MODE_ANY, /** for the keys that got into the result by the moment of the overflow,
* calculate to the end, and ignore the rest of the keys
* (that is, choose the "first encountered" max_result_size records)
*/
};
/** Result of parsing an XML query in the OLAP-server format.
  * Scalar fields have default member initializers, so a default-constructed object
  * never holds indeterminate values.
  */
struct QueryParseResult
{
    /// An attribute listed under the "keys" element.
    struct KeyAttribute
    {
        std::string attribute;
        unsigned parameter = 0;
    };

    /// An attribute listed under an "aggregate" element, together with its function.
    struct Aggregate
    {
        std::string attribute;
        std::string function;
        unsigned parameter = 0;
    };

    /// An attribute listed under a "condition" element, together with its relation and right hand side.
    struct WhereCondition
    {
        std::string attribute;
        unsigned parameter = 0;
        std::string relation;
        std::string rhs;
    };

    /// A column of the sort clause.
    struct SortColumn
    {
        unsigned index = 0;
        std::string direction;
    };

    /// 0 if not limited.
    unsigned max_result_size = 0;
    unsigned max_execution_time = 0;
    unsigned max_threads_per_counter = 0;
    unsigned concurrency = 0;
    unsigned limit = 0;    /// maximum number of records to output (all the rest are ignored)

    bool cut_date_last = false;
    bool cut_dates_for_goals = false;    /// if a goal did not exist for some date, just skip that date

    /// Use the table for a single layer, even if CounterID = 0 is specified.
    bool local = false;

    /// Sampling - on what fraction of the data to execute the query. Takes values in the range (0, 1].
    /// If equal to 1, sampling is disabled.
    float sample = 1.0f;

    Format format = FORMAT_XML;

    /// overflow_mode is explicitly requested by the query only if has_overflow_mode is true.
    bool has_overflow_mode = false;
    OverflowMode overflow_mode = OVERFLOW_MODE_THROW;

    /// The parsed XML document.
    Poco::AutoPtr<Poco::XML::Document> query;

    CounterID_t CounterID{};
    time_t date_first = 0;
    time_t date_last = 0;
    unsigned days = 0;

    std::vector<KeyAttribute> key_attributes;
    std::vector<Aggregate> aggregates;
    std::vector<WhereCondition> where_conditions;
    std::vector<SortColumn> sort_columns;

    /// Which hierarchy of regions to use.
    std::string regions_point_of_view;
};
/// Parser of XML queries in the OLAP-server format.
class QueryParser
{
private:
/// Attribute name paired with its numeric parameter.
using AttributeWithParameter = std::pair<std::string, unsigned>;
/// Parses an attribute written with a parenthesized parameter
/// (presumably using parse_attribute_with_parameter_regexp - the definition is not fully visible here).
AttributeWithParameter parseAttributeWithParameter(const std::string & s);
time_t getLastDate();
/// regexp for parsing an expression like "GoalReaches(111)"
Poco::RegularExpression parse_attribute_with_parameter_regexp;
Logger * log;
public:
/// Compiles the attribute regexp and obtains the logger named "QueryParser".
QueryParser()
: parse_attribute_with_parameter_regexp("^\\s*(\\w+)\\s*\\((\\d+)\\)\\s*$"),
log(&Logger::get("QueryParser"))
{
}
/// Parses an XML query read from the stream `s` and returns its structured representation.
QueryParseResult parse(std::istream & s);
};
}
}
......@@ -48,7 +48,6 @@
#include "HTTPHandler.h"
#include "ReplicasStatusHandler.h"
#include "InterserverIOHTTPHandler.h"
#include "OLAPHTTPHandler.h"
#include "TCPHandler.h"
#include "MetricsTransmitter.h"
#include "UsersConfigReloader.h"
......@@ -424,30 +423,10 @@ int Server::main(const std::vector<std::string> & args)
http_params);
}
/// OLAP HTTP
std::experimental::optional<Poco::Net::HTTPServer> olap_http_server;
bool use_olap_server = config().has("olap_compatibility.port");
if (use_olap_server)
{
olap_parser = std::make_unique<OLAP::QueryParser>();
olap_converter = std::make_unique<OLAP::QueryConverter>(config());
Poco::Net::ServerSocket olap_http_socket(Poco::Net::SocketAddress(listen_host, config().getInt("olap_compatibility.port")));
olap_http_socket.setReceiveTimeout(settings.receive_timeout);
olap_http_socket.setSendTimeout(settings.send_timeout);
olap_http_server.emplace(
new HTTPRequestHandlerFactory<OLAPHTTPHandler>(*this, "OLAPHTTPHandler-factory"),
server_pool,
olap_http_socket,
http_params);
}
http_server.start();
tcp_server.start();
if (interserver_io_http_server)
interserver_io_http_server->start();
if (olap_http_server)
olap_http_server->start();
LOG_INFO(log, "Ready for connections.");
......@@ -471,8 +450,6 @@ int Server::main(const std::vector<std::string> & args)
http_server.stop();
tcp_server.stop();
if (use_olap_server)
olap_http_server->stop();
);
/// try to load dictionaries immediately, throw on error and die
......
......@@ -22,17 +22,15 @@
#include <DB/Common/HTMLForm.h>
#include <DB/Interpreters/Context.h>
#include "OLAPQueryParser.h"
#include "OLAPQueryConverter.h"
/** Сервер предоставляет три интерфейса:
* 1. HTTP - простой интерфейс для доступа из любых приложений.
* 2. TCP - интерфейс для доступа из родной библиотеки, родного клиента, и для межсерверного взаимодействия.
* Более эффективен, так как
* - данные передаются по столбцам;
* - данные передаются со сжатием;
* Позволяет тонко управлять настройками и получать более подробную информацию в ответах.
* 3. OLAP-server HTTP - интерфейс для совместимости с устаревшим демоном OLAP-server.
/** Server provides three interfaces:
* 1. HTTP - a simple interface for any application.
* 2. TCP - an interface for the native clickhouse-client and for internal server-to-server communication.
* Richer and more efficient, but less compatible:
* - data is transferred by columns;
* - data is transferred compressed;
* Allows getting more information in the response.
* 3. Interserver HTTP - for replication.
*/
......@@ -42,12 +40,9 @@ namespace DB
class Server : public BaseDaemon
{
public:
/// Глобальные настройки севрера
/// Global settings of server.
std::unique_ptr<Context> global_context;
std::unique_ptr<OLAP::QueryParser> olap_parser;
std::unique_ptr<OLAP::QueryConverter> olap_converter;
protected:
void initialize(Application & self)
{
......
......@@ -448,10 +448,7 @@ void TCPHandler::receiveHello()
{
writeString("HTTP/1.0 400 Bad Request\r\n\r\n"
"Port " + server.config().getString("tcp_port") + " is for clickhouse-client program.\r\n"
"You must use port " + server.config().getString("http_port") + " for HTTP"
+ (server.config().getBool("use_olap_http_server", false)
? "\r\n or port " + server.config().getString("olap_http_port") + " for OLAPServer compatibility layer.\r\n"
: ".\r\n"),
"You must use port " + server.config().getString("http_port") + " for HTTP.\r\n",
*out);
throw Exception("Client has connected to wrong port", ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT);
......
......@@ -21,8 +21,8 @@
STRONG_TYPEDEF(UInt16, DayNum_t);
/** Lookup таблица для преобразования времени в дату, а также в месяц или в год или в день недели или в день месяца.
* Сейчас она используется для ускорения OLAPServer-а, который делает такие преобразования миллиардами.
/** Lookup table for converting time to date, and to month / year / day of week / day of month and so on.
* It was first implemented for OLAPServer, which needed to do billions of such transformations.
*/
class DateLUTImpl
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册