提交 a73790f1 编写于 作者: A Alexey Milovidov

Less dependencies [#CLICKHOUSE-2].

上级 2cc7c776
#include <Common/formatIPv6.h>
#include <Common/hex.h>
#include <ext/range.h>
#include <array>
namespace DB
{
/// Integer logarithm: returns ceil(log(value, base)), i.e. the smallest integer
/// greater than or equal to log(value, base), for value >= 1 (returns 0 for value == 0).
/// `carry` records whether a non-zero remainder has already been discarded, which
/// means the exact logarithm is not an integer and the result must be rounded up.
static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
{
    uint32_t whole_steps = 0;
    uint32_t remaining = value;
    bool inexact = carry;

    /// Strip one base-`base` digit per iteration, remembering any non-zero digit that is dropped.
    while (remaining >= base)
    {
        inexact = (remaining % base != 0) || inexact;
        remaining /= base;
        ++whole_steps;
    }

    /// A leading digit above 1, or any dropped non-zero digit, pushes the result up by one.
    const bool round_up = (remaining % base > 1) || inexact;
    return whole_steps + (round_up ? 1 : 0);
}
/// Writes `value` in the given `base` at `out` and advances `out` past the written
/// characters (no terminating zero is written). Faster than sprintf.
/// Digits are produced by hexLowercase. The default `buffer_size` is the number of
/// base-`base` digits needed per byte multiplied by sizeof(T).
template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base, false)>
static void print_integer(char *& out, T value)
{
    if (value == 0)
    {
        *out++ = '0';
        return;
    }

    /// Digits come out least significant first, so stage them and then emit in reverse order.
    char staged[buffer_size];
    uint32_t staged_count = 0;
    for (; value > 0; value /= base)
        staged[staged_count++] = hexLowercase(value % base);

    while (staged_count != 0)
        *out++ = staged[--staged_count];
}
/// print IPv4 address as %u.%u.%u.%u
static void formatIPv4(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
{
const auto limit = IPV4_BINARY_LENGTH - zeroed_tail_bytes_count;
for (const auto i : ext::range(0, IPV4_BINARY_LENGTH))
{
UInt8 byte = (i < limit) ? src[i] : 0;
print_integer<10, UInt8>(dst, byte);
if (i != IPV4_BINARY_LENGTH - 1)
*dst++ = '.';
}
}
/** Formats the binary IPv6 address at `src` (IPV6_BINARY_LENGTH bytes, most significant byte
  * of each 16-bit group first) as text at `dst`.
  *
  * The longest run of at least two zero groups is abbreviated as "::". When the address
  * starts with six zero groups, or five zero groups followed by ffff, the last four bytes
  * are printed in dotted IPv4 form.
  *
  * `dst` is advanced past the written text, including the terminating zero byte.
  * No bounds checking is performed on `dst`.
  *
  * The last `zeroed_tail_bytes_count` bytes of the address are treated as zero; values larger
  * than IPV6_BINARY_LENGTH are clamped, so the whole address is treated as zero.
  */
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
{
    struct ZeroRun
    {
        int base;
        int len;
    };

    ZeroRun best{-1, 0};
    ZeroRun cur{-1, 0};
    std::array<uint16_t, IPV6_BINARY_LENGTH / sizeof(uint16_t)> words{};
    const int words_count = static_cast<int>(words.size());

    /// Clamp the tail size so that the number of significant bytes can never become negative.
    const int zeroed_bytes = std::min<int>(zeroed_tail_bytes_count, IPV6_BINARY_LENGTH);
    const int significant_bytes = IPV6_BINARY_LENGTH - zeroed_bytes;

    /** Preprocess:
      * Copy the input (bytewise) array into a wordwise array; even bytes go to the high half.
      * Bytes belonging to the zeroed tail are left as zero and are not read. */
    for (int i = 0; i < significant_bytes; ++i)
        words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);

    /// Find the longest run of zero words for :: shorthanding (the first one wins on ties).
    for (int i = 0; i < words_count; ++i)
    {
        if (words[i] == 0)
        {
            if (cur.base == -1)
            {
                cur.base = i;
                cur.len = 1;
            }
            else
                ++cur.len;
        }
        else if (cur.base != -1)
        {
            if (best.base == -1 || cur.len > best.len)
                best = cur;
            cur.base = -1;
        }
    }

    /// A run that reaches the end of the address is still pending at this point.
    if (cur.base != -1 && (best.base == -1 || cur.len > best.len))
        best = cur;

    /// A single zero word is not abbreviated.
    if (best.base != -1 && best.len < 2)
        best.base = -1;

    /// Format the result.
    for (int i = 0; i < words_count; ++i)
    {
        /// Are we inside the best run of 0x00's?
        if (best.base != -1 && i >= best.base && i < best.base + best.len)
        {
            if (i == best.base)
                *dst++ = ':';
            continue;
        }

        /// Are we following an initial run of 0x00s or any real hex?
        if (i != 0)
            *dst++ = ':';

        /// Is this address an encapsulated IPv4?
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
        {
            formatIPv4(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(IPV4_BINARY_LENGTH)));
            break;
        }

        print_integer<16>(dst, words[i]);
    }

    /// Was it a trailing run of 0x00's?
    if (best.base != -1 && best.base + best.len == words_count)
        *dst++ = ':';

    *dst++ = '\0';
}
}
#pragma once
#include <common/Types.h>
#define IPV4_BINARY_LENGTH 4
#define IPV6_BINARY_LENGTH 16
#define IPV4_MAX_TEXT_LENGTH 15 /// Does not count tail zero byte.
#define IPV6_MAX_TEXT_LENGTH 39
namespace DB
{
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
  * Performs significantly faster than the reference implementation due to the absence of sprintf calls,
  * bounds checking, unnecessary string copying and length calculation.
  *
  * src - the binary address, IPV6_BINARY_LENGTH bytes.
  * dst - output position; it is advanced past the written text, including the terminating zero byte.
  *  Since there is no bounds checking, the caller must provide enough room
  *  (presumably IPV6_MAX_TEXT_LENGTH + 1 bytes - TODO confirm against all callers).
  * zeroed_tail_bytes_count - number of trailing bytes of the address to treat as zero when formatting.
  */
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
}
......@@ -8,11 +8,11 @@
#include <Columns/ColumnFixedString.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <IO/WriteIntText.h>
#include <Common/formatIPv6.h>
#include <iostream>
#include <Functions/FunctionsCoding.h>
namespace DB
{
......@@ -595,7 +595,7 @@ void TrieDictionary::trieTraverse(const btrie_t * tree, Getter && getter) const
Columns TrieDictionary::getKeyColumns() const
{
auto ip_column = std::make_shared<ColumnFixedString>(ipv6_bytes_length);
auto ip_column = std::make_shared<ColumnFixedString>(IPV6_BINARY_LENGTH);
auto mask_column = std::make_shared<ColumnVector<UInt8>>();
auto getter = [& ip_column, & mask_column](__uint128_t ip, size_t mask) {
......@@ -603,7 +603,7 @@ Columns TrieDictionary::getKeyColumns() const
ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]);
ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]);
std::swap(ip_array[0], ip_array[1]);
ip_column->insertData(reinterpret_cast<const char *>(ip_array), ipv6_bytes_length);
ip_column->insertData(reinterpret_cast<const char *>(ip_array), IPV6_BINARY_LENGTH);
mask_column->insert(static_cast<UInt8>(mask));
};
......@@ -619,7 +619,7 @@ BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_nam
{
const auto & attr = attributes.front();
return ColumnsWithTypeAndName({ColumnWithTypeAndName(columns.front(),
std::make_shared<DataTypeFixedString>(ipv6_bytes_length), attr.name)});
std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)});
};
auto getView = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
{
......@@ -631,7 +631,7 @@ BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_nam
{
UInt8 mask = mask_column->getElement(row);
char * ptr = buffer;
IPv6Format::apply(reinterpret_cast<const unsigned char *>(ip_column->getDataAt(row).data), ptr);
formatIPv6(reinterpret_cast<const unsigned char *>(ip_column->getDataAt(row).data), ptr);
*(ptr - 1) = '/';
auto size = detail::writeUIntText(mask, ptr);
column->insertData(buffer, size + (ptr - buffer));
......
#pragma once
#include <Common/hex.h>
#include <Common/formatIPv6.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypesNumber.h>
......@@ -16,7 +17,6 @@
#include <Columns/ColumnConst.h>
#include <Functions/IFunction.h>
#include <arpa/inet.h>
#include <ext/range.h>
#include <array>
......@@ -51,129 +51,6 @@ constexpr auto ipv6_bytes_length = 16;
constexpr auto uuid_bytes_length = 16;
constexpr auto uuid_text_length = 36;
/// Formats binary IPv6 addresses as text without sprintf calls; see apply().
class IPv6Format
{
private:
    /// Integer logarithm: returns ceil(log(value, base)), i.e. the smallest integer
    /// greater than or equal to log(value, base). `carry` records whether a non-zero
    /// remainder has already been discarded, which forces rounding up.
    static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
    {
        return value >= base ? 1 + int_log(value / base, base, value % base || carry) : value % base > 1 || carry;
    }

    /// Prints an integer in the desired base at `out` and advances `out`; faster than sprintf.
    /// The default `buffer_size` is the number of digits per byte multiplied by sizeof(T).
    template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base, false)>
    static void print_integer(char *& out, T value)
    {
        if (value == 0)
            *out++ = '0';
        else
        {
            /// Digits are produced least significant first, then copied out in reverse.
            char buf[buffer_size];
            auto ptr = buf;

            while (value > 0)
            {
                *ptr++ = hexLowercase(value % base);
                value /= base;
            }

            while (ptr != buf)
                *out++ = *--ptr;
        }
    }

    /// Prints an IPv4 address as %u.%u.%u.%u; the last `zeroed_tail_bytes_count` bytes are printed as 0.
    static void ipv4_format(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
    {
        const auto limit = ipv4_bytes_length - zeroed_tail_bytes_count;

        for (const auto i : ext::range(0, ipv4_bytes_length))
        {
            UInt8 byte = (i < limit) ? src[i] : 0;
            print_integer<10, UInt8>(dst, byte);

            if (i != ipv4_bytes_length - 1)
                *dst++ = '.';
        }
    }

public:
    /** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
      * Performs significantly faster than the reference implementation due to the absence of sprintf calls,
      * bounds checking, unnecessary string copying and length calculation.
      *
      * Writes the text form of the address at `src` to `dst` and advances `dst` past the
      * terminating zero byte. The last `zeroed_tail_bytes_count` bytes of the address are
      * treated as zero; values larger than ipv6_bytes_length are clamped.
      */
    static void apply(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0)
    {
        struct { int base, len; } best{-1}, cur{-1};
        std::array<uint16_t, ipv6_bytes_length / sizeof(uint16_t)> words{};
        const int words_count = static_cast<int>(words.size());

        /// Clamp the tail size so that the number of significant bytes can never become negative.
        const int zeroed_bytes = std::min<int>(zeroed_tail_bytes_count, ipv6_bytes_length);
        const int significant_bytes = ipv6_bytes_length - zeroed_bytes;

        /** Preprocess:
          * Copy the input (bytewise) array into a wordwise array.
          * Find the longest run of 0x00's in src[] for :: shorthanding. */
        for (const auto i : ext::range(0, significant_bytes))
            words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);

        for (const auto i : ext::range(0, words.size()))
        {
            if (words[i] == 0)
            {
                if (cur.base == -1)
                    cur.base = i, cur.len = 1;
                else
                    cur.len++;
            }
            else
            {
                if (cur.base != -1)
                {
                    if (best.base == -1 || cur.len > best.len)
                        best = cur;
                    cur.base = -1;
                }
            }
        }

        /// A run that reaches the end of the address is still pending at this point.
        if (cur.base != -1)
        {
            if (best.base == -1 || cur.len > best.len)
                best = cur;
        }

        /// A single zero word is not abbreviated.
        if (best.base != -1 && best.len < 2)
            best.base = -1;

        /// Format the result.
        for (const int i : ext::range(0, words.size()))
        {
            /// Are we inside the best run of 0x00's?
            if (best.base != -1 && i >= best.base && i < (best.base + best.len))
            {
                if (i == best.base)
                    *dst++ = ':';
                continue;
            }

            /// Are we following an initial run of 0x00s or any real hex?
            if (i != 0)
                *dst++ = ':';

            /// Is this address an encapsulated IPv4?
            if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
            {
                ipv4_format(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(ipv4_bytes_length)));
                break;
            }

            print_integer<16>(dst, words[i]);
        }

        /// Was it a trailing run of 0x00's?
        if (best.base != -1 && (best.base + best.len) == words_count)
            *dst++ = ':';

        *dst++ = '\0';
    }
};
class FunctionIPv6NumToString : public IFunction
{
......@@ -220,7 +97,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(size * INET6_ADDRSTRLEN);
vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
offsets_res.resize(size);
auto begin = reinterpret_cast<char *>(&vec_res[0]);
......@@ -228,7 +105,7 @@ public:
for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_bytes_length, ++i)
{
IPv6Format::apply(&vec_in[offset], pos);
formatIPv6(&vec_in[offset], pos);
offsets_res[i] = pos - begin;
}
......@@ -246,9 +123,9 @@ public:
const auto & data_in = col_in->getData();
char buf[INET6_ADDRSTRLEN];
char buf[IPV6_MAX_TEXT_LENGTH + 1];
char * dst = buf;
IPv6Format::apply(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
formatIPv6(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
block.safeGetByPosition(result).column = std::make_shared<ColumnConstString>(col_in->size(), buf);
}
......@@ -343,7 +220,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(size * INET6_ADDRSTRLEN);
vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
offsets_res.resize(size);
auto begin = reinterpret_cast<char *>(&vec_res[0]);
......@@ -395,7 +272,7 @@ public:
const auto & data_in = col_in->getData();
char buf[INET6_ADDRSTRLEN];
char buf[IPV6_MAX_TEXT_LENGTH + 1];
char * dst = buf;
const auto address = reinterpret_cast<const unsigned char *>(data_in.data());
......@@ -419,7 +296,7 @@ private:
void cutAddress(const unsigned char * address, char *& dst, UInt8 zeroed_tail_bytes_count)
{
IPv6Format::apply(address, dst, zeroed_tail_bytes_count);
formatIPv6(address, dst, zeroed_tail_bytes_count);
}
};
......@@ -709,7 +586,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// the longest value is: 255.255.255.255\0
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(vec_in.size());
char * begin = reinterpret_cast<char *>(&vec_res[0]);
char * pos = begin;
......@@ -892,7 +769,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// the longest value is: 255.255.255.255\0
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(vec_in.size());
char * begin = reinterpret_cast<char *>(&vec_res[0]);
char * pos = begin;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册