NamePrompter.h 3.2 KB
Newer Older
1 2 3
#pragma once

#include <Core/Types.h>
A
Alexey Milovidov 已提交
4
#include <Common/PODArray.h>
5 6

#include <algorithm>
D
Danila Kutenin 已提交
7
#include <cctype>
D
Danila Kutenin 已提交
8
#include <cmath>
9
#include <memory>
10 11 12 13 14
#include <queue>
#include <utility>

namespace DB
{
D
Danila Kutenin 已提交
15
template <size_t MaxNumHints>
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
class NamePrompter
{
public:
    using DistanceIndex = std::pair<size_t, size_t>;
    using DistanceIndexQueue = std::priority_queue<DistanceIndex>;

    static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings)
    {
        DistanceIndexQueue queue;
        for (size_t i = 0; i < prompting_strings.size(); ++i)
            appendToQueue(i, name, queue, prompting_strings);
        return release(queue, prompting_strings);
    }

private:
D
Danila Kutenin 已提交
31
    static size_t levenshteinDistance(const String & lhs, const String & rhs)
32
    {
33 34
        size_t m = lhs.size();
        size_t n = rhs.size();
35

A
Alexey Milovidov 已提交
36
        PODArrayWithStackMemory<size_t, 64> row(n + 1);
37

38 39
        for (size_t i = 1; i <= n; ++i)
            row[i] = i;
40 41 42

        for (size_t j = 1; j <= m; ++j)
        {
43 44
            row[0] = j;
            size_t prev = j - 1;
45 46
            for (size_t i = 1; i <= n; ++i)
            {
47 48 49 50
                size_t old = row[i];
                row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])),
                    std::min(row[i - 1], row[i]) + 1);
                prev = old;
51 52
            }
        }
53
        return row[n];
54 55 56 57
    }

    static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
    {
D
Danila Kutenin 已提交
58 59 60 61 62 63 64 65 66
        const String & prompt = prompting_strings[ind];

        /// Clang SimpleTypoCorrector logic
        const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
        const size_t mistake_factor = (name.size() + 2) / 3;
        if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
            return;

        if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
67
        {
D
Danila Kutenin 已提交
68 69
            size_t distance = levenshteinDistance(prompt, name);
            if (distance <= mistake_factor)
D
Danila Kutenin 已提交
70
            {
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
                queue.emplace(distance, ind);
                if (queue.size() > MaxNumHints)
                    queue.pop();
            }
        }
    }

    static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
    {
        std::vector<String> ans;
        ans.reserve(queue.size());
        while (!queue.empty())
        {
            auto top = queue.top();
            queue.pop();
            ans.push_back(prompting_strings[top.second]);
        }
        std::reverse(ans.begin(), ans.end());
        return ans;
    }
};

D
Danila Kutenin 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
template <size_t MaxNumHints, class Self>
class IHints
{
public:

    virtual std::vector<String> getAllRegisteredNames() const = 0;

    std::vector<String> getHints(const String & name) const
    {
        static const auto registered_names = getAllRegisteredNames();
        return prompter.getHints(name, registered_names);
    }

    virtual ~IHints() = default;

private:
    NamePrompter<MaxNumHints> prompter;
};

112
}