未验证 提交 75129405 编写于 作者: 石晓伟 提交者: GitHub

new class: StringView, test=develop (#4159)

* new class: StringView, test=develop

* add comments, test=develop
上级 8448ad6c
...@@ -56,20 +56,19 @@ void KernelBase::ParseKernelType(const std::string &kernel_type, ...@@ -56,20 +56,19 @@ void KernelBase::ParseKernelType(const std::string &kernel_type,
std::string *op_type, std::string *op_type,
std::string *alias, std::string *alias,
Place *place) { Place *place) {
auto parts = Split(kernel_type, "/"); auto parts = lite::SplitView(kernel_type, '/');
CHECK_EQ(parts.size(), 5u); CHECK_EQ(parts.size(), 5u);
*op_type = parts[0]; *op_type = parts[0];
*alias = parts[1]; *alias = parts[1];
std::string target, precision, layout; const auto &target = parts[2];
const auto &precision = parts[3];
target = parts[2]; const auto &layout = parts[4];
precision = parts[3];
layout = parts[4];
place->target = static_cast<TargetType>(std::atoi(target.c_str())); place->target = static_cast<TargetType>(target.to_digit<int>());
place->precision = static_cast<PrecisionType>(std::atoi(precision.c_str())); place->precision = static_cast<PrecisionType>(precision.to_digit<int>());
place->layout = static_cast<DataLayoutType>(std::atoi(layout.c_str())); place->layout = static_cast<DataLayoutType>(layout.to_digit<int>());
} }
std::string KernelBase::SerializeKernelType(const std::string &op_type, std::string KernelBase::SerializeKernelType(const std::string &op_type,
......
...@@ -12,6 +12,7 @@ else() ...@@ -12,6 +12,7 @@ else()
endif() endif()
lite_cc_test(test_varient SRCS varient_test.cc DEPS utils) lite_cc_test(test_varient SRCS varient_test.cc DEPS utils)
lite_cc_test(test_utils_string SRCS string_test.cc)
lite_cc_library(any SRCS any.cc) lite_cc_library(any SRCS any.cc)
if(LITE_ON_TINY_PUBLISH OR LITE_ON_MODEL_OPTIMIZE_TOOL) if(LITE_ON_TINY_PUBLISH OR LITE_ON_MODEL_OPTIMIZE_TOOL)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdlib.h>
#include <algorithm>
#include <climits>
#include <limits>
#include <system_error> // NOLINT
#ifndef likely
#define likely(x) __builtin_expect((x), 1)
#endif
#ifndef unlikely
#define unlikely(x) __builtin_expect((x), 0)
#endif
namespace paddle {
namespace lite {
namespace utils {
/*
 * `std::string` gives good encapsulation, but working directly on
 * `const char*` ranges is still worthwhile when many small strings
 * are processed. This header offers a simplified counterpart of the
 * C++17 `std::from_chars` helpers for that purpose.
 *
 * from_chars_result is the value handed back by every parser in this
 * header.
 */
struct from_chars_result {
  // Input position reported by the parser; null until a parser sets it.
  const char* ptr = nullptr;
  // Conversion status; a value-initialized `std::errc` denotes success.
  std::errc ec = std::errc();
};
/*
 * Most C++ environments use ASCII as the development character
 * set. Array `kAsciiToInt` is the look-up table implementation
 * of the following function, indexed by the unsigned value of a
 * character:
 *
 *   static inline uint8_t get_char_val(char c) {
 *     if (likely(c >= '0' && c <= '9'))
 *       return c - '0';
 *     if (c >= 'A' && c <= 'Z')
 *       return 10 + (c - 'A');
 *     if (c >= 'a' && c <= 'z')
 *       return 10 + (c - 'a');
 *     return std::numeric_limits<uint8_t>::max();
 *   }
 *
 * Every byte that is not an alphanumeric ASCII character maps to
 * UCHAR_MAX, which is never smaller than a valid base and therefore
 * always terminates digit parsing.
 */
constexpr uint8_t kAsciiToInt[] = {
    // 0x00 - 0x2F: control characters and punctuation (48 entries).
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    // 0x30 - 0x39: '0' - '9' (10 entries).
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    // 0x3A - 0x40: ':' - '@' (7 entries).
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    // 0x41 - 0x5A: 'A' - 'Z' (26 entries).
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    // 0x5B - 0x60: '[' - '`' (6 entries).
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    // 0x61 - 0x7A: 'a' - 'z' (26 entries).
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    // 0x7B - 0xFF: '{' and everything above ASCII letters (133 entries).
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX, UCHAR_MAX,
    UCHAR_MAX};
// The bound is deduced from the initializer so that a dropped or duplicated
// entry fails to compile instead of silently shifting the table or leaving a
// zero-initialized tail (which would make a byte parse as the digit 0).
static_assert(sizeof(kAsciiToInt) / sizeof(kAsciiToInt[0]) == 256,
              "kAsciiToInt must have exactly one entry per byte value");
/* function: aton_unsigned
 * brief: Convert a character range to an unsigned integer.
 * parameters:
 *   str(const char*): start of the input characters; it does not need to be
 *                     null-terminated.
 *   len(int): number of bytes available at `str`.
 *   value(T): receives the result. It is left untouched when the input is
 *             null/empty or starts with '-', is set to the parsed prefix (0
 *             if there is none) otherwise, and is saturated to the maximum
 *             of T on overflow.
 *   base(int): integer base to use, default to 10.
 * return(from_chars_result):
 *   ec  - default (success) when at least one digit was converted,
 *         invalid_argument for null/empty input or when no digit is found,
 *         result_out_of_range for a leading '-' or a value larger than T
 *         can hold.
 *   ptr - on success, the first character that was not consumed (one past
 *         the last digit); on any error, the start of the input.
 * notes:
 *   A single leading '+' is accepted. Parsing stops at the first character
 *   that is not a digit of `base`.
 */
template <typename T>
from_chars_result aton_unsigned(const char* str,
                                int len,
                                T& value,  // NOLINT
                                int base = 10) {
  from_chars_result result;
  result.ptr = str;
  if (unlikely(!str || len <= 0)) {
    result.ec = std::errc::invalid_argument;
    return result;
  }
  // An unsigned target can never represent a negative number.
  if (unlikely(*str == '-')) {
    result.ec = std::errc::result_out_of_range;
    return result;
  }
  if (unlikely(*str == '+')) {
    ++str;
    --len;
  }
  const uint64_t limit = static_cast<uint64_t>(std::numeric_limits<T>::max());
  uint64_t val = 0;
  int i = 0;
  for (; i < len; ++i) {
    const uint8_t cv = kAsciiToInt[static_cast<uint8_t>(str[i])];
    // Not a digit of this base (this also rejects every base < 1).
    if (unlikely(cv >= base)) {
      break;
    }
    const uint64_t ubase = static_cast<uint64_t>(base);
    // Reject the digit *before* accumulating it: val * base + cv <= limit
    // holds exactly when val <= (limit - cv) / base. Checking per digit keeps
    // the test valid for every base and for inputs with leading zeros.
    if (unlikely(val > (limit - cv) / ubase)) {
      value = std::numeric_limits<T>::max();
      result.ec = std::errc::result_out_of_range;
      return result;
    }
    val = val * ubase + cv;
  }
  value = static_cast<T>(val);
  if (unlikely(i == 0)) {
    // Nothing but an optional sign was recognised.
    result.ec = std::errc::invalid_argument;
    return result;
  }
  result.ptr = str + i;
  return result;
}
/* function: aton_signed
 * brief: Convert a character range to a signed integer.
 * parameters:
 *   str(const char*): start of the input characters; it does not need to be
 *                     null-terminated.
 *   len(int): number of bytes available at `str`.
 *   value(T): receives the result. It is left untouched for null/empty
 *             input, is set to the (signed) parsed prefix otherwise (0 if
 *             there is none), and is saturated to the minimum or maximum of
 *             T on overflow.
 *   base(int): integer base to use, default to 10.
 * return(from_chars_result):
 *   ec  - default (success) when at least one digit was converted,
 *         invalid_argument for null/empty input or when no digit is found,
 *         result_out_of_range when the value does not fit in T.
 *   ptr - on success, the first character that was not consumed (one past
 *         the last digit); on any error, the start of the input.
 * notes:
 *   A single leading '+' or '-' is accepted. Parsing stops at the first
 *   character that is not a digit of `base`.
 */
template <typename T>
from_chars_result aton_signed(const char* str,
                              int len,
                              T& value,  // NOLINT
                              int base = 10) {
  from_chars_result result;
  result.ptr = str;
  if (unlikely(!str || len <= 0)) {
    result.ec = std::errc::invalid_argument;
    return result;
  }
  const bool negative = (*str == '-');
  if (negative || *str == '+') {
    ++str;
    --len;
  }
  // The magnitude is accumulated unsigned. Negative numbers may reach
  // |min()|, which is max() + 1 in two's complement.
  const uint64_t limit =
      static_cast<uint64_t>(std::numeric_limits<T>::max()) +
      (negative ? 1u : 0u);
  uint64_t magnitude = 0;
  int i = 0;
  for (; i < len; ++i) {
    const uint8_t cv = kAsciiToInt[static_cast<uint8_t>(str[i])];
    // Not a digit of this base (this also rejects every base < 1).
    if (unlikely(cv >= base)) {
      break;
    }
    const uint64_t ubase = static_cast<uint64_t>(base);
    // magnitude * base + cv <= limit  <=>  magnitude <= (limit - cv) / base.
    if (unlikely(magnitude > (limit - cv) / ubase)) {
      value = negative ? std::numeric_limits<T>::min()
                       : std::numeric_limits<T>::max();
      result.ec = std::errc::result_out_of_range;
      return result;
    }
    magnitude = magnitude * ubase + cv;
  }
  if (likely(!negative)) {
    value = static_cast<T>(magnitude);
  } else if (magnitude == 0) {
    value = static_cast<T>(0);
  } else {
    // Negate without signed overflow: -(m - 1) - 1 is representable even when
    // m equals 2^63, whereas -m computed on int64_t would overflow.
    value = static_cast<T>(-static_cast<int64_t>(magnitude - 1) - 1);
  }
  if (unlikely(i == 0)) {
    // Nothing but an optional sign was recognised.
    result.ec = std::errc::invalid_argument;
    return result;
  }
  result.ptr = str + i;
  return result;
}
/* function: aton_float
 * brief: Convert a character range holding a plain decimal number
 *        ([+-]digits[.digits]) to a floating-point value. Exponents,
 *        "inf" and "nan" are not recognised.
 * parameters:
 *   str(const char*): start of the input characters; it does not need to be
 *                     null-terminated.
 *   len(int): number of bytes available at `str`.
 *   value(T): receives the result. It is left untouched for null/empty
 *             input, is set to the (signed) parsed prefix otherwise (0 if
 *             there is none), and is saturated to lowest()/max() of T on
 *             overflow.
 * return(from_chars_result):
 *   ec  - default (success) when at least one digit was converted,
 *         invalid_argument for null/empty input or when no digit is found,
 *         result_out_of_range when the magnitude exceeds the maximum of T.
 *   ptr - on success, the first character that was not consumed; on any
 *         error, the start of the input.
 */
template <typename T>
from_chars_result aton_float(const char* str, int len, T& value) {  // NOLINT
  from_chars_result result;
  result.ptr = str;
  if (unlikely(!str || len <= 0)) {
    result.ec = std::errc::invalid_argument;
    return result;
  }
  const bool negative = (*str == '-');
  if (negative || *str == '+') {
    ++str;
    --len;
  }

  // All digits are gathered into one 64-bit mantissa and scaled by a power of
  // ten afterwards. Once another digit would no longer fit, further integer
  // digits only raise the scale and further fractional digits are dropped;
  // they lie beyond the precision of a double anyway.
  const uint64_t mantissa_limit =
      (std::numeric_limits<uint64_t>::max() - 9) / 10;
  uint64_t mantissa = 0;
  int dropped_int_digits = 0;  // integer digits not stored in the mantissa
  int frac_digits = 0;         // fractional digits stored in the mantissa
  int digits = 0;              // every digit seen, stored or not
  bool seen_dot = false;
  int i = 0;
  for (; i < len; ++i) {
    const char c = str[i];
    if ('.' == c && !seen_dot) {
      seen_dot = true;
      continue;
    }
    const uint8_t cv = kAsciiToInt[static_cast<uint8_t>(c)];
    // Anything that is not a decimal digit (including a second '.') ends the
    // number.
    if (unlikely(cv >= 10)) {
      break;
    }
    ++digits;
    if (likely(mantissa <= mantissa_limit)) {
      mantissa = mantissa * 10 + cv;
      if (seen_dot) {
        ++frac_digits;
      }
    } else if (!seen_dot) {
      ++dropped_int_digits;
    }
  }

  double val = static_cast<double>(mantissa);
  // May grow to +inf for absurdly long inputs; the range check below then
  // reports result_out_of_range.
  for (int k = 0; k < dropped_int_digits; ++k) {
    val *= 10.0;
  }
  if (frac_digits > 0) {
    double divisor = 1.0;
    for (int k = 0; k < frac_digits; ++k) {
      divisor *= 10.0;
    }
    val /= divisor;
  }

  // Compare the magnitude first: converting an out-of-range double to a
  // narrower floating-point type is undefined behaviour.
  if (unlikely(val > static_cast<double>(std::numeric_limits<T>::max()))) {
    // lowest() is the most negative finite value; min() would be the smallest
    // positive one for floating-point types.
    value = negative ? std::numeric_limits<T>::lowest()
                     : std::numeric_limits<T>::max();
    result.ec = std::errc::result_out_of_range;
    return result;
  }
  value = static_cast<T>(negative ? -val : val);
  if (unlikely(digits == 0)) {
    // Only a sign and/or a lone '.' was found.
    result.ec = std::errc::invalid_argument;
    return result;
  }
  result.ptr = str + i;
  return result;
}
// One overloaded entry point instead of a family of differently named
// functions: the parser is selected from the type of `value`. The primary
// template is deleted, so only the types specialized below are accepted.
template <typename T>
from_chars_result from_chars(const char* first,
                             const char* last,
                             T& value,  // NOLINT
                             int base = 10) = delete;

// Each helper macro stamps out one explicit specialization that hands the
// range [first, last) to the matching aton_* routine as (pointer, length).
#define DEFINE_UNSIGNED_FROM_CHARS(Type)                                   \
  template <>                                                              \
  inline from_chars_result from_chars<Type>(                               \
      const char* first, const char* last, Type& value, int base) {        \
    return aton_unsigned(first, static_cast<int>(last - first), value,     \
                         base);                                            \
  }

#define DEFINE_SIGNED_FROM_CHARS(Type)                                     \
  template <>                                                              \
  inline from_chars_result from_chars<Type>(                               \
      const char* first, const char* last, Type& value, int base) {        \
    return aton_signed(first, static_cast<int>(last - first), value,       \
                       base);                                              \
  }

// The floating-point parser takes no base; `base` is accepted only to keep
// the signature uniform and is not forwarded.
#define DEFINE_FLOAT_FROM_CHARS(Type)                                      \
  template <>                                                              \
  inline from_chars_result from_chars<Type>(                               \
      const char* first, const char* last, Type& value, int base) {        \
    return aton_float(first, static_cast<int>(last - first), value);       \
  }

// Unsigned integers.
DEFINE_UNSIGNED_FROM_CHARS(uint8_t);
DEFINE_UNSIGNED_FROM_CHARS(uint16_t);
DEFINE_UNSIGNED_FROM_CHARS(uint32_t);
DEFINE_UNSIGNED_FROM_CHARS(uint64_t);
// Signed integers.
DEFINE_SIGNED_FROM_CHARS(int8_t);
DEFINE_SIGNED_FROM_CHARS(int16_t);
DEFINE_SIGNED_FROM_CHARS(int32_t);
DEFINE_SIGNED_FROM_CHARS(int64_t);
// Floating point.
DEFINE_FLOAT_FROM_CHARS(double);
DEFINE_FLOAT_FROM_CHARS(float);

#undef DEFINE_FLOAT_FROM_CHARS
#undef DEFINE_SIGNED_FROM_CHARS
#undef DEFINE_UNSIGNED_FROM_CHARS
} // namespace utils
} // namespace lite
} // namespace paddle
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <memory> // For std::unique_ptr #include <memory> // For std::unique_ptr
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/utils/charconv.h"
#include "lite/utils/replace_stl/stream.h" #include "lite/utils/replace_stl/stream.h"
namespace paddle { namespace paddle {
...@@ -134,5 +135,51 @@ static std::vector<T> Split(const std::string& original, ...@@ -134,5 +135,51 @@ static std::vector<T> Split(const std::string& original,
return results; return results;
} }
// A non-owning view of a character range inside a std::string, described by
// a pair of const iterators. The string the iterators were taken from must
// stay alive and unmodified for as long as the view is used.
class StringView {
 public:
  StringView(std::string::const_iterator begin, std::string::const_iterator end)
      : begin_(begin), end_(end) {}

  // Number of characters in the view.
  size_t size() const { return static_cast<size_t>(end_ - begin_); }
  std::string::const_iterator begin() const { return begin_; }
  std::string::const_iterator end() const { return end_; }

  // Copies the viewed characters into a new std::string.
  operator std::string() const { return std::string(begin_, end_); }

  // Parses the viewed characters as a number of type T with
  // utils::from_chars. The status returned by from_chars is discarded; an
  // empty view, or a parse that writes no value, yields a value-initialized
  // (zero) T.
  template <typename T>
  T to_digit() const {
    T result{};
    // Neither iterator of an empty view may be dereferenced, and `end_` may
    // be a past-the-end iterator in any case, so the end pointer is derived
    // from the first character plus the length instead of from `&*end_`.
    if (begin_ != end_) {
      const char* first = &*begin_;
      utils::from_chars(first, first + (end_ - begin_), result);
    }
    return result;
  }

 private:
  std::string::const_iterator begin_;
  std::string::const_iterator end_;
};
static std::vector<StringView> SplitView(const std::string& str,
char delimiter = ' ') {
enum State { inSpace, inToken };
State state = inSpace;
std::vector<StringView> result;
std::string::const_iterator pTokenBegin{};
for (auto it = str.begin(); it != str.end(); ++it) {
const State newState = (*it == delimiter ? inSpace : inToken);
if (newState != state) {
switch (newState) {
case inSpace:
result.push_back(StringView(pTokenBegin, it));
break;
case inToken:
pTokenBegin = it;
}
}
state = newState;
}
if (state == inToken) {
result.push_back(StringView(pTokenBegin, str.end()));
}
return result;
}
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/utils/string.h"
#include <gtest/gtest.h>
#include <cmath>
#include <string>
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
namespace utils {
namespace {

// Parses `str` as an int with from_chars and checks that the conversion
// succeeds and agrees with std::stoi.
void check_stoi(const std::string& str, int base = 10) {
  int result = 0;
  auto status = from_chars(str.data(), str.data() + str.size(), result, base);
  // Check the status first so that a failed parse is reported as such rather
  // than as a value mismatch.
  CHECK(status.ec == std::errc());
  CHECK_EQ(result, std::stoi(str, 0, base));
}

// Same as check_stoi for int64_t, compared against std::stol.
void check_stol(const std::string& str, int base = 10) {
  int64_t result = 0;
  auto status = from_chars(str.data(), str.data() + str.size(), result, base);
  CHECK(status.ec == std::errc());
  CHECK_EQ(result, std::stol(str, 0, base));
}

// Parses `str` as a float with from_chars and checks that the result is
// within 1e-4 of std::stof.
void check_stof(const std::string& str) {
  float result = 0.f;
  auto status = from_chars(str.data(), str.data() + str.size(), result);
  CHECK(status.ec == std::errc());
  // The absolute value must be taken of the difference, not of the boolean
  // outcome of the comparison, otherwise a result that is far too small
  // passes the check.
  CHECK(std::abs(result - std::stof(str)) < 0.0001);
}

// Same as check_stof for double, compared against std::stod.
void check_stod(const std::string& str) {
  double result = 0.0;
  auto status = from_chars(str.data(), str.data() + str.size(), result);
  CHECK(status.ec == std::errc());
  CHECK(std::abs(result - std::stod(str)) < 0.0001);
}

}  // namespace
// Runs each parser over a few representative inputs; every helper compares
// the from_chars result with the matching std::sto* function.
TEST(from_chars, test) {
  // int: decimal inputs, then one hexadecimal input.
  for (const char* text : {"10", "-128"}) {
    check_stoi(text);
  }
  check_stoi("A", 16);

  // int64_t: decimal inputs, then one hexadecimal input.
  for (const char* text : {"100", "-128"}) {
    check_stol(text);
  }
  check_stol("AA", 16);

  // float
  for (const char* text : {"10.10", "-10.10", "-3.1415926"}) {
    check_stof(text);
  }

  // double
  for (const char* text : {"10.10", "-10.10", "-123.12345678"}) {
    check_stod(text);
  }
}
// Splits a serialized kernel type string and checks every token, both as
// text and as a number converted through StringView::to_digit.
TEST(StringView, Split) {
  const std::string str("conv2d/def/4/1/1");
  const std::vector<StringView> result = lite::SplitView(str, '/');
  // Verify the token count before indexing so that a wrong split is reported
  // as a failed check instead of an out-of-bounds access.
  CHECK_EQ(result.size(), 5u);
  CHECK_EQ(static_cast<std::string>(result[0]), "conv2d");
  CHECK_EQ(static_cast<std::string>(result[1]), "def");
  CHECK_EQ(static_cast<std::string>(result[2]), "4");
  CHECK_EQ(result[2].to_digit<int32_t>(), 4);
  CHECK_EQ(static_cast<std::string>(result[3]), "1");
  CHECK_EQ(result[3].to_digit<float>(), 1.0f);
  CHECK_EQ(static_cast<std::string>(result[4]), "1");
  CHECK_EQ(result[4].to_digit<uint32_t>(), 1u);
}
} // namespace utils
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册