diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 9898dc083ebb1783a0e2ddd12afaa9c3d5a79e98..47ca1833967ee705d6558b1dad06a6335b30f03a 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(gserver) add_subdirectory(pserver) add_subdirectory(trainer) add_subdirectory(scripts) +add_subdirectory(strings) # Do not build go directory until go cmake is working smoothly. # if(CMAKE_Go_COMPILER) diff --git a/paddle/strings/CMakeLists.txt b/paddle/strings/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e55eecd484c0e218ecd51bbd19b3eb4f6f92a25 --- /dev/null +++ b/paddle/strings/CMakeLists.txt @@ -0,0 +1,2 @@ +cc_library(stringpiece SRCS stringpiece.cc) +cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags) diff --git a/paddle/strings/stringpiece.cc b/paddle/strings/stringpiece.cc new file mode 100644 index 0000000000000000000000000000000000000000..4f788c6ecd56166aa0520db139a6cb1a29fae7cd --- /dev/null +++ b/paddle/strings/stringpiece.cc @@ -0,0 +1,135 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+#include "paddle/strings/stringpiece.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <stdexcept>
+
+namespace paddle {
+
+// Creates an empty piece: NULL data and zero length.
+StringPiece::StringPiece() : data_(NULL), size_(0) {}
+
+// Refers to the n bytes starting at d.  Throws std::invalid_argument if d
+// is NULL while n is not 0, so a piece with NULL data is always empty.
+StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
+  if (d == NULL && n != 0)
+    throw std::invalid_argument(
+        "StringPiece requires len to be 0 for NULL data");
+}
+
+// Refers to the NUL-terminated string s; a NULL s yields an empty piece.
+StringPiece::StringPiece(const char* s) : data_(s) {
+  size_ = (s == NULL) ? 0 : strlen(s);
+}
+
+// Refers to the bytes currently held by s.  The piece does not copy them.
+StringPiece::StringPiece(const std::string& s)
+    : data_(s.data()), size_(s.size()) {}
+
+// Three-way comparison: compares the common prefix byte-wise with memcmp
+// and, if it is equal, orders the shorter piece first.  Returns a negative
+// value, 0, or a positive value.
+int Compare(StringPiece a, StringPiece b) {
+  const size_t min_len = std::min(a.len(), b.len());
+  // An empty piece may hold NULL data, and memcmp must not be handed a NULL
+  // pointer even when the length is 0, so skip the call in that case.
+  const int r = (min_len == 0) ? 0 : memcmp(a.data(), b.data(), min_len);
+  if (r != 0) return r;
+  if (a.len() < b.len()) return -1;
+  if (a.len() > b.len()) return 1;
+  return 0;
+}
+
+// Two pieces are equal when they have the same length and the same bytes.
+bool operator==(StringPiece x, StringPiece y) {
+  if (x.len() != y.len()) return false;
+  // Empty pieces are equal regardless of their data pointers (one may be
+  // NULL); identical pointers need no byte comparison either.
+  if (x.len() == 0 || x.data() == y.data()) return true;
+  return memcmp(x.data(), y.data(), x.len()) == 0;
+}
+
+bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }
+
+bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; }
+bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; }
+
+bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; }
+bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; }
+
+// Returns true if s starts with x.  An empty x is a prefix of every s.
+bool HasPrefix(StringPiece s, StringPiece x) {
+  if (s.len() < x.len()) return false;
+  if (x.len() == 0) return true;  // never pass a possibly-NULL pointer on
+  return memcmp(s.data(), x.data(), x.len()) == 0;
+}
+
+// Returns true if s ends with x.  An empty x is a suffix of every s.
+bool HasSuffix(StringPiece s, StringPiece x) {
+  if (s.len() < x.len()) return false;
+  if (x.len() == 0) return true;  // never pass a possibly-NULL pointer on
+  return memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0;
+}
+
+// Drops the first n bytes of s.  Throws std::invalid_argument if n exceeds
+// the length of s.
+StringPiece SkipPrefix(StringPiece s, size_t n) {
+  if (n > s.len())
+    throw std::invalid_argument("Skip distance larger than StringPiece length");
+  return StringPiece(s.data() + n, s.len() - n);
+}
+
+// Drops the last n bytes of s.  Throws std::invalid_argument if n exceeds
+// the length of s.
+StringPiece SkipSuffix(StringPiece s, size_t n) {
+  if (n > s.len())
+    throw std::invalid_argument("Skip distance larger than StringPiece length");
+  return StringPiece(s.data(), s.len() - n);
+}
+
+// Removes x from the front of s if s starts with x; otherwise returns s.
+StringPiece TrimPrefix(StringPiece s, StringPiece x) {
+  return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
+}
+
+// Removes x from the back of s if s ends with x; otherwise returns s.
+StringPiece TrimSuffix(StringPiece s, StringPiece x) {
+  return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
+}
+
+// Returns true if sub occurs in s.  Because the result of std::search is
+// compared against s.end(), an empty s contains nothing, not even an empty
+// sub.
+bool Contains(StringPiece s, StringPiece sub) {
+  return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
+}
+
+// Returns the offset of the first occurrence of sub in s, or npos.  As with
+// Contains, an empty s yields npos even for an empty sub.
+size_t Index(StringPiece s, StringPiece sub) {
+  auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
+  return e != s.end() ? e - s.data() : StringPiece::npos;
+}
+
+// Returns the offset of the first occurrence of c at or after pos, or npos
+// if there is none or pos is out of range.
+size_t Find(StringPiece s, char c, size_t pos) {
+  if (pos >= s.len()) {
+    return StringPiece::npos;
+  }
+  // memchr returns void*; static_cast is the appropriate conversion.
+  const char* result =
+      static_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
+  return result != nullptr ? result - s.data() : StringPiece::npos;
+}
+
+// Returns the offset of the last occurrence of c within [0..pos], or npos.
+// A pos beyond the end (including npos) is clamped to the last byte.
+size_t RFind(StringPiece s, char c, size_t pos) {
+  if (s.len() == 0) return StringPiece::npos;
+  // Count an index down instead of a pointer: moving a pointer to one
+  // before s.data() in order to end the loop would be undefined behaviour.
+  for (size_t i = std::min(pos, s.len() - 1) + 1; i-- > 0;) {
+    if (s.data()[i] == c) {
+      return i;
+    }
+  }
+  return StringPiece::npos;
+}
+
+// Returns the piece s[pos, pos + n).  pos is clamped to the length of s and
+// n to the number of bytes remaining after pos, so this never throws.
+StringPiece SubStr(StringPiece s, size_t pos, size_t n) {
+  if (pos > s.len()) pos = s.len();
+  if (n > s.len() - pos) n = s.len() - pos;
+  return StringPiece(s.data() + pos, n);
+}
+
+// Writes a copy of the referenced bytes to the stream.
+std::ostream& operator<<(std::ostream& o, StringPiece piece) {
+  return o << piece.ToString();
+}
+
+}  // namespace paddle
diff --git a/paddle/strings/stringpiece.h b/paddle/strings/stringpiece.h
new file mode 100644
index 0000000000000000000000000000000000000000..413b65d38412126deef10436770cdd888de6c60c
--- /dev/null
+++ b/paddle/strings/stringpiece.h
@@ -0,0 +1,110 @@
+/*
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#pragma once
+
+#include <assert.h>
+#include <stddef.h>
+
+#include <ostream>
+#include <string>
+
+namespace paddle {
+
+// StringPiece points into a std::string object but doesn't own the
+// string.  It is for efficient access to strings.  Like Go's string
+// type.  Note that StringPiece doesn't mutate the underlying string,
+// so it is thread-safe given that the underlying string doesn't
+// change.  Because StringPiece contains only a few data members, and
+// its syntax is simple as it doesn't own/manage the string, it is
+// cheap to construct StringPieces and pass them around.
+class StringPiece {
+public:
+  // Value returned by Index, Find and RFind when nothing is found.
+  static const size_t npos = static_cast<size_t>(-1);
+
+  // We provide non-explicit single-argument constructors so users can
+  // pass in a "const char*" or a "string" wherever a "StringPiece"
+  // is expected.  These constructors ensure that if data_ is NULL,
+  // size_ is 0; the (d, n) form throws std::invalid_argument when d
+  // is NULL and n is not 0.
+  StringPiece();
+  StringPiece(const char* d, size_t n);
+  StringPiece(const char* d);
+  StringPiece(const std::string& s);
+
+  // Pointer to the first referenced byte (NULL for a default piece) and
+  // the number of referenced bytes.  The data is not necessarily
+  // NUL-terminated.
+  const char* data() const { return data_; }
+  size_t len() const { return size_; }
+
+  // Returns the n-th byte.  n must be less than len(); this is checked
+  // by assert only.
+  char operator[](size_t n) const {
+    assert(n < len());
+    return data_[n];
+  }
+
+  // StringPiece doesn't own the string, so both iterator and const
+  // iterator are const char* indeed.
+  typedef const char* const_iterator;
+  typedef const char* iterator;
+  iterator begin() const { return data_; }
+  iterator end() const { return data_ + size_; }
+
+  // Return a string that contains the copy of the referenced data.
+  std::string ToString() const { return std::string(data_, size_); }
+
+private:
+  const char* data_;
+  size_t size_;
+
+  // Intentionally copyable
+};
+
+// Three-way comparison: returns a negative value, 0, or a positive value
+// when a orders before, equal to, or after b.  Bytes are compared with
+// memcmp; if one piece is a prefix of the other, the shorter orders first.
+int Compare(StringPiece a, StringPiece b);
+
+bool operator==(StringPiece x, StringPiece y);
+bool operator!=(StringPiece x, StringPiece y);
+bool operator<(StringPiece x, StringPiece y);
+bool operator>(StringPiece x, StringPiece y);
+bool operator<=(StringPiece x, StringPiece y);
+bool operator>=(StringPiece x, StringPiece y);
+
+// Returns whether s starts (or ends) with the given piece.  An empty
+// prefix (or suffix) always matches.
+bool HasPrefix(StringPiece s, StringPiece prefix);
+bool HasSuffix(StringPiece s, StringPiece suffix);
+
+// Drop the first (or last) n bytes of s.  Throws std::invalid_argument
+// if n is larger than s.len().
+StringPiece SkipPrefix(StringPiece s, size_t n);
+StringPiece SkipSuffix(StringPiece s, size_t n);
+
+// Skip the prefix (or suffix) if it matches with the string; otherwise
+// return s unchanged.
+StringPiece TrimPrefix(StringPiece s, StringPiece prefix);
+StringPiece TrimSuffix(StringPiece s, StringPiece suffix);
+
+// Returns whether s contains sub.  Any s except for an empty s contains
+// an empty sub.
+bool Contains(StringPiece s, StringPiece sub);
+
+// Return the first occurrence of sub in s, or npos.  If both s and
+// sub are empty, it returns npos; otherwise, if only sub is empty, it
+// returns 0.
+size_t Index(StringPiece s, StringPiece sub);
+
+// Return the first occurrence of c in s[pos:end], or npos.
+size_t Find(StringPiece s, char c, size_t pos);
+
+// Return the last occurrence of c in the search range, or npos.
+// Search range is [0..pos] inclusive.  If pos == npos, search everything.
+size_t RFind(StringPiece s, char c, size_t pos);
+
+// Return the piece s[pos, pos + n).  pos is clamped to s.len() and n to
+// the number of bytes available after pos.
+StringPiece SubStr(StringPiece s, size_t pos, size_t n);
+
+// allow StringPiece to be logged
+std::ostream& operator<<(std::ostream& o, StringPiece piece);
+
+}  // namespace paddle
diff --git a/paddle/strings/stringpiece_test.cc b/paddle/strings/stringpiece_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2ba66a04f641c3457efa713383484491a213668f
--- /dev/null
+++ b/paddle/strings/stringpiece_test.cc
@@ -0,0 +1,293 @@
+/*
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#include "paddle/strings/stringpiece.h"
+
+#include <string.h>
+
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#include "gtest/gtest.h"
+
+// Every constructor must produce an empty piece for NULL or empty input;
+// NULL data with a non-zero length is rejected.
+TEST(StringPiece, Construct) {
+  {
+    paddle::StringPiece s;
+    // Compared with == rather than EXPECT_EQ(NULL, ...), which does not
+    // compile with every GoogleTest version.
+    EXPECT_TRUE(s.data() == nullptr);
+    EXPECT_EQ(0U, s.len());
+  }
+  { EXPECT_THROW(paddle::StringPiece s(NULL, 10000U), std::invalid_argument); }
+  {
+    paddle::StringPiece s(NULL);
+    EXPECT_EQ(0U, s.len());
+  }
+  {
+    std::string a;
+    EXPECT_EQ(0U, a.size());
+    paddle::StringPiece s(a);
+    EXPECT_EQ(0U, s.len());
+  }
+}
+
+// Copies share the referenced bytes; equality compares content, not
+// addresses.
+TEST(StringPiece, CopyAndAssign) {
+  paddle::StringPiece empty;
+  EXPECT_EQ(0U, empty.len());
+
+  paddle::StringPiece a("hello");
+  paddle::StringPiece b = a;
+  EXPECT_EQ(b.len(), strlen("hello"));
+  EXPECT_EQ(a, b);
+
+  std::string storage("hello");
+  paddle::StringPiece c(storage);
+  EXPECT_EQ(a, c);
+  EXPECT_NE(a.data(), c.data());
+}
+
+TEST(StringPiece, Compare) {
+  {
+    paddle::StringPiece a("hello");
+    paddle::StringPiece b("world");
+    EXPECT_TRUE(a != b);
+    EXPECT_FALSE(a == b);
+    EXPECT_TRUE(a < b);
+    EXPECT_TRUE(a <= b);
+    EXPECT_FALSE(a > b);
+    EXPECT_FALSE(a >= b);
+    EXPECT_LT(Compare(a, b), 0);
+    EXPECT_GT(Compare(b, a), 0);
+  }
+  {
+    paddle::StringPiece a, b;
+    EXPECT_TRUE(a == b);
+    EXPECT_FALSE(a != b);
+    EXPECT_FALSE(a < b);
+    EXPECT_FALSE(a > b);
+    EXPECT_TRUE(a <= b);
+    EXPECT_TRUE(a >= b);
+    EXPECT_EQ(0, Compare(a, b));
+    EXPECT_EQ(0, Compare(b, a));
+  }
+  {
+    // A default piece (NULL data) equals a non-NULL empty piece.
+    paddle::StringPiece a;
+    paddle::StringPiece b("");
+    EXPECT_TRUE(a == b);
+    EXPECT_FALSE(a != b);
+    EXPECT_EQ(0, Compare(a, b));
+    EXPECT_EQ(0, Compare(b, a));
+  }
+  {
+    // A proper prefix orders before the longer piece.
+    paddle::StringPiece a("app");
+    paddle::StringPiece b("apple");
+    EXPECT_TRUE(a < b);
+    EXPECT_LT(Compare(a, b), 0);
+    EXPECT_GT(Compare(b, a), 0);
+  }
+}
+
+TEST(StringPiece, ToString) {
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ(std::string(""), s.ToString());
+  }
+  {
+    paddle::StringPiece s(NULL);
+    EXPECT_EQ(std::string(""), s.ToString());
+  }
+  {
+    paddle::StringPiece s("hello");
+    EXPECT_EQ(std::string("hello"), s.ToString());
+  }
+}
+
+TEST(StringPiece, HasPrefixSuffix) {
+  using paddle::HasPrefix;
+  using paddle::HasSuffix;
+  {
+    paddle::StringPiece s;
+    EXPECT_FALSE(HasPrefix(s, "something"));
+    EXPECT_TRUE(HasPrefix(s, ""));
+    EXPECT_FALSE(HasSuffix(s, "something"));
+    EXPECT_TRUE(HasSuffix(s, ""));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_TRUE(HasPrefix(s, ""));
+    EXPECT_TRUE(HasPrefix(s, "a"));
+    EXPECT_TRUE(HasPrefix(s, "ap"));
+    EXPECT_TRUE(HasPrefix(s, "app"));
+
+    EXPECT_TRUE(HasSuffix(s, ""));
+    EXPECT_TRUE(HasSuffix(s, "p"));
+    EXPECT_TRUE(HasSuffix(s, "pp"));
+    EXPECT_TRUE(HasSuffix(s, "app"));
+  }
+}
+
+TEST(StringPiece, SkipPrefixSuffix) {
+  using paddle::SkipPrefix;
+  using paddle::SkipSuffix;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ("", SkipPrefix(s, 0));
+    EXPECT_THROW(SkipPrefix(s, 1), std::invalid_argument);
+
+    EXPECT_EQ("", SkipSuffix(s, 0));
+    EXPECT_THROW(SkipSuffix(s, 1), std::invalid_argument);
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ("app", SkipPrefix(s, 0));
+    EXPECT_EQ("pp", SkipPrefix(s, 1));
+    EXPECT_EQ("p", SkipPrefix(s, 2));
+    EXPECT_EQ("", SkipPrefix(s, 3));
+    EXPECT_THROW(SkipPrefix(s, 4), std::invalid_argument);
+
+    EXPECT_EQ("app", SkipSuffix(s, 0));
+    EXPECT_EQ("ap", SkipSuffix(s, 1));
+    EXPECT_EQ("a", SkipSuffix(s, 2));
+    EXPECT_EQ("", SkipSuffix(s, 3));
+    EXPECT_THROW(SkipSuffix(s, 4), std::invalid_argument);
+  }
+}
+
+TEST(StringPiece, TrimPrefixSuffix) {
+  using paddle::TrimPrefix;
+  using paddle::TrimSuffix;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ("", TrimPrefix(s, ""));
+    EXPECT_EQ("", TrimPrefix(s, "something"));
+
+    EXPECT_EQ("", TrimSuffix(s, ""));
+    EXPECT_EQ("", TrimSuffix(s, "something"));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ("app", TrimPrefix(s, ""));
+    EXPECT_EQ("pp", TrimPrefix(s, "a"));
+    EXPECT_EQ("p", TrimPrefix(s, "ap"));
+    EXPECT_EQ("", TrimPrefix(s, "app"));
+    EXPECT_EQ("app", TrimPrefix(s, "something"));
+
+    EXPECT_EQ("app", TrimSuffix(s, ""));
+    EXPECT_EQ("ap", TrimSuffix(s, "p"));
+    EXPECT_EQ("a", TrimSuffix(s, "pp"));
+    EXPECT_EQ("", TrimSuffix(s, "app"));
+    EXPECT_EQ("app", TrimSuffix(s, "something"));
+  }
+}
+
+// An empty s contains nothing, not even the empty piece.
+TEST(StringPiece, Contains) {
+  using paddle::Contains;
+  {
+    paddle::StringPiece s;
+    EXPECT_FALSE(Contains(s, ""));
+    EXPECT_FALSE(Contains(s, "something"));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_TRUE(Contains(s, ""));
+    EXPECT_TRUE(Contains(s, "a"));
+    EXPECT_TRUE(Contains(s, "p"));
+    EXPECT_TRUE(Contains(s, "ap"));
+    EXPECT_TRUE(Contains(s, "pp"));
+    EXPECT_TRUE(Contains(s, "app"));
+    EXPECT_FALSE(Contains(s, "something"));
+  }
+}
+
+TEST(StringPiece, Index) {
+  using paddle::Index;
+  auto npos = paddle::StringPiece::npos;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ(npos, Index(s, ""));
+    EXPECT_EQ(npos, Index(s, "something"));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ(0U, Index(s, ""));
+    EXPECT_EQ(0U, Index(s, "a"));
+    EXPECT_EQ(1U, Index(s, "p"));
+    EXPECT_EQ(0U, Index(s, "ap"));
+    EXPECT_EQ(1U, Index(s, "pp"));
+    EXPECT_EQ(0U, Index(s, "app"));
+    EXPECT_EQ(npos, Index(s, "something"));
+  }
+}
+
+TEST(StringPiece, Find) {
+  using paddle::Find;
+  auto npos = paddle::StringPiece::npos;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ(npos, Find(s, 'a', 0U));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ(0U, Find(s, 'a', 0U));
+    EXPECT_EQ(1U, Find(s, 'p', 0U));
+    EXPECT_EQ(1U, Find(s, 'p', 1U));
+    EXPECT_EQ(2U, Find(s, 'p', 2U));
+    EXPECT_EQ(npos, Find(s, 'z', 2U));
+  }
+}
+
+TEST(StringPiece, RFind) {
+  using paddle::RFind;
+  auto npos = paddle::StringPiece::npos;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ(npos, RFind(s, 'a', 0U));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ(2U, RFind(s, 'p', 2U));
+    EXPECT_EQ(0U, RFind(s, 'a', 2U));
+    EXPECT_EQ(1U, RFind(s, 'p', 1U));
+    EXPECT_EQ(0U, RFind(s, 'a', 0));
+    EXPECT_EQ(npos, RFind(s, 'z', 2U));
+
+    // pos == npos (or anything past the end) searches the whole piece.
+    EXPECT_EQ(2U, RFind(s, 'p', npos));
+    EXPECT_EQ(0U, RFind(s, 'a', npos));
+    // The range [0..0] holds only 'a'.
+    EXPECT_EQ(npos, RFind(s, 'p', 0U));
+  }
+}
+
+// SubStr clamps both pos and n instead of throwing.
+TEST(StringPiece, SubStr) {
+  using paddle::SubStr;
+  {
+    paddle::StringPiece s;
+    EXPECT_EQ("", SubStr(s, 0, 0));
+    EXPECT_EQ("", SubStr(s, 0, 1));
+    EXPECT_EQ("", SubStr(s, 1, 0));
+  }
+  {
+    paddle::StringPiece s("app");
+    EXPECT_EQ("", SubStr(s, 0, 0));
+    EXPECT_EQ("", SubStr(s, 1, 0));
+    EXPECT_EQ("", SubStr(s, 2, 0));
+    EXPECT_EQ("", SubStr(s, 3, 0));
+
+    EXPECT_EQ("a", SubStr(s, 0, 1));
+    EXPECT_EQ("p", SubStr(s, 1, 1));
+    EXPECT_EQ("p", SubStr(s, 2, 1));
+    EXPECT_EQ("", SubStr(s, 3, 1));
+
+    EXPECT_EQ("ap", SubStr(s, 0, 2));
+    EXPECT_EQ("pp", SubStr(s, 1, 2));
+    EXPECT_EQ("p", SubStr(s, 2, 2));
+    EXPECT_EQ("", SubStr(s, 3, 2));
+
+    EXPECT_EQ("app", SubStr(s, 0, 3));
+    EXPECT_EQ("pp", SubStr(s, 1, 3));
+    EXPECT_EQ("p", SubStr(s, 2, 3));
+    EXPECT_EQ("", SubStr(s, 3, 3));
+  }
+}
+
+TEST(StringPiece, StreamOutput) {
+  using paddle::StringPiece;
+
+  std::stringstream o;
+  o << StringPiece();
+  EXPECT_EQ("", o.str());
+
+  o << StringPiece("hello");
+  EXPECT_EQ("hello", o.str());
+
+  o << StringPiece();
+  EXPECT_EQ("hello", o.str());
+}