From 71bfb400a21e6fa46c604e1f4dbdcd9bed4c690a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 21 Aug 2019 10:40:09 -0700 Subject: [PATCH] Move scanner from lib/strings to core/platform PiperOrigin-RevId: 264641416 --- .../contrib/makefile/proto_text_cc_files.txt | 2 +- tensorflow/core/BUILD | 4 + tensorflow/core/lib/strings/BUILD | 10 +- tensorflow/core/lib/strings/scanner.h | 226 +--------------- tensorflow/core/platform/BUILD | 15 +- .../core/{lib/strings => platform}/scanner.cc | 2 +- tensorflow/core/platform/scanner.h | 245 ++++++++++++++++++ .../{lib/strings => platform}/scanner_test.cc | 2 +- 8 files changed, 268 insertions(+), 238 deletions(-) rename tensorflow/core/{lib/strings => platform}/scanner.cc (96%) create mode 100644 tensorflow/core/platform/scanner.h rename tensorflow/core/{lib/strings => platform}/scanner_test.cc (99%) diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt index 0e557818fbd..c3974206731 100644 --- a/tensorflow/contrib/makefile/proto_text_cc_files.txt +++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt @@ -33,7 +33,6 @@ tensorflow/core/lib/random/weighted_picker.cc tensorflow/core/lib/strings/numbers.cc tensorflow/core/lib/strings/ordered_code.cc tensorflow/core/lib/strings/proto_text_util.cc -tensorflow/core/lib/strings/scanner.cc tensorflow/core/lib/strings/strcat.cc tensorflow/core/lib/wav/wav_io.cc tensorflow/core/platform/cpu_info.cc @@ -52,6 +51,7 @@ tensorflow/core/platform/posix/port.cc tensorflow/core/platform/posix/posix_file_system.cc tensorflow/core/platform/protobuf.cc tensorflow/core/platform/protobuf_util.cc +tensorflow/core/platform/scanner.cc tensorflow/core/platform/setround.cc tensorflow/core/platform/stringprintf.cc tensorflow/core/platform/str_util.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 90436b76dfe..f3e18818b57 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2362,6 +2362,7 @@ 
tf_proto_library_cc( LIB_INTERNAL_PRIVATE_HEADERS = [ "framework/resource_handle.h", "//tensorflow/core/platform:legacy_lib_internal_headers", + "//tensorflow/core/platform:scanner.h", "//tensorflow/core/platform:str_util.h", "//tensorflow/core/lib/bfloat16:bfloat16.h", "//tensorflow/core/lib/gtl:legacy_lib_gtl_all_headers", @@ -2486,6 +2487,7 @@ cc_library( "//tensorflow/core/platform:annotation", "//tensorflow/core/platform:cpu_info", "//tensorflow/core/platform:platform_strings", + "//tensorflow/core/platform:scanner", "//tensorflow/core/platform:stringprintf", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform/default/build_config:platformlib", @@ -3784,6 +3786,7 @@ tf_cc_tests( "//tensorflow/core/platform:net_test.cc", "//tensorflow/core/platform:port_test.cc", "//tensorflow/core/platform:profile_utils/cpu_utils_test.cc", + "//tensorflow/core/platform:scanner_test.cc", "//tensorflow/core/platform:stacktrace_handler_test.cc", "//tensorflow/core/platform:str_util_test.cc", "//tensorflow/core/platform:stringpiece_test.cc", @@ -3799,6 +3802,7 @@ tf_cc_tests( ":protos_all_cc", ":test", ":test_main", + "//tensorflow/core/platform:scanner", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringpiece", "//tensorflow/core/platform:stringprintf", diff --git a/tensorflow/core/lib/strings/BUILD b/tensorflow/core/lib/strings/BUILD index 28c7c7ae22f..9302a8f10bc 100644 --- a/tensorflow/core/lib/strings/BUILD +++ b/tensorflow/core/lib/strings/BUILD @@ -23,13 +23,8 @@ cc_library( cc_library( name = "scanner", - srcs = ["scanner.cc"], hdrs = ["scanner.h"], - deps = [ - ":string_utils", - "//tensorflow/core/platform:macros", - "//tensorflow/core/platform:stringpiece", - ], + deps = ["//tensorflow/core/platform:scanner"], ) cc_library( @@ -87,7 +82,6 @@ filegroup( "ordered_code.cc", "proto_serialization.cc", "proto_text_util.cc", - "scanner.cc", "strcat.cc", ], visibility = ["//tensorflow/core:__pkg__"], @@ -100,7 +94,6 @@ filegroup( 
"numbers_test.cc", "ordered_code_test.cc", "proto_serialization_test.cc", - "scanner_test.cc", "strcat_test.cc", ], visibility = ["//tensorflow/core:__pkg__"], @@ -153,7 +146,6 @@ filegroup( srcs = [ "base64_test.cc", "numbers_test.cc", - "scanner_test.cc", "strcat_test.cc", ], visibility = ["//tensorflow/core:__pkg__"], diff --git a/tensorflow/core/lib/strings/scanner.h b/tensorflow/core/lib/strings/scanner.h index 38ccf9fd268..349f6091e2f 100644 --- a/tensorflow/core/lib/strings/scanner.h +++ b/tensorflow/core/lib/strings/scanner.h @@ -16,230 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_LIB_STRINGS_SCANNER_H_ #define TENSORFLOW_LIB_STRINGS_SCANNER_H_ -#include - -#include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/stringpiece.h" - -namespace tensorflow { -namespace strings { - -// Scanner provides simplified string parsing, in which a string is parsed as a -// series of scanning calls (e.g. One, Any, Many, OneLiteral, Eos), and then -// finally GetResult is called. If GetResult returns true, then it also returns -// the remaining characters and any captured substring. -// -// The range to capture can be controlled with RestartCapture and StopCapture; -// by default, all processed characters are captured. -class Scanner { - public: - // Classes of characters. Each enum name is to be read as the union of the - // parts - e.g., class LETTER_DIGIT means the class includes all letters and - // all digits. - // - // LETTER means ascii letter a-zA-Z. - // DIGIT means ascii digit: 0-9. 
- enum CharClass { - // NOTE: When adding a new CharClass, update the AllCharClasses ScannerTest - // in scanner_test.cc - ALL, - DIGIT, - LETTER, - LETTER_DIGIT, - LETTER_DIGIT_DASH_UNDERSCORE, - LETTER_DIGIT_DASH_DOT_SLASH, // SLASH is / only, not backslash - LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE, // SLASH is / only, not backslash - LETTER_DIGIT_DOT, - LETTER_DIGIT_DOT_PLUS_MINUS, - LETTER_DIGIT_DOT_UNDERSCORE, - LETTER_DIGIT_UNDERSCORE, - LOWERLETTER, - LOWERLETTER_DIGIT, - LOWERLETTER_DIGIT_UNDERSCORE, - NON_ZERO_DIGIT, - SPACE, - UPPERLETTER, - RANGLE, - }; - - explicit Scanner(StringPiece source) : cur_(source) { RestartCapture(); } - - // Consume the next character of the given class from input. If the next - // character is not in the class, then GetResult will ultimately return false. - Scanner& One(CharClass clz) { - if (cur_.empty() || !Matches(clz, cur_[0])) { - return Error(); - } - cur_.remove_prefix(1); - return *this; - } - - // Consume the next s.size() characters of the input, if they match <s>. If - // they don't match <s>, this is a no-op. - Scanner& ZeroOrOneLiteral(StringPiece s) { - str_util::ConsumePrefix(&cur_, s); - return *this; - } - - // Consume the next s.size() characters of the input, if they match <s>. If - // they don't match <s>, then GetResult will ultimately return false. - Scanner& OneLiteral(StringPiece s) { - if (!str_util::ConsumePrefix(&cur_, s)) { - error_ = true; - } - return *this; - } - - // Consume characters from the input as long as they match <clz>. Zero - // characters is still considered a match, so it will never cause GetResult to - // return false. - Scanner& Any(CharClass clz) { - while (!cur_.empty() && Matches(clz, cur_[0])) { - cur_.remove_prefix(1); - } - return *this; - } - - // Shorthand for One(clz).Any(clz). - Scanner& Many(CharClass clz) { return One(clz).Any(clz); } - - // Reset the capture start point.
- // - // Later, when GetResult is called and if it returns true, the capture - // returned will start at the position at the time this was called. - Scanner& RestartCapture() { - capture_start_ = cur_.data(); - capture_end_ = nullptr; - return *this; - } - - // Stop capturing input. - // - // Later, when GetResult is called and if it returns true, the capture - // returned will end at the position at the time this was called. - Scanner& StopCapture() { - capture_end_ = cur_.data(); - return *this; - } - - // If not at the input of input, then GetResult will ultimately return false. - Scanner& Eos() { - if (!cur_.empty()) error_ = true; - return *this; - } - - // Shorthand for Any(SPACE). - Scanner& AnySpace() { return Any(SPACE); } - - // This scans input until <end_ch> is reached. <end_ch> is NOT consumed. - Scanner& ScanUntil(char end_ch) { - ScanUntilImpl(end_ch, false); - return *this; - } - - // This scans input until <end_ch> is reached. <end_ch> is NOT consumed. - // Backslash escape sequences are skipped. - // Used for implementing quoted string scanning. - Scanner& ScanEscapedUntil(char end_ch) { - ScanUntilImpl(end_ch, true); - return *this; - } - - // Return the next character that will be scanned, or <default_value> if there - // are no more characters to scan. - // Note that if a scan operation has failed (so GetResult() returns false), - // then the value of Peek may or may not have advanced since the scan - // operation that failed. - char Peek(char default_value = '\0') const { - return cur_.empty() ? default_value : cur_[0]; - } - - // Returns false if there are no remaining characters to consume. - int empty() const { return cur_.empty(); } - - // Returns true if the input string successfully matched. When true is - // returned, the remaining string is returned in <remaining> and the captured - // string returned in <capture>, if non-NULL.
- bool GetResult(StringPiece* remaining = nullptr, - StringPiece* capture = nullptr); - - private: - void ScanUntilImpl(char end_ch, bool escaped); - - Scanner& Error() { - error_ = true; - return *this; - } - - static bool IsLetter(char ch) { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); - } - - static bool IsLowerLetter(char ch) { return ch >= 'a' && ch <= 'z'; } - - static bool IsDigit(char ch) { return ch >= '0' && ch <= '9'; } - - static bool IsSpace(char ch) { - return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' || - ch == '\r'); - } - - static bool Matches(CharClass clz, char ch) { - switch (clz) { - case ALL: - return true; - case DIGIT: - return IsDigit(ch); - case LETTER: - return IsLetter(ch); - case LETTER_DIGIT: - return IsLetter(ch) || IsDigit(ch); - case LETTER_DIGIT_DASH_UNDERSCORE: - return (IsLetter(ch) || IsDigit(ch) || ch == '-' || ch == '_'); - case LETTER_DIGIT_DASH_DOT_SLASH: - return IsLetter(ch) || IsDigit(ch) || ch == '-' || ch == '.' || - ch == '/'; - case LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE: - return (IsLetter(ch) || IsDigit(ch) || ch == '-' || ch == '.' || - ch == '/' || ch == '_'); - case LETTER_DIGIT_DOT: - return IsLetter(ch) || IsDigit(ch) || ch == '.'; - case LETTER_DIGIT_DOT_PLUS_MINUS: - return IsLetter(ch) || IsDigit(ch) || ch == '+' || ch == '-' || - ch == '.'; - case LETTER_DIGIT_DOT_UNDERSCORE: - return IsLetter(ch) || IsDigit(ch) || ch == '.' 
|| ch == '_'; - case LETTER_DIGIT_UNDERSCORE: - return IsLetter(ch) || IsDigit(ch) || ch == '_'; - case LOWERLETTER: - return ch >= 'a' && ch <= 'z'; - case LOWERLETTER_DIGIT: - return IsLowerLetter(ch) || IsDigit(ch); - case LOWERLETTER_DIGIT_UNDERSCORE: - return IsLowerLetter(ch) || IsDigit(ch) || ch == '_'; - case NON_ZERO_DIGIT: - return IsDigit(ch) && ch != '0'; - case SPACE: - return IsSpace(ch); - case UPPERLETTER: - return ch >= 'A' && ch <= 'Z'; - case RANGLE: - return ch == '>'; - } - return false; - } - - StringPiece cur_; - const char* capture_start_ = nullptr; - const char* capture_end_ = nullptr; - bool error_ = false; - - friend class ScannerTest; - TF_DISALLOW_COPY_AND_ASSIGN(Scanner); -}; - -} // namespace strings -} // namespace tensorflow +#include "tensorflow/core/platform/scanner.h" #endif // TENSORFLOW_LIB_STRINGS_SCANNER_H_ diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index ca9ed7ac793..8423cbeb9d9 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -23,8 +23,8 @@ load( "tf_logging_absl_deps", "tf_platform_hdrs", "tf_platform_srcs", - "tf_protobuf_deps", "tf_protobuf_compiler_deps", + "tf_protobuf_deps", ) load( "//tensorflow:tensorflow.bzl", @@ -189,6 +189,17 @@ cc_library( deps = tf_protobuf_compiler_deps(), ) +cc_library( + name = "scanner", + srcs = ["scanner.cc"], + hdrs = ["scanner.h"], + deps = [ + ":macros", + ":str_util", + ":stringpiece", + ], +) + cc_library( name = "stacktrace", srcs = glob(["*/stacktrace.h"]), @@ -307,6 +318,7 @@ filegroup( "cpu_info.cc", "platform_strings.cc", "protobuf.cc", + "scanner.cc", "stringprintf.cc", ], ), @@ -402,6 +414,7 @@ filegroup( "cpu_info.cc", "platform_strings.cc", "protobuf.cc", + "scanner.cc", "stringprintf.cc", "str_util.cc", ], diff --git a/tensorflow/core/lib/strings/scanner.cc b/tensorflow/core/platform/scanner.cc similarity index 96% rename from tensorflow/core/lib/strings/scanner.cc rename to 
tensorflow/core/platform/scanner.cc index 39a2265aa27..031ccf0a2e8 100644 --- a/tensorflow/core/lib/strings/scanner.cc +++ b/tensorflow/core/platform/scanner.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/lib/strings/scanner.h" +#include "tensorflow/core/platform/scanner.h" namespace tensorflow { namespace strings { diff --git a/tensorflow/core/platform/scanner.h b/tensorflow/core/platform/scanner.h new file mode 100644 index 00000000000..ac93061949b --- /dev/null +++ b/tensorflow/core/platform/scanner.h @@ -0,0 +1,245 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_SCANNER_H_ +#define TENSORFLOW_CORE_PLATFORM_SCANNER_H_ + +#include <string> + +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/str_util.h" +#include "tensorflow/core/platform/stringpiece.h" + +namespace tensorflow { +namespace strings { + +// Scanner provides simplified string parsing, in which a string is parsed as a +// series of scanning calls (e.g. One, Any, Many, OneLiteral, Eos), and then +// finally GetResult is called. If GetResult returns true, then it also returns +// the remaining characters and any captured substring.
+// +// The range to capture can be controlled with RestartCapture and StopCapture; +// by default, all processed characters are captured. +class Scanner { + public: + // Classes of characters. Each enum name is to be read as the union of the + // parts - e.g., class LETTER_DIGIT means the class includes all letters and + // all digits. + // + // LETTER means ascii letter a-zA-Z. + // DIGIT means ascii digit: 0-9. + enum CharClass { + // NOTE: When adding a new CharClass, update the AllCharClasses ScannerTest + // in scanner_test.cc + ALL, + DIGIT, + LETTER, + LETTER_DIGIT, + LETTER_DIGIT_DASH_UNDERSCORE, + LETTER_DIGIT_DASH_DOT_SLASH, // SLASH is / only, not backslash + LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE, // SLASH is / only, not backslash + LETTER_DIGIT_DOT, + LETTER_DIGIT_DOT_PLUS_MINUS, + LETTER_DIGIT_DOT_UNDERSCORE, + LETTER_DIGIT_UNDERSCORE, + LOWERLETTER, + LOWERLETTER_DIGIT, + LOWERLETTER_DIGIT_UNDERSCORE, + NON_ZERO_DIGIT, + SPACE, + UPPERLETTER, + RANGLE, + }; + + explicit Scanner(StringPiece source) : cur_(source) { RestartCapture(); } + + // Consume the next character of the given class from input. If the next + // character is not in the class, then GetResult will ultimately return false. + Scanner& One(CharClass clz) { + if (cur_.empty() || !Matches(clz, cur_[0])) { + return Error(); + } + cur_.remove_prefix(1); + return *this; + } + + // Consume the next s.size() characters of the input, if they match <s>. If + // they don't match <s>, this is a no-op. + Scanner& ZeroOrOneLiteral(StringPiece s) { + str_util::ConsumePrefix(&cur_, s); + return *this; + } + + // Consume the next s.size() characters of the input, if they match <s>. If + // they don't match <s>, then GetResult will ultimately return false. + Scanner& OneLiteral(StringPiece s) { + if (!str_util::ConsumePrefix(&cur_, s)) { + error_ = true; + } + return *this; + } + + // Consume characters from the input as long as they match <clz>.
Zero + // characters is still considered a match, so it will never cause GetResult to + // return false. + Scanner& Any(CharClass clz) { + while (!cur_.empty() && Matches(clz, cur_[0])) { + cur_.remove_prefix(1); + } + return *this; + } + + // Shorthand for One(clz).Any(clz). + Scanner& Many(CharClass clz) { return One(clz).Any(clz); } + + // Reset the capture start point. + // + // Later, when GetResult is called and if it returns true, the capture + // returned will start at the position at the time this was called. + Scanner& RestartCapture() { + capture_start_ = cur_.data(); + capture_end_ = nullptr; + return *this; + } + + // Stop capturing input. + // + // Later, when GetResult is called and if it returns true, the capture + // returned will end at the position at the time this was called. + Scanner& StopCapture() { + capture_end_ = cur_.data(); + return *this; + } + + // If not at the end of input, then GetResult will ultimately return false. + Scanner& Eos() { + if (!cur_.empty()) error_ = true; + return *this; + } + + // Shorthand for Any(SPACE). + Scanner& AnySpace() { return Any(SPACE); } + + // This scans input until <end_ch> is reached. <end_ch> is NOT consumed. + Scanner& ScanUntil(char end_ch) { + ScanUntilImpl(end_ch, false); + return *this; + } + + // This scans input until <end_ch> is reached. <end_ch> is NOT consumed. + // Backslash escape sequences are skipped. + // Used for implementing quoted string scanning. + Scanner& ScanEscapedUntil(char end_ch) { + ScanUntilImpl(end_ch, true); + return *this; + } + + // Return the next character that will be scanned, or <default_value> if there + // are no more characters to scan. + // Note that if a scan operation has failed (so GetResult() returns false), + // then the value of Peek may or may not have advanced since the scan + // operation that failed. + char Peek(char default_value = '\0') const { + return cur_.empty() ? default_value : cur_[0]; + } + + // Returns true if there are no remaining characters to consume.
+ int empty() const { return cur_.empty(); } + + // Returns true if the input string successfully matched. When true is + // returned, the remaining string is returned in <remaining> and the captured + // string returned in <capture>, if non-NULL. + bool GetResult(StringPiece* remaining = nullptr, + StringPiece* capture = nullptr); + + private: + void ScanUntilImpl(char end_ch, bool escaped); + + Scanner& Error() { + error_ = true; + return *this; + } + + static bool IsLetter(char ch) { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); + } + + static bool IsLowerLetter(char ch) { return ch >= 'a' && ch <= 'z'; } + + static bool IsDigit(char ch) { return ch >= '0' && ch <= '9'; } + + static bool IsSpace(char ch) { + return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\v' || ch == '\f' || + ch == '\r'); + } + + static bool Matches(CharClass clz, char ch) { + switch (clz) { + case ALL: + return true; + case DIGIT: + return IsDigit(ch); + case LETTER: + return IsLetter(ch); + case LETTER_DIGIT: + return IsLetter(ch) || IsDigit(ch); + case LETTER_DIGIT_DASH_UNDERSCORE: + return (IsLetter(ch) || IsDigit(ch) || ch == '-' || ch == '_'); + case LETTER_DIGIT_DASH_DOT_SLASH: + return IsLetter(ch) || IsDigit(ch) || ch == '-' || ch == '.' || + ch == '/'; + case LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE: + return (IsLetter(ch) || IsDigit(ch) || ch == '-' || ch == '.' || + ch == '/' || ch == '_'); + case LETTER_DIGIT_DOT: + return IsLetter(ch) || IsDigit(ch) || ch == '.'; + case LETTER_DIGIT_DOT_PLUS_MINUS: + return IsLetter(ch) || IsDigit(ch) || ch == '+' || ch == '-' || + ch == '.'; + case LETTER_DIGIT_DOT_UNDERSCORE: + return IsLetter(ch) || IsDigit(ch) || ch == '.'
|| ch == '_'; + case LETTER_DIGIT_UNDERSCORE: + return IsLetter(ch) || IsDigit(ch) || ch == '_'; + case LOWERLETTER: + return ch >= 'a' && ch <= 'z'; + case LOWERLETTER_DIGIT: + return IsLowerLetter(ch) || IsDigit(ch); + case LOWERLETTER_DIGIT_UNDERSCORE: + return IsLowerLetter(ch) || IsDigit(ch) || ch == '_'; + case NON_ZERO_DIGIT: + return IsDigit(ch) && ch != '0'; + case SPACE: + return IsSpace(ch); + case UPPERLETTER: + return ch >= 'A' && ch <= 'Z'; + case RANGLE: + return ch == '>'; + } + return false; + } + + StringPiece cur_; + const char* capture_start_ = nullptr; + const char* capture_end_ = nullptr; + bool error_ = false; + + friend class ScannerTest; + TF_DISALLOW_COPY_AND_ASSIGN(Scanner); +}; + +} // namespace strings +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_SCANNER_H_ diff --git a/tensorflow/core/lib/strings/scanner_test.cc b/tensorflow/core/platform/scanner_test.cc similarity index 99% rename from tensorflow/core/lib/strings/scanner_test.cc rename to tensorflow/core/platform/scanner_test.cc index 1514ab5f761..7537ffce179 100644 --- a/tensorflow/core/lib/strings/scanner_test.cc +++ b/tensorflow/core/platform/scanner_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/lib/strings/scanner.h" +#include "tensorflow/core/platform/scanner.h" #include "tensorflow/core/platform/test.h" -- GitLab