提交 9113b257 编写于 作者: J Jonathan Hseu 提交者: TensorFlower Gardener

Change path functions to operate on URIs rather than paths.

- Move the URI parsing and creation functions to path.h so that we don't
  depend on core/platform from core/lib/io.
- Fixes https://github.com/tensorflow/tensorflow/issues/5316
Change: 138023841
上级 7117b4f5
......@@ -64,7 +64,7 @@ class TestFileSystem : public NullFileSystem {
std::unique_ptr<ReadOnlyMemoryRegion>* result) override {
float val = 0;
StringPiece scheme, host, path;
ParseURI(fname, &scheme, &host, &path);
io::ParseURI(fname, &scheme, &host, &path);
// For the tests create in-memory regions with float values equal to the
// region name.
if (path == "/2") {
......
......@@ -14,6 +14,7 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/scanner.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace tensorflow {
......@@ -49,11 +50,14 @@ string JoinPathImpl(std::initializer_list<StringPiece> paths) {
return result;
}
// Return the parts of the path, split on the final "/". If there is no
// "/" in the path, the first part of the output is empty and the second
// is the input. If the only "/" in the path is the first character, it is
// the first part of the output.
std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) {
// Return the parts of the URI, split on the final "/" in the path. If there is
// no "/" in the path, the first part of the output is the scheme and host, and
// the second is the path. If the only "/" in the path is the first character,
// it is included in the first part of the output.
std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
StringPiece scheme, host, path;
ParseURI(uri, &scheme, &host, &path);
auto pos = path.rfind('/');
#ifdef PLATFORM_WINDOWS
if (pos == StringPiece::npos)
......@@ -61,15 +65,17 @@ std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) {
#endif
// Handle the case with no '/' in 'path'.
if (pos == StringPiece::npos)
return std::make_pair(StringPiece(path.data(), 0), path);
return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
path);
// Handle the case with a single leading '/' in 'path'.
if (pos == 0)
return std::make_pair(StringPiece(path.data(), 1),
StringPiece(path.data() + 1, path.size() - 1));
return std::make_pair(
StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
StringPiece(path.data() + 1, path.size() - 1));
return std::make_pair(
StringPiece(path.data(), pos),
StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
}
......@@ -185,5 +191,42 @@ string CleanPath(StringPiece unclean_path) {
return path;
}
// Splits a URI of the form "scheme://host/path" into its three components.
//
// All output pieces are derived from `remaining`: zero-length results are
// built from remaining.begin() / remaining.end() rather than being reset, so
// that they stay positioned relative to the input (see the contract in
// path.h).
//
// If the input does not start with a valid "<scheme>://" prefix, the whole
// input is reported as the path and scheme/host are empty. If no '/' follows
// the host, the path is empty.
void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
              StringPiece* path) {
  // Step 0: the scheme. It has to match [a-zA-Z][0-9a-zA-Z.]* and be
  // immediately followed by the literal "://".
  // TODO(keveman): Allow "+" and "-" in the scheme.
  const bool has_scheme = strings::Scanner(remaining)
                              .One(strings::Scanner::LETTER)
                              .Many(strings::Scanner::LETTER_DIGIT_DOT)
                              .StopCapture()
                              .OneLiteral("://")
                              .GetResult(&remaining, scheme);
  if (!has_scheme) {
    // Not a URI: hand back the entire input as the path, with zero-length
    // scheme and host anchored at the start of the input.
    *scheme = StringPiece(remaining.begin(), 0);
    *host = StringPiece(remaining.begin(), 0);
    *path = remaining;
    return;
  }

  // Step 1: the host is everything up to the first '/'.
  const bool has_path =
      strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host);
  if (has_path) {
    // Step 2: whatever is left over is the path.
    *path = remaining;
    return;
  }

  // No '/' after the scheme, so the remainder is all host and the path is a
  // zero-length piece positioned at the end of the input.
  *host = remaining;
  *path = StringPiece(remaining.end(), 0);
}
// Assembles "<scheme>://<host><path>". When `scheme` is empty the result is
// simply a copy of `path`; `host` is not used in that case.
string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
  return scheme.empty() ? path.ToString()
                        : strings::StrCat(scheme, "://", host, path);
}
} // namespace io
} // namespace tensorflow
......@@ -74,6 +74,21 @@ StringPiece Extension(StringPiece path);
// string manipulation, completely independent of process state.
string CleanPath(StringPiece path);
// Splits `uri` into its scheme, host, and path components and stores them in
// *scheme, *host, and *path. All three outputs are guaranteed by this function
// to point into the contents of `uri`, even when they are empty.
//
// Examples:
//   "hdfs://localhost:8020/path/to/file"
//       -> scheme "hdfs", host "localhost:8020", path "/path/to/file"
//   "file:///usr/local/foo" -> scheme "file", host "", path "/usr/local/foo"
//   "/usr/local/foo"        -> scheme "", host "", path "/usr/local/foo"
//
// Corner cases:
// - If the URI is invalid, scheme and host are set to empty strings and the
//   passed string is assumed to be a path.
// - If the URI omits the path (e.g. file://host), then the path is left empty.
void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host,
StringPiece* path);
// Creates a URI of the form "<scheme>://<host><path>" from a scheme, host, and
// path. If the scheme is empty, the path alone is returned and the host is
// ignored.
string CreateURI(StringPiece scheme, StringPiece host, StringPiece path);
} // namespace io
} // namespace tensorflow
......
......@@ -45,6 +45,8 @@ TEST(PathTest, IsAbsolutePath) {
}
TEST(PathTest, Dirname) {
EXPECT_EQ("hdfs://127.0.0.1:9000/",
Dirname("hdfs://127.0.0.1:9000/train.csv.tfrecords"));
EXPECT_EQ("/hello", Dirname("/hello/"));
EXPECT_EQ("/", Dirname("/hello"));
EXPECT_EQ("hello", Dirname("hello/world"));
......@@ -97,5 +99,47 @@ TEST(PathTest, CleanPath) {
EXPECT_EQ("../../bar", CleanPath("foo/../../../bar"));
}
// Test helper: parses `uri` with ParseURI and checks three things.
//   1. The parsed scheme/host/path equal the expected `scheme`, `host`, `path`.
//   2. CreateURI(scheme, host, path) reproduces `uri` (round trip).
//   3. The begin() and end() of every parsed piece lie within
//      [u.begin(), u.end()], i.e. each piece, even an empty one, points into
//      the buffer of the parsed StringPiece.
// Wrapped in do { ... } while (0) so the macro expands to a single statement.
#define EXPECT_PARSE_URI(uri, scheme, host, path) \
do { \
StringPiece u(uri); \
StringPiece s, h, p; \
ParseURI(u, &s, &h, &p); \
EXPECT_EQ(scheme, s.ToString()); \
EXPECT_EQ(host, h.ToString()); \
EXPECT_EQ(path, p.ToString()); \
EXPECT_EQ(uri, CreateURI(scheme, host, path)); \
EXPECT_LE(u.begin(), s.begin()); \
EXPECT_GE(u.end(), s.begin()); \
EXPECT_LE(u.begin(), s.end()); \
EXPECT_GE(u.end(), s.end()); \
EXPECT_LE(u.begin(), h.begin()); \
EXPECT_GE(u.end(), h.begin()); \
EXPECT_LE(u.begin(), h.end()); \
EXPECT_GE(u.end(), h.end()); \
EXPECT_LE(u.begin(), p.begin()); \
EXPECT_GE(u.end(), p.begin()); \
EXPECT_LE(u.begin(), p.end()); \
EXPECT_GE(u.end(), p.end()); \
} while (0)
// Exercises ParseURI and CreateURI on well-formed URIs, plain paths, and
// strings that look like URIs but must be treated as plain paths.
TEST(PathTest, CreateParseURI) {
// Scheme and host, but no path.
EXPECT_PARSE_URI("http://foo", "http", "foo", "");
// Plain paths: no leading scheme, so the whole string is the path even when
// "://" appears later in it.
EXPECT_PARSE_URI("/encrypted/://foo", "", "", "/encrypted/://foo");
EXPECT_PARSE_URI("/usr/local/foo", "", "", "/usr/local/foo");
// Scheme with an empty host; '.' is allowed inside a scheme.
EXPECT_PARSE_URI("file:///usr/local/foo", "file", "", "/usr/local/foo");
EXPECT_PARSE_URI("local.file:///usr/local/foo", "local.file", "",
"/usr/local/foo");
// Invalid schemes ('-' in the scheme, empty scheme, leading digit) fall back
// to being parsed as a plain path.
EXPECT_PARSE_URI("a-b:///foo", "", "", "a-b:///foo");
EXPECT_PARSE_URI(":///foo", "", "", ":///foo");
EXPECT_PARSE_URI("9dfd:///foo", "", "", "9dfd:///foo");
// A scheme not followed by the full "://" separator is also a plain path.
EXPECT_PARSE_URI("file:", "", "", "file:");
EXPECT_PARSE_URI("file:/", "", "", "file:/");
// The host may carry a port; the path starts at the first '/' after the host.
EXPECT_PARSE_URI("hdfs://localhost:8020/path/to/file", "hdfs",
"localhost:8020", "/path/to/file");
EXPECT_PARSE_URI("hdfs://localhost:8020", "hdfs", "localhost:8020", "");
EXPECT_PARSE_URI("hdfs://localhost:8020/", "hdfs", "localhost:8020", "/");
}
#undef EXPECT_PARSE_URI
} // namespace io
} // namespace tensorflow
......@@ -81,7 +81,7 @@ Status ParseGcsPath(StringPiece fname, bool empty_object_ok, string* bucket,
return errors::Internal("bucket and object cannot be null.");
}
StringPiece scheme, bucketp, objectp;
ParseURI(fname, &scheme, &bucketp, &objectp);
io::ParseURI(fname, &scheme, &bucketp, &objectp);
if (scheme != "gs") {
return errors::InvalidArgument("GCS path doesn't start with 'gs://': ",
fname);
......
......@@ -70,7 +70,7 @@ Env::Env() : file_system_registry_(new FileSystemRegistryImpl) {}
Status Env::GetFileSystemForFile(const string& fname, FileSystem** result) {
StringPiece scheme, host, path;
ParseURI(fname, &scheme, &host, &path);
io::ParseURI(fname, &scheme, &host, &path);
FileSystem* file_system = file_system_registry_->Lookup(scheme.ToString());
if (!file_system) {
return errors::Unimplemented("File system scheme ", scheme,
......
......@@ -229,35 +229,6 @@ TEST_F(DefaultEnvTest, LocalFileSystem) {
}
}
// Test helper: parses `uri` with ParseURI, checks that the parsed
// scheme/host/path equal the expected values, and checks that
// CreateURI(scheme, host, path) reproduces `uri` (round trip).
// Wrapped in do { ... } while (0) so the macro expands to a single statement.
#define EXPECT_PARSE_URI(uri, scheme, host, path) \
do { \
StringPiece s, h, p; \
ParseURI(uri, &s, &h, &p); \
EXPECT_EQ(scheme, s.ToString()); \
EXPECT_EQ(host, h.ToString()); \
EXPECT_EQ(path, p.ToString()); \
EXPECT_EQ(uri, CreateURI(scheme, host, path)); \
} while (0)
// Exercises ParseURI and CreateURI on well-formed URIs, plain paths, and
// strings that look like URIs but must be treated as plain paths.
TEST_F(DefaultEnvTest, CreateParseURI) {
// Scheme and host, but no path.
EXPECT_PARSE_URI("http://foo", "http", "foo", "");
// Plain paths: no leading scheme, so the whole string is the path.
EXPECT_PARSE_URI("/encrypted/://foo", "", "", "/encrypted/://foo");
EXPECT_PARSE_URI("/usr/local/foo", "", "", "/usr/local/foo");
// Scheme with an empty host; '.' is allowed inside a scheme.
EXPECT_PARSE_URI("file:///usr/local/foo", "file", "", "/usr/local/foo");
EXPECT_PARSE_URI("local.file:///usr/local/foo", "local.file", "",
"/usr/local/foo");
// Invalid schemes ('-' in the scheme, empty scheme, leading digit) fall back
// to being parsed as a plain path.
EXPECT_PARSE_URI("a-b:///foo", "", "", "a-b:///foo");
EXPECT_PARSE_URI(":///foo", "", "", ":///foo");
EXPECT_PARSE_URI("9dfd:///foo", "", "", "9dfd:///foo");
// A scheme not followed by the full "://" separator is also a plain path.
EXPECT_PARSE_URI("file:", "", "", "file:");
EXPECT_PARSE_URI("file:/", "", "", "file:/");
// The host may carry a port; the path starts at the first '/' after the host.
EXPECT_PARSE_URI("hdfs://localhost:8020/path/to/file", "hdfs",
"localhost:8020", "/path/to/file");
EXPECT_PARSE_URI("hdfs://localhost:8020", "hdfs", "localhost:8020", "");
EXPECT_PARSE_URI("hdfs://localhost:8020/", "hdfs", "localhost:8020", "/");
}
#undef EXPECT_PARSE_URI
TEST_F(DefaultEnvTest, SleepForMicroseconds) {
const int64 start = env_->NowMicros();
const int64 sleep_time = 1e6 + 5e5;
......@@ -274,14 +245,14 @@ class TmpDirFileSystem : public NullFileSystem {
public:
bool FileExists(const string& dir) override {
StringPiece scheme, host, path;
ParseURI(dir, &scheme, &host, &path);
io::ParseURI(dir, &scheme, &host, &path);
if (path.empty()) return false;
return Env::Default()->FileExists(io::JoinPath(BaseDir(), path));
}
Status CreateDir(const string& dir) override {
StringPiece scheme, host, path;
ParseURI(dir, &scheme, &host, &path);
io::ParseURI(dir, &scheme, &host, &path);
if (scheme != "tmpdirfs") {
return errors::FailedPrecondition("scheme must be tmpdirfs");
}
......
......@@ -22,7 +22,6 @@ limitations under the License.
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/scanner.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/env.h"
......@@ -79,43 +78,6 @@ WritableFile::~WritableFile() {}
FileSystemRegistry::~FileSystemRegistry() {}
// Splits a URI of the form "scheme://host/path" into its three components.
//
// If the input does not start with a valid "<scheme>://" prefix, scheme and
// host are cleared and the whole input is reported as the path. If no '/'
// follows the host, the path is cleared.
void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
              StringPiece* path) {
  // Step 0: the scheme. It has to match [a-zA-Z][0-9a-zA-Z.]* and be
  // immediately followed by the literal "://".
  // TODO(keveman): Allow "+" and "-" in the scheme.
  const bool has_scheme = strings::Scanner(remaining)
                              .One(strings::Scanner::LETTER)
                              .Many(strings::Scanner::LETTER_DIGIT_DOT)
                              .StopCapture()
                              .OneLiteral("://")
                              .GetResult(&remaining, scheme);
  if (!has_scheme) {
    // Not a URI: the entire input is taken to be a path.
    scheme->clear();
    host->clear();
    *path = remaining;
    return;
  }

  // Step 1: the host is everything up to the first '/'.
  const bool has_path =
      strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host);
  if (has_path) {
    // Step 2: whatever is left over is the path.
    *path = remaining;
    return;
  }

  // No '/' after the scheme, so the remainder is all host and there is no
  // path.
  *host = remaining;
  path->clear();
}
// Assembles "<scheme>://<host><path>". When `scheme` is empty the result is
// simply a copy of `path`; `host` is not used in that case.
string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
  return scheme.empty() ? path.ToString()
                        : strings::StrCat(scheme, "://", host, path);
}
Status FileSystem::GetMatchingPaths(const string& pattern,
std::vector<string>* results) {
results->clear();
......@@ -237,9 +199,9 @@ Status FileSystem::DeleteRecursively(const string& dirname,
Status FileSystem::RecursivelyCreateDir(const string& dirname) {
StringPiece scheme, host, remaining_dir;
ParseURI(dirname, &scheme, &host, &remaining_dir);
io::ParseURI(dirname, &scheme, &host, &remaining_dir);
std::vector<StringPiece> sub_dirs;
while (!FileExists(CreateURI(scheme, host, remaining_dir)) &&
while (!FileExists(io::CreateURI(scheme, host, remaining_dir)) &&
!remaining_dir.empty()) {
// Basename returns "" for / ending dirs.
if (!remaining_dir.ends_with("/")) {
......@@ -255,7 +217,7 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) {
string built_path = remaining_dir.ToString();
for (const StringPiece sub_dir : sub_dirs) {
built_path = io::JoinPath(built_path, sub_dir);
TF_RETURN_IF_ERROR(CreateDir(CreateURI(scheme, host, built_path)));
TF_RETURN_IF_ERROR(CreateDir(io::CreateURI(scheme, host, built_path)));
}
return Status::OK();
}
......
......@@ -287,19 +287,6 @@ class FileSystemRegistry {
std::vector<string>* schemes) = 0;
};
// Splits `uri` into its scheme, host, and path components and stores them in
// *scheme, *host, and *path.
//
// Corner cases:
// - If the URI is invalid, scheme and host are set to empty strings and the
//   passed string is assumed to be a path.
// - If the URI omits the path (e.g. file://host), then the path is left empty.
void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host,
StringPiece* path);
// Creates a URI of the form "<scheme>://<host><path>" from a scheme, host, and
// path. If the scheme is empty, the path alone is returned and the host is
// ignored.
string CreateURI(StringPiece scheme, StringPiece host, StringPiece path);
} // namespace tensorflow
#endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_
......@@ -112,7 +112,7 @@ class InterPlanetaryFileSystem : public NullFileSystem {
void ParsePath(const string& name, string* parsed_path) {
StringPiece scheme, host, path;
ParseURI(name, &scheme, &host, &path);
io::ParseURI(name, &scheme, &host, &path);
ASSERT_EQ(scheme, "ipfs");
ASSERT_EQ(host, "solarsystem");
path.Consume("/");
......
......@@ -126,7 +126,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
TF_RETURN_IF_ERROR(hdfs_->status());
StringPiece scheme, namenode, path;
ParseURI(fname, &scheme, &namenode, &path);
io::ParseURI(fname, &scheme, &namenode, &path);
const string nn = namenode.ToString();
hdfsBuilder* builder = hdfs_->hdfsNewBuilder();
......@@ -144,7 +144,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
string HadoopFileSystem::TranslateName(const string& name) const {
StringPiece scheme, namenode, path;
ParseURI(name, &scheme, &namenode, &path);
io::ParseURI(name, &scheme, &namenode, &path);
return path.ToString();
}
......
......@@ -16,6 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_
#define TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
namespace tensorflow {
......@@ -63,7 +64,7 @@ class LocalPosixFileSystem : public PosixFileSystem {
public:
string TranslateName(const string& name) const override {
StringPiece scheme, host, path;
ParseURI(name, &scheme, &host, &path);
io::ParseURI(name, &scheme, &host, &path);
return path.ToString();
}
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册