From 5db4fbe3bbf744ba72d986b4795896b8759031c1 Mon Sep 17 00:00:00 2001 From: Alexey Surkov Date: Tue, 1 Nov 2016 16:15:33 -0800 Subject: [PATCH] Make GetMatchingPaths also return directory names. This is now consistent with what other file systems have. Change: 137892031 --- .../core/platform/cloud/gcs_file_system.cc | 28 +++++++++++++++++-- .../platform/cloud/gcs_file_system_test.cc | 5 ++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 4b83bc7cbf1..867acc7d8a3 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -118,6 +118,26 @@ string JoinGcsPath(const string& path, const string& subpath) { return strings::StrCat(MaybeAppendSlash(path), subpath); } +/// \brief Returns the given paths together with all their parent folders. +/// +/// For every path X in the list, every parent folder of X is added to the +/// resulting list. +/// For example: +/// - for 'a/b/c/d' it will append 'a', 'a/b' and 'a/b/c' +/// - for 'a/b/c/' it will append 'a', 'a/b' and 'a/b/c' +std::set AddAllSubpaths(const std::vector& paths) { + std::set result; + result.insert(paths.begin(), paths.end()); + for (const string& path : paths) { + StringPiece subpath = io::Dirname(path); + while (!subpath.empty()) { + result.emplace(subpath.ToString()); + subpath = io::Dirname(subpath); + } + } + return result; +} + Status ParseJson(StringPiece json, Json::Value* result) { Json::Reader reader; if (!reader.parse(json.ToString(), *result)) { @@ -784,9 +804,11 @@ Status GcsFileSystem::GetMatchingPaths(const string& pattern, GetChildrenBounded(dir, UINT64_MAX, &all_files, true /* recursively */, false /* include_self_directory_marker */)); - // Match all obtained files to the input pattern. 
- for (const auto& f : all_files) { - const string& full_path = io::JoinPath(dir, f); + const auto& files_and_folders = AddAllSubpaths(all_files); + + // Match all obtained paths to the input pattern. + for (const auto& path : files_and_folders) { + const string& full_path = io::JoinPath(dir, path); if (Env::Default()->MatchPath(full_path, pattern)) { results->push_back(full_path); } diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index da7f53825ad..00bf3314705 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -767,8 +767,9 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) { std::vector result; TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result)); - EXPECT_EQ(std::vector( - {"gs://bucket/path/file1.txt", "gs://bucket/path/file3.txt"}), + EXPECT_EQ(std::vector({"gs://bucket/path/file1.txt", + "gs://bucket/path/file3.txt", + "gs://bucket/path/subpath"}), result); } -- GitLab