提交 07ae2d1d 编写于 作者: J Jonathan Hseu 提交者: TensorFlower Gardener

Fix issue where we wouldn't descend into subdirectories when listing GCS runs in TensorBoard.

- Fixes GcsFileSystem::GetChildrenBounded so that it still returns the
  'prefixes' entries even if 'items' is absent or empty in the GCS response.
- Changes list_directory to use GetChildren() instead of GetMatchingFiles().
  GetMatchingFiles() in GCS has a quirk where, unlike other filesystems, it
  doesn't include directories. There's no obvious fix given the GCS API.
Change: 137885363
上级 176a7727
......@@ -847,35 +847,34 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname,
Json::Value root;
TF_RETURN_IF_ERROR(ParseJson(response_piece, &root));
const auto items = root.get("items", Json::Value::null);
if (items == Json::Value::null) {
// Empty results.
return Status::OK();
}
if (!items.isArray()) {
return errors::Internal("Expected an array 'items' in the GCS response.");
}
for (size_t i = 0; i < items.size(); i++) {
const auto item = items.get(i, Json::Value::null);
if (!item.isObject()) {
return errors::Internal(
"Unexpected JSON format: 'items' should be a list of objects.");
}
string name;
TF_RETURN_IF_ERROR(GetStringValue(item, "name", &name));
// The names should be relative to the 'dirname'. That means the
// 'object_prefix', which is part of 'dirname', should be removed from the
// beginning of 'name'.
StringPiece relative_path(name);
if (!relative_path.Consume(object_prefix)) {
if (items != Json::Value::null) {
if (!items.isArray()) {
return errors::Internal(
strings::StrCat("Unexpected response: the returned file name ",
name, " doesn't match the prefix ", object_prefix));
}
if (!relative_path.empty() || include_self_directory_marker) {
result->emplace_back(relative_path.ToString());
"Expected an array 'items' in the GCS response.");
}
if (++retrieved_results >= max_results) {
return Status::OK();
for (size_t i = 0; i < items.size(); i++) {
const auto item = items.get(i, Json::Value::null);
if (!item.isObject()) {
return errors::Internal(
"Unexpected JSON format: 'items' should be a list of objects.");
}
string name;
TF_RETURN_IF_ERROR(GetStringValue(item, "name", &name));
// The names should be relative to the 'dirname'. That means the
// 'object_prefix', which is part of 'dirname', should be removed from
// the beginning of 'name'.
StringPiece relative_path(name);
if (!relative_path.Consume(object_prefix)) {
return errors::Internal(strings::StrCat(
"Unexpected response: the returned file name ", name,
" doesn't match the prefix ", object_prefix));
}
if (!relative_path.empty() || include_self_directory_marker) {
result->emplace_back(relative_path.ToString());
}
if (++retrieved_results >= max_results) {
return Status::OK();
}
}
}
const auto prefixes = root.get("prefixes", Json::Value::null);
......
......@@ -579,6 +579,24 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) {
EXPECT_FALSE(fs.FileExists("gs://bucket2"));
}
// Checks that GetChildren reports a sub-directory when the canned GCS listing
// reply contains a "prefixes" array but no "items" array.
TEST(GcsFileSystemTest, GetChildren_NoItems) {
  // The one HTTP exchange the fake factory will serve: the expected request
  // text followed by the JSON body handed back to the file system.
  HttpRequest* const listing_exchange = new FakeHttpRequest(
      "Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
      "fields=items%2Fname%2Cprefixes%2CnextPageToken&delimiter=%2F&prefix="
      "path%2F\n"
      "Auth Token: fake_token\n",
      "{\"prefixes\": [\"path/subpath/\"]}");
  std::vector<HttpRequest*> requests({listing_exchange});

  GcsFileSystem fs(
      std::unique_ptr<AuthProvider>(new FakeAuthProvider),
      std::unique_ptr<HttpRequest::Factory>(
          new FakeHttpRequestFactory(&requests)),
      0 /* read ahead bytes */, 5 /* max upload attempts */);

  // The returned name is relative to the listed directory.
  const std::vector<string> expected_children({"subpath/"});
  std::vector<string> listed;
  TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &listed));
  EXPECT_EQ(expected_children, listed);
}
TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
std::vector<HttpRequest*> requests({new FakeHttpRequest(
"Uri: https://www.googleapis.com/storage/v1/b/bucket/o?"
......
......@@ -66,6 +66,16 @@ void WriteStringToFile(const string& filename, const string& file_content,
}
}
std::vector<string> GetChildren(const string& dir, TF_Status* out_status) {
std::vector<string> results;
tensorflow::Status status = tensorflow::Env::Default()->GetChildren(
dir, &results);
if (!status.ok()) {
Set_TF_Status_from_Status(out_status, status);
}
return results;
}
std::vector<string> GetMatchingFiles(const string& filename,
TF_Status* out_status) {
std::vector<string> results;
......@@ -233,6 +243,7 @@ inline void DeleteFile(const string& filename, TF_Status* out_status);
string ReadFileToString(const string& filename, TF_Status* out_status);
void WriteStringToFile(const string& filename, const string& file_content,
TF_Status* out_status);
// Returns the names that tensorflow::Env::Default()->GetChildren() produces
// for `dir`; a non-OK status from that call is copied into `out_status`.
std::vector<string> GetChildren(const string& dir, TF_Status* out_status);
std::vector<string> GetMatchingFiles(const string& filename,
TF_Status* out_status);
void CreateDir(const string& dirname, TF_Status* out_status);
......
......@@ -388,11 +388,14 @@ def list_directory(dirname):
"""
if not is_directory(dirname):
raise errors.NotFoundError(None, None, "Could not find directory")
file_list = get_matching_files(os.path.join(compat.as_str_any(dirname), "*"))
return [
compat.as_str_any(pywrap_tensorflow.Basename(compat.as_bytes(filename)))
for filename in file_list
]
with errors.raise_exception_on_not_ok_status() as status:
# Convert each element to string, since the return values of the
# vector of string should be interpreted as strings, not bytes.
return [
compat.as_str_any(filename)
for filename in pywrap_tensorflow.GetChildren(
compat.as_bytes(dirname), status)
]
def walk(top, in_order=True):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册