diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
index 0e6090f128ee11d913f2d60e2dce1897b2007f41..f0d042d9cec9647109bce8f3a33d197029170f87 100644
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@@ -61,11 +61,19 @@ namespace api {
   } while (false)
 
 // Function to create the iterator, which will build and launch the execution tree.
-std::shared_ptr<Iterator> Dataset::CreateIterator() {
+std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> columns) {
   std::shared_ptr<Iterator> iter;
   try {
+    auto ds = shared_from_this();
+
+    // The specified columns will be selected from the dataset and passed down the pipeline
+    // in the order specified; other columns will be discarded.
+    if (!columns.empty()) {
+      ds = ds->Project(columns);
+    }
+
     iter = std::make_shared<Iterator>();
-    Status rc = iter->BuildAndLaunchTree(shared_from_this());
+    Status rc = iter->BuildAndLaunchTree(ds);
     if (rc.IsError()) {
       MS_LOG(ERROR) << "CreateIterator failed." << rc;
       return nullptr;
@@ -629,13 +637,13 @@ bool VOCDataset::ValidateParams() {
     }
     Path imagesets_file = dir / "ImageSets" / "Segmentation" / mode_ + ".txt";
     if (!imagesets_file.Exists()) {
-      MS_LOG(ERROR) << "[Segmentation] imagesets_file is invalid or not exist";
+      MS_LOG(ERROR) << "Invalid mode: " << mode_ << ", file \"" << imagesets_file << "\" does not exist!";
       return false;
     }
   } else if (task_ == "Detection") {
     Path imagesets_file = dir / "ImageSets" / "Main" / mode_ + ".txt";
     if (!imagesets_file.Exists()) {
-      MS_LOG(ERROR) << "[Detection] imagesets_file is invalid or not exist.";
+      MS_LOG(ERROR) << "Invalid mode: " << mode_ << ", file \"" << imagesets_file << "\" does not exist!";
       return false;
     }
   } else {
@@ -655,18 +663,33 @@ std::vector<std::shared_ptr<DatasetOp>> VOCDataset::Build() {
     sampler_ = CreateDefaultSampler();
   }
 
-  std::shared_ptr<VOCOp::Builder> builder = std::make_shared<VOCOp::Builder>();
-  (void)builder->SetDir(dataset_dir_);
-  (void)builder->SetTask(task_);
-  (void)builder->SetMode(mode_);
-  (void)builder->SetNumWorkers(num_workers_);
-  (void)builder->SetSampler(std::move(sampler_->Build()));
-  (void)builder->SetDecode(decode_);
-  (void)builder->SetClassIndex(class_index_);
+  auto schema = std::make_unique<DataSchema>();
+  VOCOp::TaskType task_type_;
 
-  std::shared_ptr<VOCOp> op;
-  RETURN_EMPTY_IF_ERROR(builder->Build(&op));
-  node_ops.push_back(op);
+  if (task_ == "Segmentation") {
+    task_type_ = VOCOp::TaskType::Segmentation;
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnTarget), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+  } else if (task_ == "Detection") {
+    task_type_ = VOCOp::TaskType::Detection;
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnBbox), DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1)));
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnLabel), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnDifficult), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
+    RETURN_EMPTY_IF_ERROR(schema->AddColumn(
+      ColDescriptor(std::string(kColumnTruncate), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
+  }
+
+  std::shared_ptr<VOCOp> voc_op;
+  voc_op = std::make_shared<VOCOp>(task_type_, mode_, dataset_dir_, class_index_, num_workers_, rows_per_buffer_,
+                                   connector_que_size_, decode_, std::move(schema), std::move(sampler_->Build()));
+  node_ops.push_back(voc_op);
   return node_ops;
 }
diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc
index e60ddc76436f768f50707c992f85385f64c102a0..4703ba532d7eeab9b71953944889bdabecebdd13 100644
--- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc
@@ -30,6 +30,19 @@ void Iterator::GetNextRow(TensorMap *row) {
   }
 }
 
+// Get the next row from the data pipeline.
+void Iterator::GetNextRow(TensorVec *row) {
+  row->clear();
+  TensorRow tensor_row;
+  Status rc = iterator_->FetchNextTensorRow(&tensor_row);
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "GetNextRow: Failed to get next row.";
+    return;
+  }
+  // Copy the tensors of this row into the output vector.
+  std::copy(tensor_row.begin(), tensor_row.end(), std::back_inserter(*row));
+}
+
 // Shut down the data pipeline.
 void Iterator::Stop() {
   // Releasing the iterator_ unique_ptr. This should trigger the destructor of iterator_.
diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc
index 6847f74f0aae4b60a304c8862da2f9a9c77692a9..cec690c5031abdc4b413a71560d1cafaea6ab989 100644
--- a/mindspore/ccsrc/minddata/dataset/api/transforms.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc
@@ -138,8 +138,9 @@ std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float>
 
 // Function to create RandomCropOperation.
 std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding,
-                                                bool pad_if_needed, std::vector<uint8_t> fill_value) {
-  auto op = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value);
+                                                bool pad_if_needed, std::vector<uint8_t> fill_value,
+                                                BorderType padding_mode) {
+  auto op = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value, padding_mode);
   // Input validation
   if (!op->ValidateParams()) {
     return nullptr;
@@ -453,8 +454,12 @@ std::shared_ptr<TensorOp> RandomColorAdjustOperation::Build() {
 
 // RandomCropOperation
 RandomCropOperation::RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding, bool pad_if_needed,
-                                         std::vector<uint8_t> fill_value)
-    : size_(size), padding_(padding), pad_if_needed_(pad_if_needed), fill_value_(fill_value) {}
+                                         std::vector<uint8_t> fill_value, BorderType padding_mode)
+    : size_(size),
+      padding_(padding),
+      pad_if_needed_(pad_if_needed),
+      fill_value_(fill_value),
+      padding_mode_(padding_mode) {}
 
 bool RandomCropOperation::ValidateParams() {
   if (size_.empty() || size_.size() > 2) {
@@ -493,7 +498,7 @@ std::shared_ptr<TensorOp> RandomCropOperation::Build() {
   }
 
   auto tensor_op = std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right,
-                                                  BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b);
+                                                  padding_mode_, pad_if_needed_, fill_r, fill_g, fill_b);
   return tensor_op;
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h
index eefcb024f62b98088e98e8aa9a87c6d068295bd2..20193e39b2c7223501423e3ffeee778555c5d0be 100644
--- a/mindspore/ccsrc/minddata/dataset/include/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h
@@ -196,8 +196,9 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
   }
 
   /// \brief Function to create an Iterator over the Dataset pipeline
+  /// \param[in] columns List of column names; only these columns are returned, in this order
   /// \return Shared pointer to the Iterator
-  std::shared_ptr<Iterator> CreateIterator();
+  std::shared_ptr<Iterator> CreateIterator(std::vector<std::string> columns = {});
 
   /// \brief Function to create a BatchDataset
   /// \notes Combines batch_size number of consecutive rows into batches
@@ -452,6 +453,12 @@ class VOCDataset : public Dataset {
   bool ValidateParams() override;
 
  private:
+  const std::string kColumnImage = "image";
+  const std::string kColumnTarget = "target";
+  const std::string kColumnBbox = "bbox";
+  const std::string kColumnLabel = "label";
+  const std::string kColumnDifficult = "difficult";
+  const std::string kColumnTruncate = "truncate";
   std::string dataset_dir_;
   std::string task_;
   std::string mode_;
diff --git a/mindspore/ccsrc/minddata/dataset/include/iterator.h b/mindspore/ccsrc/minddata/dataset/include/iterator.h
index 241a8c65b13841624aa6c9bbefaab68b0514c3a2..443cca93eb1ff9af18c8dbfff84c45ea9354023e 100644
--- a/mindspore/ccsrc/minddata/dataset/include/iterator.h
+++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h
@@ -37,6 +37,7 @@ namespace api {
 class Dataset;
 
 using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;
+using TensorVec = std::vector<std::shared_ptr<Tensor>>;
 
 // Abstract class for iterating over the dataset.
 class Iterator {
@@ -53,9 +54,15 @@ class Iterator {
   Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);
 
   /// \brief Function to get the next row from the data pipeline.
+  /// \note The returned data is a map (keyed by column name).
   /// \param[out] row - the output tensor row.
   void GetNextRow(TensorMap *row);
 
+  /// \brief Function to get the next row from the data pipeline.
+  /// \note The returned data is a vector (without column names).
+  /// \param[out] row - the output tensor row.
+  void GetNextRow(TensorVec *row);
+
   /// \brief Function to shut down the data pipeline.
   void Stop();
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h
index dcec3763eaa3e65bb4b61f474b46e495b28ba0d9..32f474b579f863f6e85304393d218fa280465457 100644
--- a/mindspore/ccsrc/minddata/dataset/include/transforms.h
+++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h
@@ -163,8 +163,8 @@ std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float>
 ///     fill R, G, B channels respectively.
 /// \return Shared pointer to the current TensorOperation.
 std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
-                                                bool pad_if_needed = false,
-                                                std::vector<uint8_t> fill_value = {0, 0, 0});
+                                                bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
+                                                BorderType padding_mode = BorderType::kConstant);
 
 /// \brief Function to create a RandomHorizontalFlip TensorOperation.
 /// \notes Tensor operation to perform random horizontal flip.
@@ -354,7 +354,8 @@ class RandomColorAdjustOperation : public TensorOperation {
 class RandomCropOperation : public TensorOperation {
  public:
   RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
-                      bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0});
+                      bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
+                      BorderType padding_mode = BorderType::kConstant);
 
   ~RandomCropOperation() = default;
 
@@ -367,6 +368,7 @@ class RandomCropOperation : public TensorOperation {
   std::vector<int32_t> padding_;
   bool pad_if_needed_;
   std::vector<uint8_t> fill_value_;
+  BorderType padding_mode_;
 };
 
 class RandomHorizontalFlipOperation : public TensorOperation {
diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt
index fedbe408b7b7a8d608fbb18b626bff4415fc64be..790f9d0596543964b3028575f9200e9e72c5487d 100644
--- a/tests/ut/cpp/dataset/CMakeLists.txt
+++ b/tests/ut/cpp/dataset/CMakeLists.txt
@@ -96,6 +96,7 @@ SET(DE_UT_SRCS
     c_api_dataset_coco_test.cc
     c_api_dataset_voc_test.cc
    c_api_datasets_test.cc
+    c_api_dataset_iterator_test.cc
     tensor_op_fusion_pass_test.cc
     sliding_window_op_test.cc
     epoch_ctrl_op_test.cc
diff --git a/tests/ut/cpp/dataset/c_api_dataset_iterator_test.cc b/tests/ut/cpp/dataset/c_api_dataset_iterator_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a6fbe7a623ef798ee099f87371fac7a084c8f7c8
--- /dev/null
+++ b/tests/ut/cpp/dataset/c_api_dataset_iterator_test.cc
@@ -0,0 +1,223 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "utils/log_adapter.h"
+#include "utils/ms_utils.h"
+#include "common/common.h"
+#include "gtest/gtest.h"
+#include "securec.h"
+#include "minddata/dataset/include/datasets.h"
+#include "minddata/dataset/include/status.h"
+#include "minddata/dataset/include/transforms.h"
+#include "minddata/dataset/include/iterator.h"
+#include "minddata/dataset/core/constants.h"
+#include "minddata/dataset/core/tensor_shape.h"
+#include "minddata/dataset/core/tensor.h"
+#include "minddata/dataset/include/samplers.h"
+
+using namespace mindspore::dataset::api;
+using mindspore::MsLogLevel::ERROR;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::LogStream;
+using mindspore::dataset::Tensor;
+using mindspore::dataset::TensorShape;
+using mindspore::dataset::TensorImpl;
+using mindspore::dataset::DataType;
+using mindspore::dataset::Status;
+using mindspore::dataset::BorderType;
+using mindspore::dataset::dsize_t;
+
+class MindDataTestPipeline : public UT::DatasetOpTesting {
+ protected:
+};
+
+TEST_F(MindDataTestPipeline, TestIteratorOneColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorOneColumn.";
+  // Create a Mnist Dataset
+  std::string folder_path = datasets_root_path_ + "/testMnistData/";
+  std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 4));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Batch operation on ds
+  int32_t batch_size = 2;
+  ds = ds->Batch(batch_size);
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Only select the "image" column and drop the others
+  std::vector<std::string> columns = {"image"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::vector<std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+  TensorShape expect({2, 28, 28, 1});
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    for (auto &v : row) {
+      MS_LOG(INFO) << "image shape:" << v->shape();
+      EXPECT_EQ(expect, v->shape());
+    }
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 2);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestIteratorTwoColumns) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorTwoColumns.";
+  // Create a VOC Dataset
+  std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
+  std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, SequentialSampler(0, 4));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Repeat operation on ds
+  int32_t repeat_num = 2;
+  ds = ds->Repeat(repeat_num);
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Only select the "image" and "bbox" columns
+  std::vector<std::string> columns = {"image", "bbox"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::vector<std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+  std::vector<TensorShape> expect = {TensorShape({173673}), TensorShape({1, 4}),
+                                     TensorShape({173673}), TensorShape({1, 4}),
+                                     TensorShape({147025}), TensorShape({1, 4}),
+                                     TensorShape({211653}), TensorShape({1, 4})};
+
+  uint64_t i = 0;
+  uint64_t j = 0;
+  while (row.size() != 0) {
+    MS_LOG(INFO) << "row[0]:" << row[0]->shape() << ", row[1]:" << row[1]->shape();
+    EXPECT_EQ(2, row.size());
+    EXPECT_EQ(expect[j++], row[0]->shape());
+    EXPECT_EQ(expect[j++], row[1]->shape());
+    iter->GetNextRow(&row);
+    i++;
+    j = (j == expect.size()) ? 0 : j;
+  }
+
+  EXPECT_EQ(i, 8);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestIteratorEmptyColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorEmptyColumn.";
+  // Create a Cifar10 Dataset
+  std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
+  std::shared_ptr<Dataset> ds = Cifar10(folder_path, RandomSampler(false, 5));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Rename operation on ds
+  ds = ds->Rename({"image", "label"}, {"col1", "col2"});
+  EXPECT_NE(ds, nullptr);
+
+  // No columns are specified, use all columns
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::vector<std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+  TensorShape expect0({32, 32, 3});
+  TensorShape expect1({});
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    MS_LOG(INFO) << "row[0]:" << row[0]->shape() << ", row[1]:" << row[1]->shape();
+    EXPECT_EQ(expect0, row[0]->shape());
+    EXPECT_EQ(expect1, row[1]->shape());
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 5);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestIteratorReOrder) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorReOrder.";
+  // Create a Cifar10 Dataset
+  std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
+  std::shared_ptr<Dataset> ds = Cifar10(folder_path, SequentialSampler(0, 4));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Take operation on ds
+  ds = ds->Take(2);
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // Reorder the "image" and "label" columns
+  std::vector<std::string> columns = {"label", "image"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::vector<std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+  TensorShape expect0({32, 32, 3});
+  TensorShape expect1({});
+
+  // Check that "label" comes before "image" in each row
+  std::vector<std::string> expect = {"label", "image"};
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    MS_LOG(INFO) << "row[0]:" << row[0]->shape() << ", row[1]:" << row[1]->shape();
+    EXPECT_EQ(expect1, row[0]->shape());
+    EXPECT_EQ(expect0, row[1]->shape());
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 2);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestIteratorWrongColumn) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorWrongColumn.";
+  // Create a Mnist Dataset
+  std::string folder_path = datasets_root_path_ + "/testMnistData/";
+  std::shared_ptr<Dataset> ds = Mnist(folder_path, RandomSampler(false, 4));
+  EXPECT_NE(ds, nullptr);
+
+  // Pass a wrong column name
+  std::vector<std::string> columns = {"digital"};
+  std::shared_ptr<Iterator> iter = ds->CreateIterator(columns);
+  EXPECT_EQ(iter, nullptr);
+}
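
For reference, a minimal usage sketch of the API surface introduced by this patch (column selection in CreateIterator and the vector-based GetNextRow overload), outside the gtest harness. This is illustrative only, not part of the patch: the dataset path is a placeholder, and only calls exercised by the new unit tests above are used.

#include <memory>
#include <string>
#include <vector>

#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/iterator.h"
#include "minddata/dataset/include/samplers.h"

using namespace mindspore::dataset::api;
using mindspore::dataset::Tensor;

int main() {
  // Build a small pipeline: MNIST -> Batch(2). "/path/to/MNIST" is a placeholder directory.
  std::shared_ptr<Dataset> ds = Mnist("/path/to/MNIST", RandomSampler(false, 8));
  ds = ds->Batch(2);

  // Request only the "image" column; CreateIterator inserts a Project node internally,
  // so each returned row contains just that column, in the requested order.
  std::shared_ptr<Iterator> iter = ds->CreateIterator({"image"});
  if (iter == nullptr) {
    return 1;  // e.g. an unknown column name was passed
  }

  // The TensorVec overload returns the row as a vector, without column names.
  std::vector<std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);
  while (!row.empty()) {
    // row[0] is the batched "image" tensor of this row.
    iter->GetNextRow(&row);
  }

  iter->Stop();
  return 0;
}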