From ae99a3a3e4013eb2729de17cedf61a07fd4f0193 Mon Sep 17 00:00:00 2001
From: YangLuo
Date: Fri, 24 Jul 2020 18:20:41 +0800
Subject: [PATCH] C++ API support for TakeDatasetOp and VOCDatasetOp

---
 .../ccsrc/minddata/dataset/api/datasets.cc    | 125 +++++++-
 .../ccsrc/minddata/dataset/include/datasets.h |  72 +++++
 tests/ut/cpp/dataset/c_api_test.cc            | 266 +++++++++++++++++-
 3 files changed, 458 insertions(+), 5 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
index 92222e541..726527109 100644
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@@ -24,21 +24,25 @@
 #include "minddata/dataset/engine/datasetops/source/cifar_op.h"
 #include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
 #include "minddata/dataset/engine/datasetops/source/mnist_op.h"
+#include "minddata/dataset/engine/datasetops/source/voc_op.h"
 // Dataset operator headers (in alphabetical order)
 #include "minddata/dataset/engine/datasetops/batch_op.h"
 #include "minddata/dataset/engine/datasetops/map_op.h"
+#include "minddata/dataset/engine/datasetops/project_op.h"
+#include "minddata/dataset/engine/datasetops/rename_op.h"
 #include "minddata/dataset/engine/datasetops/repeat_op.h"
 #include "minddata/dataset/engine/datasetops/shuffle_op.h"
 #include "minddata/dataset/engine/datasetops/skip_op.h"
-#include "minddata/dataset/engine/datasetops/project_op.h"
+#include "minddata/dataset/engine/datasetops/take_op.h"
 #include "minddata/dataset/engine/datasetops/zip_op.h"
-#include "minddata/dataset/engine/datasetops/rename_op.h"
+
 // Sampler headers (in alphabetical order)
 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
 #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
 
 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/util/random.h"
+#include "minddata/dataset/util/path.h"
 
 namespace mindspore {
 namespace dataset {
@@ -123,6 +127,16 @@ std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<Sa
   return ds->ValidateParams() ? ds : nullptr;
 }
 
+// Function to create a VOCDataset.
+std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task, const std::string &mode,
+                                const std::map<std::string, int32_t> &class_index, bool decode,
+                                std::shared_ptr<SamplerObj> sampler) {
+  auto ds = std::make_shared<VOCDataset>(dataset_dir, task, mode, class_index, decode, sampler);
+
+  // Call derived class validation method.
+  return ds->ValidateParams() ? ds : nullptr;
+}
+
 // FUNCTIONS TO CREATE DATASETS FOR DATASET OPS
 // (In alphabetical order)
 
@@ -232,6 +246,26 @@ std::shared_ptr<SkipDataset> Dataset::Skip(int32_t count) {
   return ds;
 }
 
+// Function to create a TakeDataset.
+std::shared_ptr<Dataset> Dataset::Take(int32_t count) {
+  // If count is greater than the number of elements in the dataset or is equal to -1,
+  // all the elements in the dataset will be taken.
+  if (count == -1) {
+    return shared_from_this();
+  }
+
+  auto ds = std::make_shared<TakeDataset>(count);
+
+  // Call derived class validation method.
+  if (!ds->ValidateParams()) {
+    return nullptr;
+  }
+
+  ds->children.push_back(shared_from_this());
+
+  return ds;
+}
+
 // Function to create a Zip dataset
 std::shared_ptr<ZipDataset> Dataset::Zip(const std::vector<std::shared_ptr<Dataset>> &datasets) {
   // Default values
@@ -392,6 +426,71 @@ std::vector<std::shared_ptr<DatasetOp>> MnistDataset::Build() {
   return node_ops;
 }
 
+// Constructor for VOCDataset
+VOCDataset::VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &mode,
+                       const std::map<std::string, int32_t> &class_index, bool decode,
+                       std::shared_ptr<SamplerObj> sampler)
+    : dataset_dir_(dataset_dir),
+      task_(task),
+      mode_(mode),
+      class_index_(class_index),
+      decode_(decode),
+      sampler_(sampler) {}
+
+bool VOCDataset::ValidateParams() {
+  Path dir(dataset_dir_);
+  if (!dir.IsDirectory()) {
+    MS_LOG(ERROR) << "Invalid dataset path or no dataset path is specified.";
+    return false;
+  }
+  if (task_ == "Segmentation") {
+    if (!class_index_.empty()) {
+      MS_LOG(ERROR) << "class_indexing is invalid in Segmentation task.";
+      return false;
+    }
+    Path imagesets_file = dir / "ImageSets" / "Segmentation" / mode_ + ".txt";
+    if (!imagesets_file.Exists()) {
+      MS_LOG(ERROR) << "[Segmentation] imagesets_file is invalid or does not exist.";
+      return false;
+    }
+  } else if (task_ == "Detection") {
+    Path imagesets_file = dir / "ImageSets" / "Main" / mode_ + ".txt";
+    if (!imagesets_file.Exists()) {
+      MS_LOG(ERROR) << "[Detection] imagesets_file is invalid or does not exist.";
+      return false;
+    }
+  } else {
+    MS_LOG(ERROR) << "Invalid task: " << task_;
+    return false;
+  }
+  return true;
+}
+
+// Function to build VOCDataset
+std::vector<std::shared_ptr<DatasetOp>> VOCDataset::Build() {
+  // A vector containing shared pointer to the Dataset Ops that this object will create
+  std::vector<std::shared_ptr<DatasetOp>> node_ops;
+
+  // If user does not specify Sampler, create a default sampler based on the shuffle variable.
+  if (sampler_ == nullptr) {
+    sampler_ = CreateDefaultSampler();
+  }
+
+  std::shared_ptr<VOCOp::Builder> builder = std::make_shared<VOCOp::Builder>();
+  (void)builder->SetDir(dataset_dir_);
+  (void)builder->SetTask(task_);
+  (void)builder->SetMode(mode_);
+  (void)builder->SetNumWorkers(num_workers_);
+  (void)builder->SetSampler(std::move(sampler_->Build()));
+  (void)builder->SetDecode(decode_);
+  (void)builder->SetClassIndex(class_index_);
+
+  std::shared_ptr<VOCOp> op;
+  RETURN_EMPTY_IF_ERROR(builder->Build(&op));
+  node_ops.push_back(op);
+  return node_ops;
+}
+
 // DERIVED DATASET CLASSES LEAF-NODE DATASETS
 // (In alphabetical order)
 
@@ -580,6 +679,28 @@ bool SkipDataset::ValidateParams() {
   return true;
 }
 
+// Constructor for TakeDataset
+TakeDataset::TakeDataset(int32_t count) : take_count_(count) {}
+
+// Function to build the TakeOp
+std::vector<std::shared_ptr<DatasetOp>> TakeDataset::Build() {
+  // A vector containing shared pointer to the Dataset Ops that this object will create
+  std::vector<std::shared_ptr<DatasetOp>> node_ops;
+
+  node_ops.push_back(std::make_shared<TakeOp>(take_count_, connector_que_size_));
+  return node_ops;
+}
+
+// Function to validate the parameters for TakeDataset
+bool TakeDataset::ValidateParams() {
+  if (take_count_ < -1) {
+    MS_LOG(ERROR) << "Take: Invalid input, take_count: " << take_count_;
+    return false;
+  }
+
+  return true;
+}
+
 // Function to build ZipOp
 ZipDataset::ZipDataset() {}
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h
index 14f8233ef..5af8c648e 100644
--- a/mindspore/ccsrc/minddata/dataset/include/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h
@@ -45,6 +45,7 @@ class Cifar10Dataset;
 class Cifar100Dataset;
 class ImageFolderDataset;
 class MnistDataset;
+class VOCDataset;
 // Dataset Op classes (in alphabetical order)
 class BatchDataset;
 class MapDataset;
@@ -53,6 +54,7 @@ class RenameDataset;
 class RepeatDataset;
 class ShuffleDataset;
 class SkipDataset;
+class TakeDataset;
 class ZipDataset;
 
 /// \brief Function to create a Cifar10 Dataset
@@ -96,6 +98,24 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool de
 /// \return Shared pointer to the current MnistDataset
 std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr);
 
+/// \brief Function to create a VOCDataset
+/// \notes The generated dataset has multiple columns:
+///     - task='Detection', columns: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32],
+///                                   ['difficult', dtype=uint32], ['truncate', dtype=uint32]].
+///     - task='Segmentation', columns: [['image', dtype=uint8], ['target', dtype=uint8]].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset
+/// \param[in] task Set the task type of reading voc data, only "Segmentation" and "Detection" are supported
+/// \param[in] mode Set the data list txt file to be read
+/// \param[in] class_index A str-to-int mapping from label name to index, valid only in the "Detection" task
+/// \param[in] decode Decode the images after reading
+/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
+///     will be used to randomly iterate the entire dataset
+/// \return Shared pointer to the current VOCDataset
+std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
+                                const std::string &mode = "train",
+                                const std::map<std::string, int32_t> &class_index = {}, bool decode = false,
+                                std::shared_ptr<SamplerObj> sampler = nullptr);
+
 /// \class Dataset datasets.h
 /// \brief A base class to represent a dataset in the data pipeline.
 class Dataset : public std::enable_shared_from_this<Dataset> {
@@ -192,6 +212,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
   /// \return Shared pointer to the current SkipDataset
   std::shared_ptr<SkipDataset> Skip(int32_t count);
 
+  /// \brief Function to create a TakeDataset
+  /// \notes Takes count elements in this dataset.
+  /// \param[in] count Number of elements to be taken from the dataset (default=-1, which takes all elements).
+  /// \return Shared pointer to the current Dataset
+  std::shared_ptr<Dataset> Take(int32_t count = -1);
+
   /// \brief Function to create a Zip Dataset
   /// \notes Applies zip to the dataset
   /// \param[in] datasets A list of shared pointer to the datasets that we want to zip
   /// \return Shared pointer to the current Dataset
@@ -300,6 +326,32 @@ class MnistDataset : public Dataset {
   std::shared_ptr<SamplerObj> sampler_;
 };
 
+class VOCDataset : public Dataset {
+ public:
+  /// \brief Constructor
+  VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &mode,
+             const std::map<std::string, int32_t> &class_index, bool decode, std::shared_ptr<SamplerObj> sampler);
+
+  /// \brief Destructor
+  ~VOCDataset() = default;
+
+  /// \brief a base class override function to create the required runtime dataset op objects for this class
+  /// \return shared pointer to the list of newly created DatasetOps
+  std::vector<std::shared_ptr<DatasetOp>> Build() override;
+
+  /// \brief Parameters validation
+  /// \return bool true if all the params are valid
+  bool ValidateParams() override;
+
+ private:
+  std::string dataset_dir_;
+  std::string task_;
+  std::string mode_;
+  std::map<std::string, int32_t> class_index_;
+  bool decode_;
+  std::shared_ptr<SamplerObj> sampler_;
+};
+
 class BatchDataset : public Dataset {
  public:
   /// \brief Constructor
@@ -446,6 +498,26 @@ class SkipDataset : public Dataset {
   int32_t skip_count_;
 };
 
+class TakeDataset : public Dataset {
+ public:
+  /// \brief Constructor
+  explicit TakeDataset(int32_t count);
+
+  /// \brief Destructor
+  ~TakeDataset() = default;
+
+  /// \brief a base class override function to create the required runtime dataset op objects for this class
+  /// \return shared pointer to the list of newly created DatasetOps
+  std::vector<std::shared_ptr<DatasetOp>> Build() override;
+
+  /// \brief Parameters validation
+  /// \return bool true if all the params are valid
+  bool ValidateParams() override;
+
+ private:
+  int32_t take_count_;
+};
+
 class ZipDataset : public Dataset {
  public:
   /// \brief Constructor
diff --git a/tests/ut/cpp/dataset/c_api_test.cc b/tests/ut/cpp/dataset/c_api_test.cc
index 8544c5c56..a8b7530ad 100644
--- a/tests/ut/cpp/dataset/c_api_test.cc
+++ b/tests/ut/cpp/dataset/c_api_test.cc
@@ -29,13 +29,20 @@
 #include "minddata/dataset/include/transforms.h"
 #include "minddata/dataset/include/iterator.h"
 #include "minddata/dataset/core/constants.h"
+#include "minddata/dataset/core/tensor_shape.h"
+#include "minddata/dataset/core/tensor.h"
 #include "minddata/dataset/include/samplers.h"
+#include "minddata/dataset/engine/datasetops/source/voc_op.h"
+
 using namespace mindspore::dataset::api;
 using mindspore::MsLogLevel::ERROR;
 using mindspore::ExceptionType::NoExceptionType;
 using mindspore::LogStream;
 using mindspore::dataset::Tensor;
+using mindspore::dataset::TensorShape;
+using mindspore::dataset::TensorImpl;
+using mindspore::dataset::DataType;
 using mindspore::dataset::Status;
 using mindspore::dataset::BorderType;
 
@@ -638,8 +645,96 @@ TEST_F(MindDataTestPipeline, TestSkipDatasetError1) {
   EXPECT_EQ(ds, nullptr);
 }
 
-TEST_F(MindDataTestPipeline, TestCifar10Dataset) {
+TEST_F(MindDataTestPipeline, TestTakeDatasetDefault) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetDefault.";
+
+  // Create an ImageFolder Dataset
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 7));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Take operation on ds, default count = -1
+  ds = ds->Take();
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    i++;
+    auto image = row["image"];
+    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
+    iter->GetNextRow(&row);
+  }
+  MS_LOG(INFO) << "Number of rows: " << i;
+
+  // Expect 7 rows
+  EXPECT_EQ(i, 7);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestTakeDatasetNormal) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetNormal.";
+
+  // Create an ImageFolder Dataset
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 8));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Take operation on ds
+  ds = ds->Take(5);
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    i++;
+    auto image = row["image"];
+    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
+    iter->GetNextRow(&row);
+  }
+  MS_LOG(INFO) << "Number of rows: " << i;
+
+  // Expect 5 rows
+  EXPECT_EQ(i, 5);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestTakeDatasetError1) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetError1.";
+
+  // Create an ImageFolder Dataset
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Take operation on ds with invalid count input
+  int32_t count = -5;
+  ds = ds->Take(count);
+  // Expect nullptr for invalid input take_count
+  EXPECT_EQ(ds, nullptr);
+}
+
+TEST_F(MindDataTestPipeline, TestCifar10Dataset) {
   // Create a Cifar10 Dataset
   std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
   std::shared_ptr<Dataset> ds = Cifar10(folder_path, RandomSampler(false, 10));
@@ -939,7 +1034,7 @@ TEST_F(MindDataTestPipeline, TestZipSuccess) {
 TEST_F(MindDataTestPipeline, TestZipFail) {
   // We expect this test to fail because we are the both datasets we are zipping have "image" and "label" columns
   // and zip doesn't accept datasets with same column names
-  
+
   // Create an ImageFolder Dataset
   std::string folder_path = datasets_root_path_ + "/testPK/data/";
   std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
@@ -1028,4 +1123,169 @@ TEST_F(MindDataTestPipeline, TestRenameFail) {
   // Create a Rename operation on ds
   ds = ds->Rename({"image", "label"}, {"col2"});
   EXPECT_EQ(ds, nullptr);
-}
\ No newline at end of file
+}
+
+TEST_F(MindDataTestPipeline, TestVOCSegmentation) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentation.";
+  // Create a VOC Dataset
+  std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
+ "/testVOC2012_2"; + std::shared_ptr ds = VOC(folder_path, "Segmentation", "train", {}, false, SequentialSampler(0, 3)); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + // Check if VOCOp read correct images/targets + using Tensor = mindspore::dataset::Tensor; + std::string expect_file[] = {"32", "33", "39", "32", "33", "39"}; + uint64_t i = 0; + while (row.size() != 0) { + auto image = row["image"]; + auto target = row["target"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + MS_LOG(INFO) << "Tensor target shape: " << target->shape(); + + std::shared_ptr expect_image; + Tensor::CreateFromFile(folder_path + "/JPEGImages/" + expect_file[i] + ".jpg", &expect_image); + EXPECT_EQ(*image, *expect_image); + + std::shared_ptr expect_target; + Tensor::CreateFromFile(folder_path + "/SegmentationClass/" + expect_file[i] + ".png", &expect_target); + EXPECT_EQ(*target, *expect_target); + + iter->GetNextRow(&row); + i++; + } + + EXPECT_EQ(i, 6); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestVOCSegmentationError1) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCSegmentationError1."; + // Create a VOC Dataset + std::map class_index; + class_index["car"] = 0; + std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; + std::shared_ptr ds = VOC(folder_path, "Segmentation", "train", class_index, false, RandomSampler(false, 6)); + + // Expect nullptr for segmentation task with class_index + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestPipeline, TestVOCInvalidTaskOrMode) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCInvalidTaskOrMode."; + // Create a VOC Dataset + std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; + std::shared_ptr ds_1 = VOC(folder_path, "Classification", "train", {}, false, SequentialSampler(0, 3)); + // Expect nullptr for invalid task + EXPECT_EQ(ds_1, nullptr); + + std::shared_ptr ds_2 = VOC(folder_path, "Segmentation", "validation", {}, false, RandomSampler(false, 4)); + // Expect nullptr for invalid mode + EXPECT_EQ(ds_2, nullptr); +} + +TEST_F(MindDataTestPipeline, TestVOCDetection) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCDetection."; + // Create a VOC Dataset + std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; + std::shared_ptr ds = VOC(folder_path, "Detection", "train", {}, false, SequentialSampler(0, 4)); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. 
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  // Check if VOCOp read correct images/labels
+  std::string expect_file[] = {"15", "32", "33", "39"};
+  uint32_t expect_num[] = {5, 5, 4, 3};
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto image = row["image"];
+    auto label = row["label"];
+    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
+    MS_LOG(INFO) << "Tensor label shape: " << label->shape();
+
+    std::shared_ptr<Tensor> expect_image;
+    Tensor::CreateFromFile(folder_path + "/JPEGImages/" + expect_file[i] + ".jpg", &expect_image);
+    EXPECT_EQ(*image, *expect_image);
+
+    std::shared_ptr<Tensor> expect_label;
+    Tensor::CreateFromMemory(TensorShape({1, 1}), DataType(DataType::DE_UINT32), nullptr, &expect_label);
+    expect_label->SetItemAt({0, 0}, expect_num[i]);
+    EXPECT_EQ(*label, *expect_label);
+
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 4);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestVOCClassIndex) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCClassIndex.";
+  // Create a VOC Dataset
+  std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
+  std::map<std::string, int32_t> class_index;
+  class_index["car"] = 0;
+  class_index["cat"] = 1;
+  class_index["train"] = 9;
+
+  std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", class_index, false, SequentialSampler(0, 6));
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  // Check if VOCOp read correct labels
+  // When we provide class_index, the labels of ["car", "cat", "train"] become [0, 1, 9]
+  std::shared_ptr<Tensor> expect_label;
+  Tensor::CreateFromMemory(TensorShape({1, 1}), DataType(DataType::DE_UINT32), nullptr, &expect_label);
+
+  uint32_t expect[] = {9, 9, 9, 1, 1, 0};
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    auto image = row["image"];
+    auto label = row["label"];
+    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
+    MS_LOG(INFO) << "Tensor label shape: " << label->shape();
+    expect_label->SetItemAt({0, 0}, expect[i]);
+    EXPECT_EQ(*label, *expect_label);
+
+    iter->GetNextRow(&row);
+    i++;
+  }
+
+  EXPECT_EQ(i, 6);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
--
GitLab
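
Usage note (editorial addition, not part of the patch): the sketch below shows one way the VOC() factory and the new Dataset::Take() operation introduced above might be combined from the C++ API. It is a minimal sketch under assumptions: the dataset path, the sampler arguments, and the take count are hypothetical placeholders, and only entry points that appear in this patch and its tests (VOC, Take, CreateIterator, GetNextRow, Stop, SequentialSampler) are used.

// Illustrative sketch only -- the folder path and parameter values below are hypothetical.
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/iterator.h"
#include "minddata/dataset/include/samplers.h"

using namespace mindspore::dataset::api;
using mindspore::dataset::Tensor;

int main() {
  // Hypothetical VOC root directory; replace with a real VOCdevkit-style folder.
  std::string folder_path = "/path/to/VOC2012";

  // Read the "train" list of the Detection task; nullptr is returned if ValidateParams() fails.
  std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, true, SequentialSampler(0, 4));
  if (ds == nullptr) {
    return 1;
  }

  // Keep only the first two rows with the new Take operation (Take(-1), the default, keeps everything).
  ds = ds->Take(2);

  // Build and launch the execution tree, then count the rows that come back.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);
  uint64_t i = 0;
  while (!row.empty()) {
    i++;  // For Detection, row holds the "image", "bbox", "label", "difficult" and "truncate" columns.
    iter->GetNextRow(&row);
  }
  iter->Stop();
  return i == 2 ? 0 : 1;
}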