!1126 VOCDataset support object detection function

Merge pull request !1126 from xiefangqi/voc_support_detection

!1126 VOCDataset support object detection function
Merge pull request !1126 from xiefangqi/voc_support_detection
ab031ee9 · mindspore-ci-bot · Gitee · dbac31e7 · c937bad5 · ab031ee9
28 changed file
--- a/cmake/external_libs/tinyxml2.cmake
+++ b/cmake/external_libs/tinyxml2.cmake
+set(tinyxml2_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 -Wno-unused-result")  # C++ flags for the tinyxml2 build; presumably read by mindspore_add_pkg via the <pkg>_CXXFLAGS name - confirm
+set(tinyxml2_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")  # C flags counterpart of the variable above
+mindspore_add_pkg(tinyxml2  # obtain tinyxml2 through the project's package helper (defined elsewhere)
+        VER 8.0.0  # pinned release, matches the archive name in URL
+        LIBS tinyxml2  # library name requested from the helper
+        URL https://github.com/leethomason/tinyxml2/archive/8.0.0.tar.gz
+        CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release  # presumably forwarded to tinyxml2's own CMake configure - confirm
+        MD5 5dc535c8b34ee621fe2128f072d275b5)  # expected MD5 for the URL above
+include_directories(${tinyxml2_INC})  # directory-scoped: applies to targets created in this directory and below
+add_library(mindspore::tinyxml2 ALIAS tinyxml2::tinyxml2)  # project-namespaced alias; _c_dataengine links mindspore::tinyxml2
--- a/cmake/mind_expression.cmake
+++ b/cmake/mind_expression.cmake
@@ -56,6 +56,7 @@ if (ENABLE_MINDDATA)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/libtiff.cmake)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/opencv.cmake)
    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sqlite.cmake)
+    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tinyxml2.cmake)
 endif()

 include(${CMAKE_SOURCE_DIR}/cmake/external_libs/gtest.cmake)

--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -39,6 +39,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows")
    set(opencv_LIBPATH ${opencv_LIBPATH}/../bin/)
    set(jpeg_turbo_LIBPATH ${jpeg_turbo_LIBPATH}/../bin/)
    set(sqlite_LIBPATH ${sqlite_LIBPATH}/../bin/)
+    set(tinyxml2_LIBPATH ${tinyxml2_LIBPATH}/../bin/)
 else ()
    set(INSTALL_LIB_DIR "lib")
 endif ()
@@ -82,6 +83,15 @@ if (ENABLE_MINDDATA)
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore
    )
+    file(GLOB_RECURSE TINYXML2_LIB_LIST
+	    ${tinyxml2_LIBPATH}/libtinyxml2*
+    )
+    install(
+	FILES ${TINYXML2_LIB_LIST}
+        DESTINATION ${INSTALL_LIB_DIR}
+        COMPONENT mindspore
+    )
+
 endif ()

 if (ENABLE_CPU)

--- a/mindspore/ccsrc/dataset/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/CMakeLists.txt
@@ -89,7 +89,7 @@ else()
    target_link_libraries(_c_dataengine PRIVATE mindspore::pybind11_module -ldl mindspore::protobuf ${SECUREC_LIBRARY})
 endif()
 target_link_libraries(_c_dataengine PUBLIC mindspore::jpeg_turbo mindspore::opencv_core mindspore::opencv_imgcodecs
-        mindspore::opencv_imgproc)
+        mindspore::opencv_imgproc mindspore::tinyxml2)
 if (ENABLE_GPUQUE)
    target_link_libraries(_c_dataengine PRIVATE gpu_queue
                                     ${CUDNN_PATH}/lib64/libcudnn.so

--- a/mindspore/ccsrc/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc
@@ -903,6 +903,8 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *

  std::shared_ptr<VOCOp::Builder> builder = std::make_shared<VOCOp::Builder>();
  (void)builder->SetDir(ToString(args["dataset_dir"]));
+  (void)builder->SetTask(ToString(args["task"]));
+  (void)builder->SetMode(ToString(args["mode"]));
  for (auto arg : args) {
    std::string key = py::str(arg.first);
    py::handle value = arg.second;
@@ -917,6 +919,8 @@ Status DEPipeline::ParseVOCOp(const py::dict &args, std::shared_ptr<DatasetOp> *
        (void)builder->SetSampler(std::move(sampler));
      } else if (key == "decode") {
        (void)builder->SetDecode(ToBool(value));
+      } else if (key == "class_indexing") {
+        (void)builder->SetClassIndex(ToStringMap(value));
      }
    }
  }

--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@@ -55,6 +55,7 @@
 #include "dataset/engine/datasetops/source/tf_reader_op.h"
 #include "dataset/engine/jagged_connector.h"
 #include "dataset/engine/datasetops/source/text_file_op.h"
+#include "dataset/engine/datasetops/source/voc_op.h"
 #include "dataset/kernels/data/to_float16_op.h"
 #include "dataset/util/random.h"
 #include "mindrecord/include/shard_operator.h"
@@ -194,6 +195,13 @@ void bindDatasetOps(py::module *m) {
      THROW_IF_ERROR(TextFileOp::CountAllFileRows(filenames, &count));
      return count;
    });
+  (void)py::class_<VOCOp, DatasetOp, std::shared_ptr<VOCOp>>(*m, "VOCOp")
+    .def_static("get_class_indexing", [](const std::string &dir, const std::string &task_type,
+                                         const std::string &task_mode, const py::dict &dict, int64_t numSamples) {
+      std::map<std::string, int32_t> output_class_indexing;
+      THROW_IF_ERROR(VOCOp::GetClassIndexing(dir, task_type, task_mode, dict, numSamples, &output_class_indexing));
+      return output_class_indexing;
+    });
 }
 void bindTensor(py::module *m) {
  (void)py::class_<GlobalContext>(*m, "GlobalContext")

--- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
@@ -15,8 +15,10 @@
 */
 #include "dataset/engine/datasetops/source/voc_op.h"

+#include <algorithm>
 #include <fstream>
 #include <iomanip>
+#include "./tinyxml2.h"
 #include "common/utils.h"
 #include "dataset/core/config_manager.h"
 #include "dataset/core/tensor_shape.h"
@@ -24,8 +26,24 @@
 #include "dataset/engine/db_connector.h"
 #include "dataset/engine/execution_tree.h"

+using tinyxml2::XMLDocument;
+using tinyxml2::XMLElement;
+using tinyxml2::XMLError;
 namespace mindspore {
 namespace dataset {
+const char kColumnImage[] = "image";
+const char kColumnTarget[] = "target";
+const char kColumnAnnotation[] = "annotation";
+const char kJPEGImagesFolder[] = "/JPEGImages/";
+const char kSegmentationClassFolder[] = "/SegmentationClass/";
+const char kAnnotationsFolder[] = "/Annotations/";
+const char kImageSetsSegmentation[] = "/ImageSets/Segmentation/";
+const char kImageSetsMain[] = "/ImageSets/Main/";
+const char kImageExtension[] = ".jpg";
+const char kSegmentationExtension[] = ".png";
+const char kAnnotationExtension[] = ".xml";
+const char kImageSetsExtension[] = ".txt";
+
 VOCOp::Builder::Builder() : builder_decode_(false), builder_num_samples_(0), builder_sampler_(nullptr) {
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  builder_num_workers_ = cfg->num_parallel_workers();
@@ -39,13 +57,21 @@ Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) {
    builder_sampler_ = std::make_shared<SequentialSampler>();
  }
  builder_schema_ = std::make_unique<DataSchema>();
-  RETURN_IF_NOT_OK(
-    builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
-  RETURN_IF_NOT_OK(
-    builder_schema_->AddColumn(ColDescriptor("target", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
-  *ptr = std::make_shared<VOCOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
-                                 builder_op_connector_size_, builder_num_samples_, builder_decode_,
-                                 std::move(builder_schema_), std::move(builder_sampler_));
+  if (builder_task_type_ == TaskType::Segmentation) {
+    RETURN_IF_NOT_OK(builder_schema_->AddColumn(
+      ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+    RETURN_IF_NOT_OK(builder_schema_->AddColumn(
+      ColDescriptor(std::string(kColumnTarget), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+  } else if (builder_task_type_ == TaskType::Detection) {
+    RETURN_IF_NOT_OK(builder_schema_->AddColumn(
+      ColDescriptor(std::string(kColumnImage), DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
+    RETURN_IF_NOT_OK(builder_schema_->AddColumn(
+      ColDescriptor(std::string(kColumnAnnotation), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
+  }
+  *ptr = std::make_shared<VOCOp>(builder_task_type_, builder_task_mode_, builder_dir_, builder_labels_to_read_,
+                                 builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_,
+                                 builder_num_samples_, builder_decode_, std::move(builder_schema_),
+                                 std::move(builder_sampler_));
  return Status::OK();
 }

@@ -58,8 +84,9 @@ Status VOCOp::Builder::SanityCheck() {
  return err_msg.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, err_msg);
 }

-VOCOp::VOCOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &folder_path, int32_t queue_size,
-             int64_t num_samples, bool decode, std::unique_ptr<DataSchema> data_schema,
+VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
+             const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer,
+             int32_t queue_size, int64_t num_samples, bool decode, std::unique_ptr<DataSchema> data_schema,
             std::shared_ptr<Sampler> sampler)
    : ParallelOp(num_workers, queue_size),
      decode_(decode),
@@ -67,7 +94,10 @@ VOCOp::VOCOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &fo
      buf_cnt_(0),
      num_rows_(0),
      num_samples_(num_samples),
+      task_type_(task_type),
+      task_mode_(task_mode),
      folder_path_(folder_path),
+      class_index_(class_index),
      rows_per_buffer_(rows_per_buffer),
      sampler_(std::move(sampler)),
      data_schema_(std::move(data_schema)) {
@@ -167,12 +197,25 @@ Status VOCOp::GetNumSamples(int64_t *num) const {
 }

 Status VOCOp::LoadTensorRow(const std::string &image_id, TensorRow *trow) {
-  std::shared_ptr<Tensor> image, target;
-  const std::string kImageDir = folder_path_ + "/JPEGImages/" + image_id + ".jpg";
-  const std::string kTargetDir = folder_path_ + "/SegmentationClass/" + image_id + ".png";
-  RETURN_IF_NOT_OK(ReadImageToTensor(kImageDir, data_schema_->column(0), &image));
-  RETURN_IF_NOT_OK(ReadImageToTensor(kTargetDir, data_schema_->column(1), &target));
-  (*trow) = {std::move(image), std::move(target)};
+  if (task_type_ == TaskType::Segmentation) {
+    std::shared_ptr<Tensor> image, target;
+    const std::string kImageFile =
+      folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
+    const std::string kTargetFile =
+      folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
+    RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
+    RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target));
+    (*trow) = {std::move(image), std::move(target)};
+  } else if (task_type_ == TaskType::Detection) {
+    std::shared_ptr<Tensor> image, annotation;
+    const std::string kImageFile =
+      folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
+    const std::string kAnnotationFile =
+      folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
+    RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
+    RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, data_schema_->column(1), &annotation));
+    (*trow) = {std::move(image), std::move(annotation)};
+  }
  return Status::OK();
 }

@@ -213,8 +256,13 @@ Status VOCOp::WorkerEntry(int32_t worker_id) {
 }

 Status VOCOp::ParseImageIds() {
-  const std::string kImageSets = "/ImageSets/Segmentation/train.txt";
-  std::string image_sets_file = folder_path_ + kImageSets;
+  std::string image_sets_file;
+  if (task_type_ == TaskType::Segmentation) {
+    image_sets_file =
+      folder_path_ + std::string(kImageSetsSegmentation) + task_mode_ + std::string(kImageSetsExtension);
+  } else if (task_type_ == TaskType::Detection) {
+    image_sets_file = folder_path_ + std::string(kImageSetsMain) + task_mode_ + std::string(kImageSetsExtension);
+  }
  std::ifstream in_file;
  in_file.open(image_sets_file);
  if (in_file.fail()) {
@@ -231,6 +279,84 @@ Status VOCOp::ParseImageIds() {
  return Status::OK();
 }

+// Parse the annotation xml of every id in image_ids_, keep only the ids whose annotation produced at least
+// one bounding box, number the collected labels and refresh num_rows_ / num_samples_.
+// @return Status - The error code return (the first failure reported by ParseAnnotationBbox is propagated)
+Status VOCOp::ParseAnnotationIds() {
+  std::vector<std::string> new_image_ids;
+  new_image_ids.reserve(image_ids_.size());
+  // Iterate by const reference: the ids are only read here, so there is no need to copy each string.
+  for (const auto &id : image_ids_) {
+    const std::string kAnnotationName =
+      folder_path_ + std::string(kAnnotationsFolder) + id + std::string(kAnnotationExtension);
+    RETURN_IF_NOT_OK(ParseAnnotationBbox(kAnnotationName));
+    // ParseAnnotationBbox records the file in label_map_ only when it kept at least one box.
+    if (label_map_.find(kAnnotationName) != label_map_.end()) {
+      new_image_ids.push_back(id);
+    }
+  }
+
+  // Some ids were dropped: take over the filtered list instead of clearing and re-inserting element by element.
+  if (image_ids_.size() != new_image_ids.size()) {
+    image_ids_ = std::move(new_image_ids);
+  }
+  // label_index_ is a std::map, so labels are numbered in key order, starting from 0.
+  uint32_t count = 0;
+  for (auto &label : label_index_) {
+    label.second = count++;
+  }
+
+  num_rows_ = image_ids_.size();
+  // A num_samples_ of 0, or one larger than the number of rows, means "use every row".
+  num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_;
+  return Status::OK();
+}
+
+// Parse one annotation xml and record its usable bounding boxes.
+// An <object> is kept when its <name> is non-empty, passes the class_index_ filter (when class_index_ is not
+// empty) and its <bndbox> satisfies xmin > 0, ymin > 0, xmax > xmin and ymax > ymin. A kept box is stored as
+// {xmin, ymin, xmax - xmin, ymax - ymin, truncated, difficult} and its label is registered in label_index_.
+// The file is added to label_map_ only when at least one box was kept.
+// @param const std::string &path - path to annotation xml
+// @return Status - error if the file is missing, fails to load, has no root or no <object>, or if an
+//     <object> has no <bndbox>
+Status VOCOp::ParseAnnotationBbox(const std::string &path) {
+  if (!Path(path).Exists()) {
+    RETURN_STATUS_UNEXPECTED("File is not found : " + path);
+  }
+  Bbox bbox;
+  XMLDocument doc;
+  XMLError e = doc.LoadFile(common::SafeCStr(path));
+  if (e != XMLError::XML_SUCCESS) {
+    RETURN_STATUS_UNEXPECTED("Xml load failed");
+  }
+  XMLElement *root = doc.RootElement();
+  if (root == nullptr) {
+    RETURN_STATUS_UNEXPECTED("Xml load root element error");
+  }
+  XMLElement *object = root->FirstChildElement("object");
+  if (object == nullptr) {
+    RETURN_STATUS_UNEXPECTED("No object find in " + path);
+  }
+  // Read child <tag> of parent as an unsigned value; when the child is absent *out keeps its current value.
+  auto read_unsigned = [](XMLElement *parent, const char *tag, uint32_t *out) {
+    XMLElement *node = parent->FirstChildElement(tag);
+    if (node != nullptr) *out = node->UnsignedText();
+  };
+  while (object != nullptr) {
+    std::string label_name;
+    uint32_t xmin = 0, ymin = 0, xmax = 0, ymax = 0, truncated = 0, difficult = 0;
+    XMLElement *name_node = object->FirstChildElement("name");
+    // GetText() yields nullptr for an element without text (e.g. <name/>); assigning that to a std::string
+    // is undefined behaviour, so leave label_name empty and let the object be skipped below.
+    const char *name_text = (name_node != nullptr) ? name_node->GetText() : nullptr;
+    if (name_text != nullptr) label_name = name_text;
+    read_unsigned(object, "truncated", &truncated);
+    read_unsigned(object, "difficult", &difficult);
+
+    XMLElement *bbox_node = object->FirstChildElement("bndbox");
+    if (bbox_node == nullptr) {
+      RETURN_STATUS_UNEXPECTED("bndbox dismatch in " + path);
+    }
+    read_unsigned(bbox_node, "xmin", &xmin);
+    read_unsigned(bbox_node, "ymin", &ymin);
+    read_unsigned(bbox_node, "xmax", &xmax);
+    read_unsigned(bbox_node, "ymax", &ymax);
+
+    // An empty class_index_ means "accept every label"; otherwise only labels listed there are kept.
+    const bool label_wanted =
+      !label_name.empty() && (class_index_.empty() || class_index_.find(label_name) != class_index_.end());
+    const bool box_valid = xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin;
+    if (label_wanted && box_valid) {
+      std::vector<uint32_t> bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, truncated, difficult};
+      bbox.emplace_back(std::make_pair(label_name, bbox_list));
+      // Placeholder index; ParseAnnotationIds assigns the final numbering once all files are parsed.
+      label_index_[label_name] = 0;
+    }
+    object = object->NextSiblingElement("object");
+  }
+  if (!bbox.empty()) label_map_[path] = bbox;
+  return Status::OK();
+}
+
 Status VOCOp::InitSampler() {
  RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
  return Status::OK();
@@ -245,6 +371,9 @@ Status VOCOp::LaunchThreadsAndInitOp() {
  RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1)));
  TaskManager::FindMe()->Post();
  RETURN_IF_NOT_OK(this->ParseImageIds());
+  if (task_type_ == TaskType::Detection) {
+    RETURN_IF_NOT_OK(this->ParseAnnotationIds());
+  }
  RETURN_IF_NOT_OK(this->InitSampler());
  return Status::OK();
 }
@@ -270,6 +399,34 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co
  return Status::OK();
 }

+// Build the annotation tensor for one xml file from the boxes cached in label_map_.
+// Each row is {class id, followed by the values stored for the box}; the class id comes from class_index_
+// when the label is listed there, otherwise from label_index_.
+// @param const std::string &path - path to the annotation xml (the key used in label_map_)
+// @param const ColDescriptor &col - contains tensor implementation and datatype
+// @param std::shared_ptr<Tensor> tensor - return
+// @return Status - The error code return
+Status VOCOp::ReadAnnotationToTensor(const std::string &path, const ColDescriptor &col,
+                                     std::shared_ptr<Tensor> *tensor) {
+  // Use find(): operator[] would insert an empty entry for an unknown path, and the empty row that follows
+  // would then be indexed out of bounds when its address is taken.
+  const auto annotation = label_map_.find(path);
+  if (annotation == label_map_.end()) {
+    RETURN_STATUS_UNEXPECTED("No annotation loaded for " + path);
+  }
+  std::vector<uint32_t> bbox_row;
+  dsize_t bbox_column_num = 0, bbox_num = 0;
+  for (const auto &box : annotation->second) {
+    const auto label = label_index_.find(box.first);
+    if (label == label_index_.end()) {
+      continue;
+    }
+    // A user supplied index takes precedence over the automatically assigned one.
+    const auto user_index = class_index_.find(box.first);
+    const int32_t class_id = (user_index != class_index_.end()) ? user_index->second : label->second;
+    bbox_row.emplace_back(static_cast<uint32_t>(class_id));
+    bbox_row.insert(bbox_row.end(), box.second.begin(), box.second.end());
+    if (bbox_column_num == 0) {
+      // One column for the class id plus the values stored for the box.
+      bbox_column_num = static_cast<dsize_t>(box.second.size() + 1);
+    }
+    bbox_num++;
+  }
+
+  std::vector<dsize_t> bbox_dim = {bbox_num, bbox_column_num};
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(bbox_dim), col.type(),
+                                        reinterpret_cast<unsigned char *>(bbox_row.data())));
+  return Status::OK();
+}
+
 // Derived from RandomAccessOp
 Status VOCOp::GetNumRowsInDataset(int64_t *num) const {
  if (num == nullptr || num_rows_ == 0) {
@@ -280,5 +437,30 @@ Status VOCOp::GetNumRowsInDataset(int64_t *num) const {
  (*num) = num_rows_;
  return Status::OK();
 }
+
+// Produce the class name to index mapping of a VOC dataset.
+// A non-empty input dict is returned as is; otherwise a temporary VOCOp is built, the image list and the
+// annotations are parsed, and the resulting label_index_ is returned.
+// @param const std::string &dir - VOC dir path
+// @param const std::string &task_type - task type of reading voc job
+// @param const std::string &task_mode - task mode of reading voc job
+// @param const py::dict &dict - input dict of class index
+// @param int64_t numSamples - samples number of VOCDataset (not used by this function)
+// @param std::map<std::string, int32_t> *output_class_indexing - output class index of VOCDataset
+// @return Status - The error code return
+Status VOCOp::GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode,
+                               const py::dict &dict, int64_t numSamples,
+                               std::map<std::string, int32_t> *output_class_indexing) {
+  // The result is written through this pointer on every path, so reject a null pointer up front.
+  if (output_class_indexing == nullptr) {
+    RETURN_STATUS_UNEXPECTED("output_class_indexing is null");
+  }
+  std::map<std::string, int32_t> input_class_indexing;
+  for (auto p : dict) {
+    (void)input_class_indexing.insert(std::pair<std::string, int32_t>(py::reinterpret_borrow<py::str>(p.first),
+                                                                      py::reinterpret_borrow<py::int_>(p.second)));
+  }
+
+  if (!input_class_indexing.empty()) {
+    *output_class_indexing = input_class_indexing;
+    return Status::OK();
+  }
+
+  // No mapping supplied: derive it from the annotations on disk.
+  std::shared_ptr<VOCOp> op;
+  RETURN_IF_NOT_OK(
+    Builder().SetDir(dir).SetTask(task_type).SetMode(task_mode).SetClassIndex(input_class_indexing).Build(&op));
+  RETURN_IF_NOT_OK(op->ParseImageIds());
+  RETURN_IF_NOT_OK(op->ParseAnnotationIds());
+  // insert() keeps any entry the caller already placed in the output map.
+  for (const auto &label : op->label_index_) {
+    (void)output_class_indexing->insert(std::make_pair(label.first, label.second));
+  }
+
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.h
@@ -16,6 +16,7 @@
 #ifndef DATASET_ENGINE_DATASETOPS_SOURCE_VOC_OP_H_
 #define DATASET_ENGINE_DATASETOPS_SOURCE_VOC_OP_H_

+#include <map>
 #include <memory>
 #include <string>
 #include <utility>
@@ -39,8 +40,12 @@ namespace dataset {
 template <typename T>
 class Queue;

+using Bbox = std::vector<std::pair<std::string, std::vector<uint32_t>>>;
+
 class VOCOp : public ParallelOp, public RandomAccessOp {
 public:
+  enum class TaskType { Segmentation = 0, Detection = 1 };
+
  class Builder {
   public:
    // Constructor for Builder class of ImageFolderOp
@@ -59,6 +64,34 @@ class VOCOp : public ParallelOp, public RandomAccessOp {
      return *this;
    }

+    // Setter method. Stores the class name to index map that Build() hands to the VOCOp constructor.
+    // @param const std::map<std::string, int32_t> &map - a class name to label map
+    // @return Builder setter method returns reference to the builder.
+    Builder &SetClassIndex(const std::map<std::string, int32_t> &map) {
+      builder_labels_to_read_ = map;
+      return *this;
+    }
+
+    // Setter method. Any string other than "Segmentation" / "Detection" leaves builder_task_type_ unchanged.
+    // @param const std::string & task_type - "Segmentation" or "Detection"
+    // @return Builder setter method returns reference to the builder.
+    Builder &SetTask(const std::string &task_type) {
+      if (task_type == "Segmentation") {
+        builder_task_type_ = TaskType::Segmentation;
+      } else if (task_type == "Detection") {
+        builder_task_type_ = TaskType::Detection;
+      }
+      return *this;
+    }
+
+    // Setter method. The mode is the base name of the image list txt file read by VOCOp::ParseImageIds.
+    // @param const std::string & task_mode - base name of the list file under ImageSets, without ".txt"
+    // @return Builder setter method returns reference to the builder.
+    Builder &SetMode(const std::string &task_mode) {
+      builder_task_mode_ = task_mode;
+      return *this;
+    }
+
    // Setter method.
    // @param int32_t num_workers
    // @return Builder setter method returns reference to the builder.
@@ -119,25 +152,33 @@ class VOCOp : public ParallelOp, public RandomAccessOp {
   private:
    bool builder_decode_;
    std::string builder_dir_;
+    TaskType builder_task_type_;
+    std::string builder_task_mode_;
    int32_t builder_num_workers_;
    int32_t builder_op_connector_size_;
    int32_t builder_rows_per_buffer_;
    int64_t builder_num_samples_;
    std::shared_ptr<Sampler> builder_sampler_;
    std::unique_ptr<DataSchema> builder_schema_;
+    std::map<std::string, int32_t> builder_labels_to_read_;
  };

  // Constructor
+  // @param TaskType task_type - task type of VOC
+  // @param std::string task_mode - task mode of VOC
+  // @param std::string folder_path - dir directory of VOC
+  // @param std::map<std::string, int32_t> class_index - input class-to-index of annotation
  // @param int32_t num_workers - number of workers reading images in parallel
  // @param int32_t rows_per_buffer - number of images (rows) in each buffer
-  // @param std::string folder_path - dir directory of VOC
  // @param int32_t queue_size - connector queue size
  // @param int64_t num_samples - number of samples to read
  // @param bool decode - whether to decode images
  // @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset
  // @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read
-  VOCOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &folder_path, int32_t queue_size,
-        int64_t num_samples, bool decode, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler);
+  VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
+        const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer,
+        int32_t queue_size, int64_t num_samples, bool decode, std::unique_ptr<DataSchema> data_schema,
+        std::shared_ptr<Sampler> sampler);

  // Destructor
  ~VOCOp() = default;
@@ -167,6 +208,16 @@ class VOCOp : public ParallelOp, public RandomAccessOp {
  // @param show_all
  void Print(std::ostream &out, bool show_all) const override;

+  // @param const std::string &dir - VOC dir path
+  // @param const std::string &task_type - task type of reading voc job
+  // @param const std::string &task_mode - task mode of reading voc job
+  // @param const py::dict &dict - input dict of class index
+  // @param int64_t numSamples - samples number of VOCDataset
+  // @param std::map<std::string, int32_t> *output_class_indexing - output class index of VOCDataset
+  static Status GetClassIndexing(const std::string &dir, const std::string &task_type, const std::string &task_mode,
+                                 const py::dict &dict, int64_t numSamples,
+                                 std::map<std::string, int32_t> *output_class_indexing);
+
 private:
  // Initialize Sampler, calls sampler->Init() within
  // @return Status - The error code return
@@ -184,19 +235,40 @@ class VOCOp : public ParallelOp, public RandomAccessOp {
  // @return Status - The error code return
  Status ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor);

+  // @param const std::string &path - path to the annotation xml file
+  // @param const ColDescriptor &col - contains tensor implementation and datatype
+  // @param std::shared_ptr<Tensor> tensor - return
+  // @return Status - The error code return
+  Status ReadAnnotationToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor);
+
  // @param const std::vector<uint64_t> &keys - keys in ioblock
  // @param std::unique_ptr<DataBuffer> db
  // @return Status - The error code return
  Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db);

+  // Read image list from ImageSets
+  // @return Status - The error code return
  Status ParseImageIds();

+  // Read annotation from Annotation folder
+  // @return Status - The error code return
+  Status ParseAnnotationIds();
+
+  // @param const std::string &path - path to annotation xml
+  // @return Status - The error code return
+  Status ParseAnnotationBbox(const std::string &path);
+
+  // @param const std::shared_ptr<Tensor> &sample_ids - sample ids of tensor
+  // @param std::vector<int64_t> *keys - image id
+  // @return Status - The error code return
  Status TraverseSampleIds(const std::shared_ptr<Tensor> &sample_ids, std::vector<int64_t> *keys);

  // Called first when function is called
  // @return Status - The error code return
  Status LaunchThreadsAndInitOp();

+  // Reset dataset state
+  // @return Status - The error code return
  Status Reset() override;

  bool decode_;
@@ -205,6 +277,8 @@ class VOCOp : public ParallelOp, public RandomAccessOp {
  int64_t num_rows_;
  int64_t num_samples_;
  std::string folder_path_;
+  TaskType task_type_;
+  std::string task_mode_;
  int32_t rows_per_buffer_;
  std::shared_ptr<Sampler> sampler_;
  std::unique_ptr<DataSchema> data_schema_;
@@ -212,6 +286,9 @@ class VOCOp : public ParallelOp, public RandomAccessOp {
  WaitPost wp_;
  std::vector<std::string> image_ids_;
  QueueList<std::unique_ptr<IOBlock>> io_block_queues_;
+  std::map<std::string, int32_t> class_index_;
+  std::map<std::string, int32_t> label_index_;
+  std::map<std::string, Bbox> label_map_;
 };
 }  // namespace dataset
 }  // namespace mindspore

--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -34,7 +34,7 @@ import copy
 import numpy as np

 from mindspore._c_dataengine import DataType, TFReaderOp, ImageFolderOp, CifarOp, MnistOp, ManifestOp, \
-    MindRecordOp, TextFileOp, CBatchInfo
+    MindRecordOp, TextFileOp, VOCOp, CBatchInfo
 from mindspore._c_expression import typing

 from mindspore import log as logger
@@ -3462,6 +3462,12 @@ class VOCDataset(SourceDataset):

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
+        task (str): Set the task type for reading VOC data, now only supports "Segmentation" or "Detection"
+            (default="Segmentation").
+        mode (str): Set the data list txt file to be read (default="train").
+        class_indexing (dict, optional): A str-to-int mapping from label name to index
+            (default=None, the label names found in the annotations will be sorted alphabetically and each
+            class will be given a unique index starting from 0).
        num_samples (int, optional): The number of images to be included in the dataset
            (default=None, all images).
        num_parallel_workers (int, optional): Number of workers to read the data
@@ -3477,27 +3483,41 @@ class VOCDataset(SourceDataset):
            argument should be specified only when num_shards is also specified.

    Raises:
+        RuntimeError: If an xml file in Annotations has an invalid format.
+        RuntimeError: If an xml file in Annotations is missing the "object" element.
+        RuntimeError: If an xml file in Annotations is missing the "bndbox" element.
        RuntimeError: If sampler and shuffle are specified at the same time.
        RuntimeError: If sampler and sharding are specified at the same time.
        RuntimeError: If num_shards is specified but shard_id is None.
        RuntimeError: If shard_id is specified but num_shards is None.
+        ValueError: If task is not equal to 'Segmentation' or 'Detection'.
+        ValueError: If task is 'Segmentation' but class_indexing is not None.
+        ValueError: If the txt file related to mode does not exist.
        ValueError: If shard_id is invalid (< 0 or >= num_shards).

    Examples:
        >>> import mindspore.dataset as ds
        >>> dataset_dir = "/path/to/voc_dataset_directory"
-        >>> # 1) read all VOC dataset samples in dataset_dir with 8 threads in random order:
-        >>> voc_dataset = ds.VOCDataset(dataset_dir, num_parallel_workers=8)
-        >>> # 2) read then decode all VOC dataset samples in dataset_dir in sequence:
-        >>> voc_dataset = ds.VOCDataset(dataset_dir, decode=True, shuffle=False)
-        >>> # in VOC dataset, each dictionary has keys "image" and "target"
+        >>> # 1) read VOC data for segmentation train
+        >>> voc_dataset = ds.VOCDataset(dataset_dir, task="Segmentation", mode="train")
+        >>> # 2) read VOC data for detection train
+        >>> voc_dataset = ds.VOCDataset(dataset_dir, task="Detection", mode="train")
+        >>> # 3) read all VOC dataset samples in dataset_dir with 8 threads in random order:
+        >>> voc_dataset = ds.VOCDataset(dataset_dir, task="Detection", mode="train", num_parallel_workers=8)
+        >>> # 4) read then decode all VOC dataset samples in dataset_dir in sequence:
+        >>> voc_dataset = ds.VOCDataset(dataset_dir, task="Detection", mode="train", decode=True, shuffle=False)
+        >>> # in VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target"
+        >>> # in VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation"
    """

    @check_vocdataset
-    def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None,
-                 shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None):
+    def __init__(self, dataset_dir, task="Segmentation", mode="train", class_indexing=None, num_samples=None,
+                 num_parallel_workers=None, shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None):
        super().__init__(num_parallel_workers)
        self.dataset_dir = dataset_dir
+        self.task = task
+        self.mode = mode
+        self.class_indexing = class_indexing
        self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id)
        self.num_samples = num_samples
        self.decode = decode
@@ -3508,6 +3528,9 @@ class VOCDataset(SourceDataset):
    def get_args(self):
        args = super().get_args()
        args["dataset_dir"] = self.dataset_dir
+        args["task"] = self.task
+        args["mode"] = self.mode
+        args["class_indexing"] = self.class_indexing
        args["num_samples"] = self.num_samples
        args["sampler"] = self.sampler
        args["decode"] = self.decode
@@ -3525,6 +3548,28 @@ class VOCDataset(SourceDataset):
        """
        return self.num_samples

+    def get_class_indexing(self):
+        """
+        Get the class index.
+
+        Return:
+            Dict, A str-to-int mapping from label name to index.
+        """
+        if self.task != "Detection":
+            raise NotImplementedError()
+
+        if self.num_samples is None:
+            num_samples = 0
+        else:
+            num_samples = self.num_samples
+
+        if self.class_indexing is None:
+            class_indexing = dict()
+        else:
+            class_indexing = self.class_indexing
+
+        return VOCOp.get_class_indexing(self.dataset_dir, self.task, self.mode, class_indexing, num_samples)
+

 class CelebADataset(SourceDataset):
    """

--- a/mindspore/dataset/engine/serializer_deserializer.py
+++ b/mindspore/dataset/engine/serializer_deserializer.py
@@ -285,9 +285,9 @@ def create_node(node):

    elif dataset_op == 'VOCDataset':
        sampler = construct_sampler(node.get('sampler'))
-        pyobj = pyclass(node['dataset_dir'], node.get('num_samples'), node.get('num_parallel_workers'),
-                        node.get('shuffle'), node.get('decode'), sampler, node.get('num_shards'),
-                        node.get('shard_id'))
+        pyobj = pyclass(node['dataset_dir'], node.get('task'), node.get('mode'), node.get('class_indexing'),
+                        node.get('num_samples'), node.get('num_parallel_workers'), node.get('shuffle'),
+                        node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id'))

    elif dataset_op == 'CelebADataset':
        sampler = construct_sampler(node.get('sampler'))

--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -455,17 +455,44 @@ def check_vocdataset(method):

        nreq_param_int = ['num_samples', 'num_parallel_workers', 'num_shards', 'shard_id']
        nreq_param_bool = ['shuffle', 'decode']
+        nreq_param_dict = ['class_indexing']

        # check dataset_dir; required argument
        dataset_dir = param_dict.get('dataset_dir')
        if dataset_dir is None:
            raise ValueError("dataset_dir is not provided.")
        check_dataset_dir(dataset_dir)
+        # check task; required argument
+        task = param_dict.get('task')
+        if task is None:
+            raise ValueError("task is not provided.")
+        if not isinstance(task, str):
+            raise ValueError("task is not str type.")
+        # check mode; required argument
+        mode = param_dict.get('mode')
+        if mode is None:
+            raise ValueError("mode is not provided.")
+        if not isinstance(mode, str):
+            raise ValueError("mode is not str type.")
+
+        imagesets_file = ""
+        if task == "Segmentation":
+            imagesets_file = os.path.join(dataset_dir, "ImageSets", "Segmentation", mode + ".txt")
+            if param_dict.get('class_indexing') is not None:
+                raise ValueError("class_indexing is invalid in Segmentation task")
+        elif task == "Detection":
+            imagesets_file = os.path.join(dataset_dir, "ImageSets", "Main", mode + ".txt")
+        else:
+            raise ValueError("Invalid task : " + task)
+
+        check_dataset_file(imagesets_file)

        check_param_type(nreq_param_int, param_dict, int)

        check_param_type(nreq_param_bool, param_dict, bool)

+        check_param_type(nreq_param_dict, param_dict, dict)
+
        check_sampler_shuffle_shard_options(param_dict)

        return method(*args, **kwargs)

--- a/tests/ut/cpp/dataset/voc_op_test.cc
+++ b/tests/ut/cpp/dataset/voc_op_test.cc
@@ -50,17 +50,170 @@ std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);

 std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);

-std::shared_ptr<VOCOp> CreateVOC(int64_t num_wrks, int64_t rows, int64_t conns, std::string path,
-                                 bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
-                                 int64_t num_samples = 0, bool decode = false) {
-  std::shared_ptr<VOCOp> so;
+// Test fixture for the VOCOp cases in this file; it adds no members of its own to UT::DatasetOpTesting.
+class MindDataTestVOCOp : public UT::DatasetOpTesting {
+ protected:
+};
+
+// Reads the "train" split of testVOC2012 in Detection mode and expects 9 rows.
+TEST_F(MindDataTestVOCOp, TestVOCDetection) {
+  // Start with an empty execution tree
+  auto my_tree = std::make_shared<ExecutionTree>();
+  std::string dataset_path;
+  dataset_path = datasets_root_path_ + "/testVOC2012";
+
+  std::string task_type("Detection");
+  std::string task_mode("train");
+  std::shared_ptr<VOCOp> my_voc_op;
  VOCOp::Builder builder;
-  Status rc = builder.SetNumWorkers(num_wrks).SetDir(path).SetRowsPerBuffer(rows)
-                     .SetOpConnectorSize(conns).SetSampler(std::move(sampler))
-                     .SetNumSamples(num_samples).SetDecode(decode).Build(&so);
-  return so;
+  Status rc = builder.SetDir(dataset_path)
+                     .SetTask(task_type)
+                     .SetMode(task_mode)
+                     .Build(&my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+
+  rc = my_tree->AssociateNode(my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+  rc = my_tree->AssignRoot(my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+
+  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
+  rc = my_tree->Prepare();
+  ASSERT_TRUE(rc.IsOk());
+
+  rc = my_tree->Launch();
+  ASSERT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator di(my_tree);
+  TensorRow tensor_list;
+  rc = di.FetchNextTensorRow(&tensor_list);
+  ASSERT_TRUE(rc.IsOk());
+
+  int row_count = 0;
+  while (!tensor_list.empty()) {
+    MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";
+
+    // Display each tensor by calling the printer on it (range-for avoids a signed/unsigned index compare)
+    for (const auto &tensor : tensor_list) {
+      std::ostringstream ss;
+      ss << "(" << tensor << "): " << *tensor << std::endl;
+      MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
+    }
+
+    rc = di.FetchNextTensorRow(&tensor_list);
+    ASSERT_TRUE(rc.IsOk());
+    row_count++;
+  }
+
+  ASSERT_EQ(row_count, 9);
 }

-class MindDataTestVOCSampler : public UT::DatasetOpTesting {
- protected:
-};
+// Reads the "train" split of testVOC2012 in Segmentation mode and expects 10 rows.
+TEST_F(MindDataTestVOCOp, TestVOCSegmentation) {
+  // Start with an empty execution tree
+  auto my_tree = std::make_shared<ExecutionTree>();
+  std::string dataset_path;
+  dataset_path = datasets_root_path_ + "/testVOC2012";
+
+  std::string task_type("Segmentation");
+  std::string task_mode("train");
+  std::shared_ptr<VOCOp> my_voc_op;
+  VOCOp::Builder builder;
+  Status rc = builder.SetDir(dataset_path)
+                     .SetTask(task_type)
+                     .SetMode(task_mode)
+                     .Build(&my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+
+  rc = my_tree->AssociateNode(my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+  rc = my_tree->AssignRoot(my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+
+  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
+  rc = my_tree->Prepare();
+  ASSERT_TRUE(rc.IsOk());
+
+  rc = my_tree->Launch();
+  ASSERT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator di(my_tree);
+  TensorRow tensor_list;
+  rc = di.FetchNextTensorRow(&tensor_list);
+  ASSERT_TRUE(rc.IsOk());
+
+  int row_count = 0;
+  while (!tensor_list.empty()) {
+    MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";
+
+    // Display each tensor by calling the printer on it (range-for avoids a signed/unsigned index compare)
+    for (const auto &tensor : tensor_list) {
+      std::ostringstream ss;
+      ss << "(" << tensor << "): " << *tensor << std::endl;
+      MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
+    }
+
+    rc = di.FetchNextTensorRow(&tensor_list);
+    ASSERT_TRUE(rc.IsOk());
+    row_count++;
+  }
+
+  ASSERT_EQ(row_count, 10);
+}
+
+// Reads the Detection "train" split with a class index of car/cat/train set via SetClassIndex and expects 6 rows.
+TEST_F(MindDataTestVOCOp, TestVOCClassIndex) {
+  // Start with an empty execution tree
+  auto my_tree = std::make_shared<ExecutionTree>();
+  std::string dataset_path;
+  dataset_path = datasets_root_path_ + "/testVOC2012";
+
+  std::string task_type("Detection");
+  std::string task_mode("train");
+  std::map<std::string, int32_t> class_index;
+  class_index["car"] = 0;
+  class_index["cat"] = 1;
+  class_index["train"] = 5;
+  std::shared_ptr<VOCOp> my_voc_op;
+  VOCOp::Builder builder;
+  Status rc = builder.SetDir(dataset_path)
+                     .SetTask(task_type)
+                     .SetMode(task_mode)
+                     .SetClassIndex(class_index)
+                     .Build(&my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+
+  rc = my_tree->AssociateNode(my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+  rc = my_tree->AssignRoot(my_voc_op);
+  ASSERT_TRUE(rc.IsOk());
+
+  MS_LOG(DEBUG) << "Launch tree and begin iteration.";
+  rc = my_tree->Prepare();
+  ASSERT_TRUE(rc.IsOk());
+
+  rc = my_tree->Launch();
+  ASSERT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator di(my_tree);
+  TensorRow tensor_list;
+  rc = di.FetchNextTensorRow(&tensor_list);
+  ASSERT_TRUE(rc.IsOk());
+
+  int row_count = 0;
+  while (!tensor_list.empty()) {
+    MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";
+
+    // Display each tensor by calling the printer on it (range-for avoids a signed/unsigned index compare)
+    for (const auto &tensor : tensor_list) {
+      std::ostringstream ss;
+      ss << "(" << tensor << "): " << *tensor << std::endl;
+      MS_LOG(DEBUG) << "Tensor print: " << ss.str() << ".";
+    }
+
+    rc = di.FetchNextTensorRow(&tensor_list);
+    ASSERT_TRUE(rc.IsOk());
+    row_count++;
+  }
+
+  ASSERT_EQ(row_count, 6);
+}
--- a/tests/ut/data/dataset/testVOC2012/Annotations/27.xml
+++ b/tests/ut/data/dataset/testVOC2012/Annotations/27.xml
 <annotation>
 	<folder>VOC2012</folder>
-	<filename>27.jpg</filename>
+	<filename>32.jpg</filename>
 	<source>
 		<database>simulate VOC2007 Database</database>
 		<annotation>simulate VOC2007</annotation>
 		<image>flickr</image>
 	</source>
 	<size>
-		<width>486</width>
-		<height>500</height>
+		<width>500</width>
+		<height>281</height>
 		<depth>3</depth>
 	</size>
-	<segmented>0</segmented>
+	<segmented>1</segmented>
 	<object>
-		<name>person</name>
-		<pose>Unspecified</pose>
+		<name>train</name>
+		<pose>Frontal</pose>
 		<truncated>0</truncated>
 		<difficult>0</difficult>
 		<bndbox>
-			<xmin>161</xmin>
-			<ymin>132</ymin>
+			<xmin>113</xmin>
+			<ymin>79</ymin>
 			<xmax>323</xmax>
-			<ymax>342</ymax>
+			<ymax>191</ymax>
+		</bndbox>
+	</object>
+	<object>
+		<name>train</name>
+		<pose>Left</pose>
+		<truncated>0</truncated>
+		<difficult>0</difficult>
+		<bndbox>
+			<xmin>121</xmin>
+			<ymin>91</ymin>
+			<xmax>191</xmax>
+			<ymax>121</ymax>
+		</bndbox>
+	</object>
+	<object>
+		<name>car</name>
+		<pose>Rear</pose>
+		<truncated>0</truncated>
+		<difficult>0</difficult>
+		<bndbox>
+			<xmin>195</xmin>
+			<ymin>155</ymin>
+			<xmax>235</xmax>
+			<ymax>235</ymax>
 		</bndbox>
-		<part>
-			<name>head</name>
-			<bndbox>
-				<xmin>159</xmin>
-				<ymin>113</ymin>
-				<xmax>208</xmax>
-				<ymax>166</ymax>
-			</bndbox>
-		</part>
-		<part>
-			<name>foot</name>
-			<bndbox>
-				<xmin>261</xmin>
-				<ymin>321</ymin>
-				<xmax>287</xmax>
-				<ymax>344</ymax>
-			</bndbox>
-		</part>
-		<part>
-			<name>foot</name>
-			<bndbox>
-				<xmin>329</xmin>
-				<ymin>317</ymin>
-				<xmax>330</xmax>
-				<ymax>366</ymax>
-			</bndbox>
-		</part>
 	</object>
 </annotation>
--- a/tests/ut/data/dataset/testVOC2012/Annotations/invalidxml.xml
+++ b/tests/ut/data/dataset/testVOC2012/Annotations/invalidxml.xml
+invalidxml
\ No newline at end of file
--- a/tests/ut/data/dataset/testVOC2012/Annotations/xmlnoobject.xml
+++ b/tests/ut/data/dataset/testVOC2012/Annotations/xmlnoobject.xml
+<annotation>
+	<folder>VOC2012</folder>
+	<filename>33.jpg</filename>
+	<source>
+		<database>simulate VOC2007 Database</database>
+		<annotation>simulate VOC2007</annotation>
+		<image>flickr</image>
+	</source>
+	<size>
+		<width>500</width>
+		<height>366</height>
+		<depth>3</depth>
+	</size>
+	<segmented>1</segmented>
+</annotation>
\ No newline at end of file
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/invalidxml.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/invalidxml.txt
+invalidxml
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/train.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/train.txt
+32
+33
+39
+42
+61
+63
+68
+121
+123
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/trainval.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/trainval.txt
+15
\ No newline at end of file
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/val.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/val.txt
+15
\ No newline at end of file
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlnoobject.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlnoobject.txt
+xmlnoobject
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlnotexist.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Main/xmlnotexist.txt
+4176
\ No newline at end of file
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Segmentation/trainval.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Segmentation/trainval.txt
--- a/tests/ut/data/dataset/testVOC2012/ImageSets/Segmentation/val.txt
+++ b/tests/ut/data/dataset/testVOC2012/ImageSets/Segmentation/val.txt
--- a/tests/ut/data/dataset/testVOC2012/JPEGImages/15.jpg
+++ b/tests/ut/data/dataset/testVOC2012/JPEGImages/15.jpg
--- a/tests/ut/data/dataset/testVOC2012/JPEGImages/27.jpg
+++ b/tests/ut/data/dataset/testVOC2012/JPEGImages/27.jpg
--- a/tests/ut/data/dataset/testVOC2012/SegmentationClass/27.png
+++ b/tests/ut/data/dataset/testVOC2012/SegmentationClass/27.png
--- a/tests/ut/data/dataset/testVOC2012/SegmentationObject/27.png
+++ b/tests/ut/data/dataset/testVOC2012/SegmentationObject/27.png
--- a/tests/ut/python/dataset/test_datasets_voc.py
+++ b/tests/ut/python/dataset/test_datasets_voc.py
@@ -15,25 +15,69 @@
 import mindspore.dataset.transforms.vision.c_transforms as vision

 import mindspore.dataset as ds
-from mindspore import log as logger

 DATA_DIR = "../data/dataset/testVOC2012"
+# Expected shape[0] of the "image" and "target" columns for each row, indexed by iteration order
+# (the tests that use these lists read the dataset with shuffle=False).
+IMAGE_SHAPE = [2268, 2268, 2268, 2268, 642, 607, 561, 596, 612, 2268]
+TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680]

+def test_voc_segmentation():
+    """Read the Segmentation "train" split in order and check every row's image and target size."""
+    dataset = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True, shuffle=False)
+    rows_seen = 0
+    for row in dataset.create_dict_iterator():
+        assert row["image"].shape[0] == IMAGE_SHAPE[rows_seen]
+        assert row["target"].shape[0] == TARGET_SHAPE[rows_seen]
+        rows_seen += 1
+    # The split is expected to yield exactly 10 rows.
+    assert rows_seen == 10

-def test_voc_normal():
-    data1 = ds.VOCDataset(DATA_DIR, decode=True)
+def test_voc_detection():
+    """Read the Detection "train" split in order; check image sizes, row count and per-class box counts."""
+    data1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)
    num = 0
+    # count[k] accumulates how many annotation entries carry k in their first field.
+    count = [ 0, 0, 0, 0, 0, 0 ]
    for item in data1.create_dict_iterator():
-        logger.info("item[image] is {}".format(item["image"]))
-        logger.info("item[image].shape is {}".format(item["image"].shape))
-        logger.info("item[target] is {}".format(item["target"]))
-        logger.info("item[target].shape is {}".format(item["target"].shape))
+        assert (item["image"].shape[0] == IMAGE_SHAPE[num])
+        for bbox in item["annotation"]:
+            # bbox[0] is used as the index into count (presumably the class index -- confirm against VOCOp).
+            count[bbox[0]] += 1
        num += 1
-    logger.info("num is {}".format(str(num)))
+    assert (num == 9)
+    assert (count == [3,2,1,2,4,3])

+def test_voc_class_index():
+    """Detection with a user-supplied class_indexing: the mapping is reported back and only its indices appear."""
+    requested = {'car': 0, 'cat': 1, 'train': 5}
+    expected = {'car': 0, 'cat': 1, 'train': 5}
+    dataset = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", class_indexing=requested, decode=True)
+    assert dataset.get_class_indexing() == expected
+    # The mapping must still be reported after a shuffle op is stacked on top.
+    dataset = dataset.shuffle(4)
+    assert dataset.get_class_indexing() == expected
+    rows_seen = 0
+    per_class = [0] * 6
+    for row in dataset.create_dict_iterator():
+        for bbox in row["annotation"]:
+            assert bbox[0] in (0, 1, 5)
+            per_class[bbox[0]] += 1
+        rows_seen += 1
+    assert rows_seen == 6
+    assert per_class == [3, 2, 0, 0, 0, 3]
+
+def test_voc_get_class_indexing():
+    """Detection without class_indexing: get_class_indexing reports the six-class mapping asserted here."""
+    expected = {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}
+    dataset = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True)
+    assert dataset.get_class_indexing() == expected
+    # The mapping must still be reported after a shuffle op is stacked on top.
+    dataset = dataset.shuffle(4)
+    assert dataset.get_class_indexing() == expected
+    rows_seen = 0
+    per_class = [0] * 6
+    for row in dataset.create_dict_iterator():
+        for bbox in row["annotation"]:
+            assert bbox[0] in (0, 1, 2, 3, 4, 5)
+            per_class[bbox[0]] += 1
+        rows_seen += 1
+    assert rows_seen == 9
+    assert per_class == [3, 2, 1, 2, 4, 3]

 def test_case_0():
-    data1 = ds.VOCDataset(DATA_DIR, decode=True)
+    data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", mode="train", decode=True)

    resize_op = vision.Resize((224, 224))

@@ -46,7 +90,79 @@ def test_case_0():

    num = 0
    for item in data1.create_dict_iterator():
-        logger.info("item[image].shape is {}".format(item["image"].shape))
-        logger.info("item[target].shape is {}".format(item["target"].shape))
        num += 1
-    logger.info("num is {}".format(str(num)))
+    assert (num == 20)
+
+def test_case_1():
+    """Detection pipeline: resize -> repeat(4) -> batch(2, drop_remainder=True) is expected to yield 18 batches."""
+    dataset = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True)
+
+    dataset = dataset.map(input_columns=["image"], operations=vision.Resize((224, 224)))
+    dataset = dataset.repeat(4)
+    dataset = dataset.batch(2, drop_remainder=True, pad_info={})
+
+    # Only the number of produced batches matters; their contents are not inspected.
+    batches = sum(1 for _ in dataset.create_dict_iterator())
+    assert batches == 18
+
+def test_voc_exception():
+    """
+    Check that invalid VOCDataset arguments or annotation files raise the expected exception.
+
+    Each case builds the dataset and iterates it completely; a case fails if nothing is raised.
+    """
+    cases = [
+        # (VOCDataset keyword arguments besides decode, expected exception type)
+        (dict(task="InvalidTask", mode="train"), ValueError),
+        (dict(task="Segmentation", mode="train", class_indexing={"cat": 0}), ValueError),
+        (dict(task="Detection", mode="notexist"), ValueError),
+        (dict(task="Detection", mode="xmlnotexist"), RuntimeError),
+        (dict(task="Detection", mode="invalidxml"), RuntimeError),
+        (dict(task="Detection", mode="xmlnoobject"), RuntimeError),
+    ]
+    for kwargs, expected_error in cases:
+        try:
+            data = ds.VOCDataset(DATA_DIR, decode=True, **kwargs)
+            for _ in data.create_dict_iterator():
+                pass
+        except expected_error:
+            continue
+        # Raised explicitly (not via `assert False`) so the check survives `python -O` and names the failing case.
+        raise AssertionError("VOCDataset({}) did not raise {}".format(kwargs, expected_error.__name__))
+
+
+if __name__ == '__main__':
+    # Run every test in this file, in declaration order, when executed as a script.
+    for test_func in (test_voc_segmentation, test_voc_detection, test_voc_class_index,
+                      test_voc_get_class_indexing, test_case_0, test_case_1, test_voc_exception):
+        test_func()