Commit 0bbce936 authored by mindspore-ci-bot, committed by Gitee

!4772 Adding AlbumDataset for device training

Merge pull request !4772 from EricZ/ms-album
......@@ -393,7 +393,7 @@ build_mindspore()
CMAKE_VERBOSE="--verbose"
fi
cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
echo "success to build mindspore project!"
echo "success building mindspore project!"
}
checkndk() {
......
......@@ -21,6 +21,7 @@
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/engine/dataset_iterator.h"
// Source dataset headers (in alphabetical order)
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/clue_op.h"
......@@ -117,6 +118,15 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file) {
// FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS
// (In alphabetical order)
// Function to create an AlbumDataset.
std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names, bool decode,
const std::shared_ptr<SamplerObj> &sampler) {
auto ds = std::make_shared<AlbumDataset>(dataset_dir, data_schema, column_names, decode, sampler);
return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a CelebADataset.
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type,
const std::shared_ptr<SamplerObj> &sampler, bool decode,
......@@ -687,6 +697,49 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
// (In alphabetical order)
// Constructor for AlbumDataset
AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names, bool decode,
const std::shared_ptr<SamplerObj> &sampler)
: dataset_dir_(dataset_dir),
schema_path_(data_schema),
column_names_(column_names),
decode_(decode),
sampler_(sampler) {}
bool AlbumDataset::ValidateParams() {
if (!ValidateDatasetDirParam("AlbumDataset", dataset_dir_)) {
return false;
}
if (!ValidateDatasetFilesParam("AlbumDataset", {schema_path_})) {
return false;
}
return true;
}
// Function to build AlbumDataset
std::vector<std::shared_ptr<DatasetOp>> AlbumDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
auto schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(schema->LoadSchemaFile(schema_path_, column_names_));
// Argument that is not exposed to user in the API.
std::set<std::string> extensions = {};
node_ops.push_back(std::make_shared<AlbumOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
decode_, extensions, std::move(schema), std::move(sampler_->Build())));
return node_ops;
}
// Constructor for CelebADataset
CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
......
......@@ -13,6 +13,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
text_file_op.cc
clue_op.cc
csv_op.cc
album_op.cc
)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
#include <deque>
#include <memory>
#include <queue>
#include <string>
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include <unordered_map>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/datasetops/parallel_op.h"
#include "minddata/dataset/engine/datasetops/source/io_block.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/queue.h"
#include "minddata/dataset/util/services.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/wait_post.h"
namespace mindspore {
namespace dataset {
// Forward declares
template <typename T>
class Queue;
// Define row information as a list of file objects to read
using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>;
/// \class AlbumOp album_op.h
class AlbumOp : public ParallelOp, public RandomAccessOp {
public:
class Builder {
public:
/// \brief Constructor for Builder class of AlbumOp
Builder();
/// \brief Destructor.
~Builder() = default;
/// \brief Setter method
/// \param[in] rows_per_buffer
/// \return Builder setter method returns reference to the builder
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
/// \brief Setter method
/// \param[in] size
/// \return Builder setter method returns reference to the builder
Builder &SetOpConnectorSize(int32_t size) {
builder_op_connector_size_ = size;
return *this;
}
/// \brief Setter method
/// \param[in] exts - file extensions to be read
/// \return Builder setter method returns reference to the builder
Builder &SetExtensions(const std::set<std::string> &exts) {
builder_extensions_ = exts;
return *this;
}
/// \brief Setter method
/// \param[in] do_decode
/// \return Builder setter method returns reference to the builder
Builder &SetDecode(bool do_decode) {
builder_decode_ = do_decode;
return *this;
}
/// \brief Setter method
/// \param[in] num_workers
/// \return Builder setter method returns reference to the builder
Builder &SetNumWorkers(int32_t num_workers) {
builder_num_workers_ = num_workers;
return *this;
}
/// \brief Setter method
/// \param[in] sampler
/// \return Builder setter method returns reference to the builder
Builder &SetSampler(std::shared_ptr<Sampler> sampler) {
builder_sampler_ = std::move(sampler);
return *this;
}
/// \brief Setter method
/// \param[in] dir - dataset directory
/// \return Builder setter method returns reference to the builder
Builder &SetAlbumDir(const std::string &dir) {
builder_dir_ = dir;
return *this;
}
/// \brief Setter method
/// \param[in] file - schema file to load
/// \return Builder setter method returns reference to the builder
Builder &SetSchemaFile(const std::string &file) {
builder_schema_file_ = file;
return *this;
}
/// \brief Setter method
/// \param[in] columns - input columns
/// \return Builder setter method returns reference to the builder
Builder &SetColumnsToLoad(const std::vector<std::string> &columns) {
builder_columns_to_load_ = columns;
return *this;
}
/// \brief Check validity of input args
/// \return - The error code return
Status SanityCheck();
/// \brief The builder "build" method creates the final object.
/// \param[inout] std::shared_ptr<AlbumOp> *op - DatasetOp
/// \return - The error code return
Status Build(std::shared_ptr<AlbumOp> *op);
private:
bool builder_decode_;
std::vector<std::string> builder_columns_to_load_;
std::string builder_dir_;
std::string builder_schema_file_;
int32_t builder_num_workers_;
int32_t builder_rows_per_buffer_;
int32_t builder_op_connector_size_;
std::set<std::string> builder_extensions_;
std::shared_ptr<Sampler> builder_sampler_;
std::unique_ptr<DataSchema> builder_schema_;
};
/// \brief Constructor
/// \param[in] num_wkrs - Num of workers reading images in parallel
/// \param[in] rows_per_buffer Number of images (rows) in each buffer
/// \param[in] file_dir - directory of Album
/// \param[in] queue_size - connector size
/// \param[in] do_decode - decode image files
/// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
/// \param[in] data_schema - schema of dataset
/// \param[in] sampler - sampler tells AlbumOp what to read
AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<Sampler> sampler);
/// \brief Destructor.
~AlbumOp() = default;
/// \brief Initialize AlbumOp related var, calls the function to walk all files
/// \return - The error code return
Status PrescanEntry();
/// \brief Worker thread pulls a number of IOBlocks from the IOBlock Queue, makes a buffer and pushes it to the Connector
/// \param[in] worker_id - id of each worker
/// \return Status - The error code return
Status WorkerEntry(int32_t worker_id) override;
/// \brief Main Loop of AlbumOp
/// Master thread: Fill IOBlockQueue, then goes to sleep
/// Worker thread: pulls IOBlock from IOBlockQueue, works on it, then puts the buffer into the output Connector
/// \return Status - The error code return
Status operator()() override;
/// \brief A print method typically used for debugging
/// \param[in] out
/// \param[in] show_all
void Print(std::ostream &out, bool show_all) const override;
/// \brief Check if image is valid. Only JPEG/PNG/GIF/BMP are supported
/// This function could be optimized to return the tensor to reduce open/closing files
/// \return Status - The error code return
Status CheckImageType(const std::string &file_name, bool *valid);
// Base-class override for NodePass visitor acceptor.
// @param p - Pointer to the NodePass to be accepted.
// @param modified - Whether this node visit modified the pipeline.
// @return - Status of the node visit.
Status Accept(NodePass *p, bool *modified) override;
// Op name getter
// @return Name of the current Op
std::string Name() const override { return "AlbumOp"; }
private:
/// \brief Initialize Sampler, calls sampler->Init() within
/// \return Status The error code return
Status InitSampler();
/// \brief Load image to tensor row
/// \param[in] image_file Image name of file
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorRow *row);
/// \brief Load vector of ints to tensor, append tensor to tensor row
/// \param[in] json_obj Json object containing multi-dimensional label
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
/// \brief Load string array into a tensor, append tensor to tensor row
/// \param[in] json_obj Json object containing string tensor
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
/// \brief Load string into a tensor, append tensor to tensor row
/// \param[in] json_obj Json object containing string tensor
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
/// \brief Load float value to tensor row
/// \param[in] json_obj Json object containing float
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
/// \brief Load empty tensor to tensor row
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadEmptyTensor(uint32_t col_num, TensorRow *row);
/// \brief Load id from file name to tensor row
/// \param[in] file The file name to get ID from
/// \param[in] col_num Column num in schema
/// \param[inout] row Tensor row to push to
/// \return Status The error code return
Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row);
/// \brief Load a tensor row according to a json file
/// \param[in] file Json file location
/// \param[inout] row Tensor row to load the Json content into
/// \return Status The error code return
Status LoadTensorRow(const std::string &file, TensorRow *row);
/// \brief Load a buffer of rows referenced by the given keys
/// \param[in] const std::vector<int64_t> &keys Keys in ioblock
/// \param[inout] std::unique_ptr<DataBuffer> db Databuffer to push to
/// \return Status The error code return
Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db);
/// \brief Launch worker threads and initialize the op; called first when operator() is invoked
/// \return The error code return
Status LaunchThreadsAndInitOp();
/// \brief reset Op
/// \return Status The error code return
Status Reset() override;
// Private function for computing the assignment of the column name map.
// @return - Status
Status ComputeColMap() override;
int32_t rows_per_buffer_;
std::string folder_path_; // directory of image folder
bool decode_;
std::set<std::string> extensions_; // extensions allowed
std::unordered_map<std::string, int32_t> col_name_map_;
std::unique_ptr<DataSchema> data_schema_;
std::shared_ptr<Sampler> sampler_;
int64_t row_cnt_;
int64_t buf_cnt_;
int64_t sampler_ind_;
int64_t dirname_offset_;
WaitPost wp_;
std::vector<std::string> image_rows_;
QueueList<std::unique_ptr<IOBlock>> io_block_queues_; // queues of IOBlocks
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
......@@ -134,7 +134,6 @@ Status ImageFolderOp::operator()() {
TensorRow sample_row;
RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row));
std::shared_ptr<Tensor> sample_ids = sample_row[0];
if (sample_ids->type() != DataType(DataType::DE_INT64)) RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64");
for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) {
if ((*itr) >= num_rows_) continue; // index out of bound, skipping
keys.push_back(*itr);
......
......@@ -30,6 +30,7 @@
#include "minddata/dataset/engine/datasetops/repeat_op.h"
#include "minddata/dataset/engine/datasetops/skip_op.h"
#include "minddata/dataset/engine/datasetops/shuffle_op.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
......@@ -199,6 +200,11 @@ Status NodePass::RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified)
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<CacheOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
......
......@@ -49,6 +49,8 @@ class FilterOp;
class GeneratorOp;
#endif
class AlbumOp;
class RandomDataOp;
class RepeatOp;
......@@ -178,6 +180,8 @@ class NodePass : public Pass {
virtual Status RunOnNode(std::shared_ptr<RandomDataOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified);
......
......@@ -21,6 +21,7 @@
#include "minddata/dataset/engine/datasetops/cache_lookup_op.h"
#include "minddata/dataset/engine/datasetops/cache_merge_op.h"
#include "minddata/dataset/engine/datasetops/cache_op.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
......@@ -152,6 +153,11 @@ Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<ImageFolderOp> n
return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
}
// Perform leaf node cache transform identification
Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
}
// Perform leaf node cache transform identification
Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<MnistOp> node, bool *modified) {
return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
......
......@@ -79,6 +79,12 @@ class CacheTransformPass : public TreePass {
/// \return Status The error code return
Status RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified) override;
/// \brief Perform leaf node cache transform identification
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
/// \return Status The error code return
Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override;
/// \brief Perform leaf node cache transform identification
/// \param[in] node The node being visited
/// \param[inout] modified Indicator if the node was changed at all
......
......@@ -111,5 +111,11 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modifie
std::cout << "Visiting ImageFolderOp" << '\n';
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting ImageFolderOp" << '\n';
return Status::OK();
}
} // namespace dataset
} // namespace mindspore
......@@ -58,6 +58,8 @@ class PrinterPass : public NodePass {
Status RunOnNode(std::shared_ptr<DeviceQueueOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override;
};
} // namespace dataset
......
......@@ -48,6 +48,7 @@ class TensorOperation;
class SchemaObj;
class SamplerObj;
// Datasets classes (in alphabetical order)
class AlbumDataset;
class CelebADataset;
class Cifar10Dataset;
class Cifar100Dataset;
......@@ -79,13 +80,27 @@ class ZipDataset;
/// \return Shared pointer to the current schema
std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = "");
/// \brief Function to create an AlbumDataset
/// \notes The columns of the generated dataset are defined by the given schema file
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] data_schema Path to dataset schema file
/// \param[in] column_names Column names used to specify which columns to load; if empty, all columns will be read.
///    (default = {})
/// \param[in] decode The option to decode the images in the dataset (default = false)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///    a `RandomSampler` will be used to randomly iterate the entire dataset (default = nullptr)
/// \return Shared pointer to the current Dataset
std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names = {}, bool decode = false,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
/// \brief Function to create a CelebADataset
/// \notes The generated dataset has two columns ['image', 'attr'].
///    The type of the image tensor is uint8. The attr tensor is uint32 and one-hot encoded.
/// \param[in] dataset_dir Path to the root directory that contains the dataset.
/// \param[in] dataset_type One of 'all', 'train', 'valid' or 'test'.
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///    will be used to randomly iterate the entire dataset
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
/// \return Shared pointer to the current Dataset
......@@ -97,7 +112,7 @@ std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std:
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///    will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
......@@ -106,7 +121,7 @@ std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir,
/// \notes The generated dataset has three columns ['image', 'coarse_label', 'fine_label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///    will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
......@@ -114,19 +129,19 @@ std::shared_ptr<Cifar100Dataset> Cifar100(const std::string &dataset_dir,
/// \brief Function to create a CLUEDataset
/// \notes The generated dataset has a variable number of columns depending on the task and usage
/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list
///    will be sorted in a lexicographical order.
/// \param[in] task The kind of task, one of "AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC" and "CSL" (default="AFQMC").
/// \param[in] usage Specifies whether to use "train", "test" or "eval" data (default="train").
/// \param[in] num_samples The number of samples to be included in the dataset.
///    (Default = 0 means all samples.)
/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal)
///    Can be any of:
///    ShuffleMode::kFalse - No shuffling is performed.
///    ShuffleMode::kFiles - Shuffle files only.
///    ShuffleMode::kGlobal - Shuffle both the files and samples.
/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1)
/// \param[in] shard_id The shard ID within num_shards. This argument should be
///    specified only when num_shards is also specified. (Default = 0)
/// \return Shared pointer to the current CLUEDataset
std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files, const std::string &task = "AFQMC",
const std::string &usage = "train", int64_t num_samples = 0,
......@@ -135,19 +150,19 @@ std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files,
/// \brief Function to create a CocoDataset
/// \notes The generated dataset has multiple columns:
///    - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
///      ['iscrowd', dtype=uint32]].
///    - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd', dtype=uint32]].
///    - task='Keypoint', column: [['image', dtype=uint8], ['keypoints', dtype=float32],
///      ['num_keypoints', dtype=uint32]].
///    - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32],
///      ['iscrowd', dtype=uint32], ['area', dtype=uint32]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] annotation_file Path to the annotation json
/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///    will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
const std::string &task = "Detection", const bool &decode = false,
......@@ -181,12 +196,12 @@ std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, c
/// \brief Function to create an ImageFolderDataset
/// \notes A source dataset that reads images from a tree of directories
///    All images within one folder have the same label
///    The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///    a `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] extensions File extensions to be read
/// \param[in] class_indexing a class name to label map
/// \return Shared pointer to the current ImageFolderDataset
......@@ -200,9 +215,9 @@ std::shared_ptr<ImageFolderDataset> ImageFolder(const std::string &dataset_dir,
/// \param[in] dataset_file The dataset file to be read
/// \param[in] usage Specifies whether to use "train", "eval" or "inference" data (default="train")
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///    a `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder
///    names will be sorted alphabetically and each class will be given a unique index starting from 0).
/// \param[in] decode Decode the images after reading (default=false).
/// \return Shared pointer to the current ManifestDataset
std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string usage = "train",
......@@ -214,7 +229,7 @@ std::shared_ptr<ManifestDataset> Manifest(std::string dataset_file, std::string
/// \notes The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`,
///    a `RandomSampler` will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir,
const std::shared_ptr<SamplerObj> &sampler = nullptr);
......@@ -245,17 +260,17 @@ std::shared_ptr<RandomDataset> RandomData(const int32_t &total_rows = 0, T schem
/// \brief Function to create a TextFileDataset
/// \notes The generated dataset has one column ['text']
/// \param[in] dataset_files List of files to be read to search for a pattern of files. The list
///    will be sorted in a lexicographical order.
/// \param[in] num_samples The number of samples to be included in the dataset.
///    (Default = 0 means all samples.)
/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal)
///    Can be any of:
///    ShuffleMode::kFalse - No shuffling is performed.
///    ShuffleMode::kFiles - Shuffle files only.
///    ShuffleMode::kGlobal - Shuffle both the files and samples.
/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1)
/// \param[in] shard_id The shard ID within num_shards. This argument should be
///    specified only when num_shards is also specified. (Default = 0)
/// \return Shared pointer to the current TextFileDataset
std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples = 0,
ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1,
......@@ -263,16 +278,16 @@ std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &datase
/// \brief Function to create a VOCDataset
/// \notes The generated dataset has multiple columns:
///    - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32],
///      ['difficult', dtype=uint32], ['truncate', dtype=uint32]].
///    - task='Segmentation', column: [['image', dtype=uint8], ['target',dtype=uint8]].
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection"
/// \param[in] mode Set the data list txt file to be read
/// \param[in] class_indexing A str-to-int mapping from label name to index
/// \param[in] decode Decode the images after reading
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
///    will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
const std::string &mode = "train",
......@@ -335,9 +350,9 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \notes Combines batch_size number of consecutive rows into batches
/// \param[in] batch_size The number of rows each batch is created with
/// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
///    batch. If true, and if there are less than batch_size rows
///    available to make the last batch, then those rows will
///    be dropped and not propagated to the next node
/// \return Shared pointer to the current BatchDataset
std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
......@@ -368,16 +383,16 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \brief Function to create a MapDataset
/// \notes Applies each operation in operations to this dataset
/// \param[in] operations Vector of operations to be applied on the dataset. Operations are
///    applied in the order they appear in this list
/// \param[in] input_columns Vector of the names of the columns that will be passed to the first
///    operation as input. The size of this list must match the number of
///    input columns expected by the first operator. The default input_columns
///    is the first column
/// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation
///    This parameter is mandatory if len(input_columns) != len(output_columns)
///    The size of this list must match the number of output columns of the
///    last operation. The default output_columns will have the same
///    name as the input columns, i.e., the columns will be replaced
/// \param[in] project_columns A list of column names to project
/// \return Shared pointer to the current MapDataset
std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
......@@ -404,7 +419,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] count Number of times the dataset should be repeated
/// \return Shared pointer to the current Dataset
/// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset`
///    due to a limitation in the current implementation
std::shared_ptr<Dataset> Repeat(int32_t count = -1);
/// \brief Function to create a Shuffle Dataset
......@@ -506,6 +521,31 @@ class SchemaObj {
// DERIVED DATASET CLASSES FOR LEAF-NODE DATASETS
// (In alphabetical order)
class AlbumDataset : public Dataset {
public:
/// \brief Constructor
AlbumDataset(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names, bool decode, const std::shared_ptr<SamplerObj> &sampler);
/// \brief Destructor
~AlbumDataset() = default;
/// \brief a base class override function to create a runtime dataset op object from this class
/// \return shared pointer to the newly created DatasetOp
std::vector<std::shared_ptr<DatasetOp>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
std::string dataset_dir_;
std::string schema_path_;
std::vector<std::string> column_names_;
bool decode_;
std::shared_ptr<SamplerObj> sampler_;
};
class CelebADataset : public Dataset {
public:
/// \brief Constructor
......
......@@ -5,6 +5,7 @@ SET(DE_UT_SRCS
common/cvop_common.cc
common/bboxop_common.cc
auto_contrast_op_test.cc
album_op_test.cc
batch_op_test.cc
bit_functions_test.cc
storage_container_test.cc
......@@ -101,6 +102,7 @@ SET(DE_UT_SRCS
c_api_samplers_test.cc
c_api_transforms_test.cc
c_api_dataset_ops_test.cc
c_api_dataset_album_test.cc
c_api_dataset_cifar_test.cc
c_api_dataset_clue_test.cc
c_api_dataset_coco_test.cc
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/transforms.h"
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
std::shared_ptr<AlbumOp> Album(int64_t num_works, int64_t rows, int64_t conns, std::string path,
bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
bool decode = false) {
std::shared_ptr<AlbumOp> so;
AlbumOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works)
.SetAlbumDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSampler(std::move(sampler))
.SetDecode(decode)
.Build(&so);
return so;
}
std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t rows, int64_t conns, std::string path,
std::string schema_file, std::vector<std::string> column_names = {},
bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
bool decode = false) {
std::shared_ptr<AlbumOp> so;
AlbumOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works)
.SetSchemaFile(schema_file)
.SetColumnsToLoad(column_names)
.SetAlbumDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSampler(std::move(sampler))
.SetDecode(decode)
.Build(&so);
return so;
}
class MindDataTestAlbum : public UT::DatasetOpTesting {
protected:
};
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file, column_names, false), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
// add the priority column
std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
double priority = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
tensor_map["_priority"]->GetItemAt<double>(&priority, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "priority: " << priority << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
// add the priority column
std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
double priority = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
tensor_map["_priority"]->GetItemAt<double>(&priority, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "priority: " << priority << " embedding : " <<
tensor_map["_embedding"]->shape() << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/include/datasets.h"
using namespace mindspore::dataset::api;
using mindspore::dataset::Tensor;
class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};
TEST_F(MindDataTestPipeline, TestAlbumBasic) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumBasic.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
MS_LOG(INFO) << "Tensor image shape: " << image->shape();
iter->GetNextRow(&row);
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumDecode) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumDecode.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
auto shape = image->shape();
MS_LOG(INFO) << "Tensor image shape size: " << shape.Size();
MS_LOG(INFO) << "Tensor image shape: " << image->shape();
EXPECT_GT(shape.Size(), 1); // Verify decode=true took effect
iter->GetNextRow(&row);
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumNumSamplers) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumNumSamplers.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(0, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
MS_LOG(INFO) << "Tensor image shape: " << image->shape();
iter->GetNextRow(&row);
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumError) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumError.";
std::string folder_path = datasets_root_path_ + "/testAlbum/ima";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(0, 1));
EXPECT_EQ(ds, nullptr);
}
......@@ -32,6 +32,8 @@ export GLOG_v=2
## prepare data for dataset & mindrecord
cp -fr $PROJECT_PATH/tests/ut/data ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/
## prepare the album dataset; it uses absolute paths, so it has to be generated
python ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/data/dataset/testAlbum/gen_json.py
if [ $# -gt 0 ]; then
./ut_tests --gtest_filter=$1
......
{
"columns": {
"image": {
"type": "uint8",
"rank": 1
},
"label" : {
"type": "string",
"rank": 1
},
"id" : {
"type": "int64",
"rank": 0
}
}
}
......@@ -5,7 +5,7 @@
"rank": 1
},
"label" : {
"type": "int32",
"type": "string",
"rank": 1
},
"id" : {
......
......@@ -5,7 +5,7 @@
"rank": 1
},
"label" : {
"type": "int32",
"type": "string",
"rank": 1
},
"id" : {
......
......@@ -2,21 +2,21 @@ import json
import os
def dump_json_from_dict(structure, file_name):
with open(file_name + '.json', 'w') as file_path:
json.dump(structure, file_path)
with open(file_name + '.json', 'w') as fp:
json.dump(structure, fp)
if __name__ == '__main__':
# iterate over directory
DIRECTORY = "imagefolder"
i = 0
# iterate over DIRECTORY
DIRECTORY = os.path.dirname(os.path.realpath(__file__)) + "/original"
PARENT_DIR = os.path.dirname(DIRECTORY)
i = -1
for filename in os.listdir(DIRECTORY):
default_dict = {}
default_dict.update(dataset='')
default_dict.update(image=(os.path.join(DIRECTORY, filename)))
default_dict.update(label=[1, 2])
default_dict.update(image=os.path.abspath(os.path.join(DIRECTORY, filename)))
default_dict.update(label=['3', '2'])
default_dict.update(_priority=0.8)
default_dict.update(_embedding='sample.bin')
default_dict.update(_segmented_image=(os.path.join(DIRECTORY, filename)))
default_dict.update(_processed_image=(os.path.join(DIRECTORY, filename)))
default_dict.update(_embedding=os.path.abspath(os.path.join(PARENT_DIR, 'sample.bin')))
default_dict.update(_processed_image=os.path.abspath(os.path.join(DIRECTORY, filename)))
i = i + 1
dump_json_from_dict(default_dict, 'images/'+str(i))
dump_json_from_dict(default_dict, PARENT_DIR + '/images/'+str(i))
{"dataset": "", "image": "original/apple_expect_decoded.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "sample.bin", "_processed_image": "original/apple_expect_decoded.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_decoded.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_decoded.jpg", "_processed_image": "imagefolder/apple_expect_decoded.jpg"}
\ No newline at end of file
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_resize_bilinear.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_resize_bilinear.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_resize_bilinear.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_resize_bilinear.jpg", "_processed_image": "imagefolder/apple_expect_resize_bilinear.jpg"}
\ No newline at end of file
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_changemode.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_changemode.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_changemode.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_changemode.jpg", "_processed_image": "imagefolder/apple_expect_changemode.jpg"}
\ No newline at end of file
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_not_flip.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_not_flip.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_not_flip.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_not_flip.jpg", "_processed_image": "imagefolder/apple_expect_not_flip.jpg"}
\ No newline at end of file
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_flipped_horizontal.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_flipped_horizontal.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_flipped_horizontal.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_horizontal.jpg", "_processed_image": "imagefolder/apple_expect_flipped_horizontal.jpg"}
\ No newline at end of file
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_rescaled.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_rescaled.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_rescaled.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_rescaled.jpg", "_processed_image": "imagefolder/apple_expect_rescaled.jpg"}
\ No newline at end of file
{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_flipped_vertical.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_flipped_vertical.jpg"}
{"dataset": "", "image": "imagefolder/apple_expect_flipped_vertical.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_vertical.jpg", "_processed_image": "imagefolder/apple_expect_flipped_vertical.jpg"}
\ No newline at end of file