提交 bd5a777f 编写于 作者: E ervinzhang

introducing new C++ API

上级 2f565f4c
......@@ -17,6 +17,10 @@ else()
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
endif()
if (ENABLE_PYTHON)
add_compile_definitions(ENABLE_PYTHON)
endif()
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
......
......@@ -25,7 +25,7 @@ usage()
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
echo ""
echo "Options:"
echo " -d Debug mode"
......@@ -56,6 +56,7 @@ usage()
echo " -s Enable serving module, default off"
echo " -B Enable debugger, default off"
echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"
}
# check value of input is 'on' or 'off'
......@@ -98,9 +99,10 @@ checkopts()
ENABLE_SERVING="off"
ENABLE_DEBUGGER="off"
ENABLE_IBVERBS="off"
ENABLE_PYTHON="on"
# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
......@@ -151,6 +153,10 @@ checkopts()
check_on_off $OPTARG p
ENABLE_PROFILE="$OPTARG"
;;
l)
check_on_off $OPTARG l
ENABLE_PYTHON="$OPTARG"
;;
i)
INC_BUILD="on"
;;
......@@ -316,6 +322,7 @@ build_mindspore()
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
fi
......
......@@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF)
option(ENABLE_DEBUGGER "enable debugger" OFF)
option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
option(ENABLE_PYTHON "Enable python" ON)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (WIN32)
......
......@@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform)
include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include)
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/dataset/include)
######################################################################
####################### Flags ########################################
......@@ -67,7 +68,10 @@ add_dependencies(engine-gnn core)
add_dependencies(engine core)
add_dependencies(text core)
add_dependencies(text-kernels core)
add_dependencies(APItoPython core)
add_dependencies(cpp-API core)
if (ENABLE_PYTHON)
add_dependencies(APItoPython core)
endif()
if (ENABLE_TDTQUE)
add_dependencies(engine-tdt core)
endif ()
......@@ -78,7 +82,7 @@ set(submodules
$<TARGET_OBJECTS:kernels>
$<TARGET_OBJECTS:kernels-image>
$<TARGET_OBJECTS:kernels-data>
$<TARGET_OBJECTS:APItoPython>
$<TARGET_OBJECTS:cpp-API>
$<TARGET_OBJECTS:engine-datasetops-source>
$<TARGET_OBJECTS:engine-datasetops-source-sampler>
$<TARGET_OBJECTS:engine-gnn>
......@@ -90,6 +94,12 @@ set(submodules
$<TARGET_OBJECTS:text-kernels>
)
if (ENABLE_PYTHON)
set(submodules
${submodules}
$<TARGET_OBJECTS:APItoPython>)
endif()
if (ENABLE_TDTQUE)
add_library(_c_dataengine SHARED ${submodules} $<TARGET_OBJECTS:engine-tdt>)
else ()
......
# Define SUBMODULE_ID for every .cc file found (recursively) under this directory.
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)

# The pybind11-based bindings only exist when ENABLE_PYTHON is on, so every
# command that names the APItoPython target must stay inside this guard.
if (ENABLE_PYTHON)
    add_library(APItoPython OBJECT
        de_pipeline.cc
        python_bindings.cc
    )
    target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS})
endif()

# The C++ API object library is built regardless of ENABLE_PYTHON.
add_library(cpp-API OBJECT
    datasets.cc
    iterator.cc
    transforms.cc
    samplers.cc
)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include "dataset/include/datasets.h"
#include "dataset/include/transforms.h"
#include "dataset/include/samplers.h"
#include "dataset/engine/dataset_iterator.h"
#include "dataset/engine/datasetops/source/image_folder_op.h"
#include "dataset/engine/datasetops/source/mnist_op.h"
#include "dataset/engine/datasetops/source/cifar_op.h"
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/source/sampler/sampler.h"
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "dataset/core/config_manager.h"
#include "dataset/util/random.h"
namespace mindspore {
namespace dataset {
namespace api {
// Evaluates the Status-valued expression `_s` exactly once. If the resulting
// Status reports an error, the enclosing function returns nullptr; otherwise
// execution continues after the macro.
// The temporary avoids a double-underscore name because identifiers containing
// "__" are reserved for the C++ implementation.
#define RETURN_NULL_IF_ERROR(_s)          \
  do {                                    \
    Status null_if_error_rc_ = (_s);      \
    if (null_if_error_rc_.IsError()) {    \
      return nullptr;                     \
    }                                     \
  } while (false)
// Function to create the iterator, which will build and launch the execution tree.
std::shared_ptr<Iterator> Dataset::CreateIterator() {
std::shared_ptr<Iterator> iter;
try {
iter = std::make_shared<Iterator>();
Status rc = iter->BuildAndLaunchTree(shared_from_this());
if (rc.IsError()) {
MS_LOG(ERROR) << "CreateIterator failed.";
return nullptr;
}
return iter;
} catch (const std::exception &err) {
MS_LOG(ERROR) << "CreateIterator: Iterator exception caught: " << err.what();
return nullptr;
}
return iter;
}
// Constructor: initialises the worker count, rows-per-buffer and connector
// queue size from the values held by the global config manager.
Dataset::Dataset() {
  std::shared_ptr<ConfigManager> config = GlobalContext::config_manager();

  num_workers_ = config->num_parallel_workers();
  rows_per_buffer_ = config->rows_per_buffer();
  connector_que_size_ = config->op_connector_size();
}
// Function to create a ImageFolderDataset.
std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode,
std::shared_ptr<SamplerObj> sampler, std::set<std::string> extensions,
std::map<std::string, int32_t> class_indexing) {
// This arg is exist in ImageFolderOp, but not externalized (in Python API). The default value is false.
bool recursive = false;
// Create logical representation of ImageFolderDataset.
auto ds = std::make_shared<ImageFolderDataset>(dataset_dir, decode, sampler, recursive, extensions, class_indexing);
// Call derived class validation method.
return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a MnistDataset.
std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler) {
auto ds = std::make_shared<MnistDataset>(dataset_dir, sampler);
// Call derived class validation method.
return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a Cifar10Dataset.
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples,
std::shared_ptr<SamplerObj> sampler) {
auto ds = std::make_shared<Cifar10Dataset>(dataset_dir, num_samples, sampler);
// Call derived class validation method.
return ds->ValidateParams() ? ds : nullptr;
}
// Creates a BatchDataset node whose child is this dataset.
// Padding is disabled and no per-column map is supplied; only batch_size and
// drop_remainder come from the caller. Returns nullptr if validation fails.
std::shared_ptr<BatchDataset> Dataset::Batch(int32_t batch_size, bool drop_remainder) {
  // Fixed defaults for the arguments that are not exposed here.
  bool pad = false;
  std::vector<std::string> cols_to_map = {};
  std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map;

  auto batch_node = std::make_shared<BatchDataset>(batch_size, drop_remainder, pad, cols_to_map, pad_map);
  if (batch_node->ValidateParams()) {
    batch_node->children.push_back(shared_from_this());
    return batch_node;
  }
  return nullptr;
}
// Creates a RepeatDataset node whose child is this dataset.
// A count of exactly 1 is a no-op: this dataset itself is returned and no
// repeat node is injected. Returns nullptr if validation fails.
std::shared_ptr<Dataset> Dataset::Repeat(int32_t count) {
  if (count != 1) {
    auto repeat_node = std::make_shared<RepeatDataset>(count);
    if (!repeat_node->ValidateParams()) {
      return nullptr;
    }
    repeat_node->children.push_back(shared_from_this());
    return repeat_node;
  }
  // Workaround for repeat == 1, do not inject repeat.
  return shared_from_this();
}
// Creates a MapDataset node whose child is this dataset.
// Returns nullptr if the new node fails its parameter validation.
std::shared_ptr<MapDataset> Dataset::Map(std::vector<std::shared_ptr<TensorOperation>> operations,
                                         std::vector<std::string> input_columns,
                                         std::vector<std::string> output_columns,
                                         const std::vector<std::string> &project_columns) {
  auto map_node = std::make_shared<MapDataset>(operations, input_columns, output_columns, project_columns);
  if (map_node->ValidateParams()) {
    map_node->children.push_back(shared_from_this());
    return map_node;
  }
  return nullptr;
}
// Function to create a ShuffleOp
std::shared_ptr<ShuffleDataset> Dataset::Shuffle(int32_t shuffle_size) {
// Pass in reshuffle_each_epoch with true
auto ds = std::make_shared<ShuffleDataset>(shuffle_size, true);
if (!ds->ValidateParams()) {
return nullptr;
}
ds->children.push_back(shared_from_this());
return ds;
}
// Creates a ProjectDataset node whose child is this dataset.
// Returns nullptr if the new node fails its parameter validation.
std::shared_ptr<ProjectDataset> Dataset::Project(const std::vector<std::string> &columns) {
  auto project_node = std::make_shared<ProjectDataset>(columns);
  if (project_node->ValidateParams()) {
    project_node->children.push_back(shared_from_this());
    return project_node;
  }
  return nullptr;
}
// Helper function to create default RandomSampler.
std::shared_ptr<SamplerObj> CreateDefaultSampler() {
int32_t num_samples = 0; // 0 means to sample all ids.
bool replacement = false;
return std::make_shared<RandomSamplerObj>(replacement, num_samples);
}
/* ####################################### Derived Dataset classes ################################# */
ImageFolderDataset::ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
bool recursive, std::set<std::string> extensions,
std::map<std::string, int32_t> class_indexing)
: dataset_dir_(dataset_dir),
decode_(decode),
sampler_(sampler),
recursive_(recursive),
class_indexing_(class_indexing),
exts_(extensions) {}
bool ImageFolderDataset::ValidateParams() {
if (dataset_dir_.empty()) {
MS_LOG(ERROR) << "No dataset path is specified.";
return false;
}
return true;
}
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ImageFolderDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
TensorShape scalar = TensorShape::CreateScalar();
RETURN_NULL_IF_ERROR(
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
RETURN_NULL_IF_ERROR(
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
recursive_, decode_, exts_, class_indexing_, std::move(schema),
std::move(sampler_->Build())));
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
MnistDataset::MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler)
: dataset_dir_(dataset_dir), sampler_(sampler) {}
bool MnistDataset::ValidateParams() {
if (dataset_dir_.empty()) {
MS_LOG(ERROR) << "No dataset path is specified.";
return false;
}
return true;
}
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MnistDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler, i.e., RandomSampler.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
TensorShape scalar = TensorShape::CreateScalar();
RETURN_NULL_IF_ERROR(
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
node_ops.push_back(std::make_shared<MnistOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
std::move(schema), std::move(sampler_->Build())));
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
BatchDataset::BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map)
: batch_size_(batch_size),
drop_remainder_(drop_remainder),
pad_(pad),
cols_to_map_(cols_to_map),
pad_map_(pad_map) {}
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> BatchDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
#ifdef ENABLE_PYTHON
py::function noop;
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
cols_to_map_, noop, noop, pad_map_));
#else
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
cols_to_map_, pad_map_));
#endif
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
bool BatchDataset::ValidateParams() {
if (batch_size_ <= 0) {
return false;
}
return true;
}
RepeatDataset::RepeatDataset(uint32_t count) : repeat_count_(count) {}
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> RepeatDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
node_ops.push_back(std::make_shared<RepeatOp>(repeat_count_));
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
bool RepeatDataset::ValidateParams() {
if (repeat_count_ <= 0) {
return false;
}
return true;
}
MapDataset::MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns,
std::vector<std::string> output_columns, const std::vector<std::string> &project_columns)
: operations_(operations),
input_columns_(input_columns),
output_columns_(output_columns),
project_columns_(project_columns) {}
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> MapDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// Currently default is true, and this is not exposed to user.
bool perf_mode = true;
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
// Build tensorOp from tensorOperation vector
// This is to ensure each iterator hold its own copy of the tensorOp objects.
(void)std::transform(
operations_.begin(), operations_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> operation) -> std::shared_ptr<TensorOp> { return operation->Build(); });
// This parameter will be removed with next rebase
std::vector<std::string> col_orders;
auto map_op =
std::make_shared<MapOp>(input_columns_, output_columns_, tensor_ops, num_workers_, connector_que_size_, perf_mode);
if (!project_columns_.empty()) {
auto project_op = std::make_shared<ProjectOp>(project_columns_);
node_ops.push_back(project_op);
}
node_ops.push_back(map_op);
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
bool MapDataset::ValidateParams() {
if (operations_.empty()) {
return false;
}
return true;
}
// Constructor for ShuffleDataset
ShuffleDataset::ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch)
: shuffle_size_(shuffle_size), shuffle_seed_(GetSeed()), reset_every_epoch_(reset_every_epoch) {}
// Function to build the ShuffleOp
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ShuffleDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
node_ops.push_back(std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_,
rows_per_buffer_));
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
// Function to validate the parameters for ShuffleDataset
bool ShuffleDataset::ValidateParams() {
if (shuffle_size_ <= 1) {
MS_LOG(ERROR) << "ShuffleDataset: Invalid input, shuffle_size: " << shuffle_size_;
return false;
}
return true;
}
// Constructor for Cifar10Dataset
Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler)
: dataset_dir_(dataset_dir), num_samples_(num_samples), sampler_(sampler) {}
bool Cifar10Dataset::ValidateParams() {
if (dataset_dir_.empty()) {
MS_LOG(ERROR) << "No dataset path is specified.";
return false;
}
if (num_samples_ < 0) {
MS_LOG(ERROR) << "Number of samples cannot be negative";
return false;
}
return true;
}
// Function to build CifarOp
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Cifar10Dataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If user does not specify Sampler, create a default sampler based on the shuffle variable.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
// Do internal Schema generation.
auto schema = std::make_unique<DataSchema>();
RETURN_NULL_IF_ERROR(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
TensorShape scalar = TensorShape::CreateScalar();
RETURN_NULL_IF_ERROR(
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
node_ops.push_back(std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, num_workers_, rows_per_buffer_,
dataset_dir_, connector_que_size_, std::move(schema),
std::move(sampler_->Build())));
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
// Function to build ProjectOp
ProjectDataset::ProjectDataset(const std::vector<std::string> &columns) : columns_(columns) {}
bool ProjectDataset::ValidateParams() {
if (columns_.empty()) {
MS_LOG(ERROR) << "No columns are specified.";
return false;
}
return true;
}
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> ProjectDataset::Build() {
// A vector containing shared pointer to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
node_ops.push_back(std::make_shared<ProjectOp>(columns_));
return std::make_shared<std::vector<std::shared_ptr<DatasetOp>>>(node_ops);
}
} // namespace api
} // namespace dataset
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/iterator.h"
#include "dataset/core/client.h"
#include "dataset/include/datasets.h"
namespace mindspore {
namespace dataset {
namespace api {
// Get the next row from the data pipeline.
// On any failure an error is logged and *row is cleared, so callers see an
// empty row. A null `row`, or a call made when no pipeline is running (before
// a successful BuildAndLaunchTree or after Stop), is reported the same way
// instead of dereferencing a null pointer.
void Iterator::GetNextRow(TensorMap *row) {
  if (row == nullptr) {
    MS_LOG(ERROR) << "GetNextRow: The output row pointer is null.";
    return;
  }
  if (iterator_ == nullptr) {
    MS_LOG(ERROR) << "GetNextRow: The iterator is not launched or has been stopped.";
    row->clear();
    return;
  }
  Status rc = iterator_->GetNextAsMap(row);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "GetNextRow: Failed to get next row.";
    row->clear();
  }
}
// Shut down the data pipeline by dropping this object's handles to it.
void Iterator::Stop() {
  // Release the iterator_ unique_ptr first; this destroys the object it owns.
  iterator_ = nullptr;
  // Then give up this object's share of tree_, decrementing its ref count.
  tree_ = nullptr;
}
// Function to build and launch the execution tree.
// Converts the Dataset tree rooted at `ds` into DatasetOps, registers them with
// a new ExecutionTree, then prepares and launches the tree and creates the
// DatasetIterator used by GetNextRow.
// Returns the first non-OK Status met along the way; a Dataset whose Build()
// yields a null or empty op list is reported as an unexpected-null error.
Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
  // One time init
  Status rc;
  rc = GlobalInit();
  RETURN_IF_NOT_OK(rc);

  // Instantiate the execution tree
  tree_ = std::make_shared<ExecutionTree>();

  // Iterative BFS converting Dataset tree into runtime Execution tree.
  // Each entry pairs a Dataset with the op that its children must attach to.
  std::queue<std::pair<std::shared_ptr<Dataset>, std::shared_ptr<DatasetOp>>> q;

  if (ds != nullptr) {
    // Convert the current root node. Build() may fail (null) and may return
    // more than one op, e.g. MapDataset returns {ProjectOp, MapOp} when
    // project_columns is set.
    auto root_ops = ds->Build();
    RETURN_UNEXPECTED_IF_NULL(root_ops);
    std::shared_ptr<DatasetOp> root_op = root_ops->empty() ? nullptr : root_ops->front();
    RETURN_UNEXPECTED_IF_NULL(root_op);
    RETURN_IF_NOT_OK(tree_->AssociateNode(root_op));

    // Chain any further root ops beneath the first one, exactly as is done for
    // child nodes below, so that none of them is left out of the tree.
    auto last_root_op = root_op;
    for (size_t i = 1; i < root_ops->size(); ++i) {
      auto next_op = (*root_ops)[i];
      RETURN_UNEXPECTED_IF_NULL(next_op);
      RETURN_IF_NOT_OK(tree_->AssociateNode(next_op));
      RETURN_IF_NOT_OK(last_root_op->AddChild(next_op));
      last_root_op = next_op;
    }
    // The root's children attach to the last op of the root's chain.
    q.push(std::make_pair(ds, last_root_op));

    // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes)
    while (!q.empty()) {
      auto node_pair = q.front();
      q.pop();
      // Iterate through all the direct children of the first element in our BFS queue
      for (auto child : node_pair.first->children) {
        auto child_ops = child->Build();
        RETURN_UNEXPECTED_IF_NULL(child_ops);
        // Guard against an empty op list before back() is taken below.
        std::shared_ptr<DatasetOp> leaf_op = child_ops->empty() ? nullptr : child_ops->back();
        RETURN_UNEXPECTED_IF_NULL(leaf_op);

        auto node_op = node_pair.second;
        // Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them
        // with the execution tree and add the child and parent relationship between the nodes
        // Note that some Dataset objects might return more than one DatasetOps
        // e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset
        for (auto child_op : *child_ops) {
          RETURN_IF_NOT_OK(tree_->AssociateNode(child_op));
          RETURN_IF_NOT_OK(node_op->AddChild(child_op));
          node_op = child_op;
        }
        // Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current
        // execution tree) to the BFS queue
        q.push(std::make_pair(child, leaf_op));
      }
    }
    RETURN_IF_NOT_OK(tree_->AssignRoot(root_op));
  }

  // Launch the execution tree.
  RETURN_IF_NOT_OK(tree_->Prepare());
  RETURN_IF_NOT_OK(tree_->Launch());
  iterator_ = std::make_unique<DatasetIterator>(tree_);
  RETURN_UNEXPECTED_IF_NULL(iterator_);

  return rc;
}
} // namespace api
} // namespace dataset
} // namespace mindspore
......@@ -297,7 +297,7 @@ void bindTensor(py::module *m) {
}))
.def_buffer([](Tensor &tensor) {
py::buffer_info info;
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info));
return info;
})
.def("__str__", &Tensor::ToString)
......@@ -311,7 +311,7 @@ void bindTensor(py::module *m) {
return res;
}
py::buffer_info info;
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info));
return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t);
});
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/samplers.h"
#include "dataset/engine/datasetops/source/sampler/sampler.h"
#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/pk_sampler.h"
namespace mindspore {
namespace dataset {
namespace api {
// Base-class constructor: performs no work of its own.
SamplerObj::SamplerObj() = default;
/// Creates a DistributedSamplerObj from the given settings.
/// Returns nullptr when the object's parameter validation fails.
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle,
                                                          int64_t num_samples, uint32_t seed) {
  auto sampler_obj = std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  return nullptr;
}
/// Creates a PKSamplerObj from the given settings.
/// Returns nullptr when the object's parameter validation fails.
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) {
  auto sampler_obj = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  return nullptr;
}
/// Creates a RandomSamplerObj from the given settings.
/// Returns nullptr when the object's parameter validation fails.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement, int64_t num_samples) {
  auto sampler_obj = std::make_shared<RandomSamplerObj>(replacement, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  return nullptr;
}
/// Creates a SequentialSamplerObj from the given settings.
/// Returns nullptr when the object's parameter validation fails.
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index, int64_t num_samples) {
  auto sampler_obj = std::make_shared<SequentialSamplerObj>(start_index, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  return nullptr;
}
/// Creates a SubsetRandomSamplerObj from the given settings.
/// Returns nullptr when the object's parameter validation fails.
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, int64_t num_samples) {
  auto sampler_obj = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  return nullptr;
}
/// Creates a WeightedRandomSamplerObj from the given settings.
/// Returns nullptr when the object's parameter validation fails.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, int64_t num_samples,
                                                                bool replacement) {
  auto sampler_obj = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement);
  if (sampler_obj->ValidateParams()) {
    return sampler_obj;
  }
  return nullptr;
}
/* ####################################### Derived Sampler classes ################################# */
// DistributedSamplerObj: stores the settings for a distributed sampler.
DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
                                             uint32_t seed)
    : num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {}

// Checks, in order: num_shards > 0, 0 <= shard_id < num_shards, num_samples >= 0.
// The first violated rule is logged and makes the result false.
bool DistributedSamplerObj::ValidateParams() {
  bool valid = false;
  if (num_shards_ <= 0) {
    MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_;
  } else if (shard_id_ < 0 || shard_id_ >= num_shards_) {
    MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_;
  } else if (num_samples_ < 0) {
    MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_;
  } else {
    valid = true;
  }
  return valid;
}

// Creates the runtime dataset::DistributedSampler from the stored settings.
std::shared_ptr<Sampler> DistributedSamplerObj::Build() {
  auto runtime_sampler =
    std::make_shared<dataset::DistributedSampler>(num_samples_, num_shards_, shard_id_, shuffle_, seed_);
  return runtime_sampler;
}
// PKSamplerObj: stores the settings for a PK sampler.
PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples)
    : num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {}

// Checks, in order: num_val > 0, num_samples >= 0.
// The first violated rule is logged and makes the result false.
bool PKSamplerObj::ValidateParams() {
  bool valid = false;
  if (num_val_ <= 0) {
    MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_;
  } else if (num_samples_ < 0) {
    MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_;
  } else {
    valid = true;
  }
  return valid;
}

// Creates the runtime dataset::PKSampler from the stored settings.
std::shared_ptr<Sampler> PKSamplerObj::Build() {
  auto runtime_sampler = std::make_shared<dataset::PKSampler>(num_samples_, num_val_, shuffle_);
  return runtime_sampler;
}
// RandomSamplerObj: stores the settings for a random sampler.
RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples)
    : replacement_(replacement), num_samples_(num_samples) {}

// Valid only when num_samples is not negative; logs an error otherwise.
bool RandomSamplerObj::ValidateParams() {
  const bool samples_ok = num_samples_ >= 0;
  if (!samples_ok) {
    MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_;
  }
  return samples_ok;
}

// Creates the runtime dataset::RandomSampler from the stored settings;
// reshuffling on every epoch is always requested.
std::shared_ptr<Sampler> RandomSamplerObj::Build() {
  const bool reshuffle_each_epoch = true;
  return std::make_shared<dataset::RandomSampler>(num_samples_, replacement_, reshuffle_each_epoch);
}
// SequentialSamplerObj: stores the settings for a sequential sampler.
SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples)
    : start_index_(start_index), num_samples_(num_samples) {}

// Checks, in order: num_samples >= 0, start_index >= 0.
// The first violated rule is logged and makes the result false.
bool SequentialSamplerObj::ValidateParams() {
  bool valid = false;
  if (num_samples_ < 0) {
    MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_;
  } else if (start_index_ < 0) {
    MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_;
  } else {
    valid = true;
  }
  return valid;
}

// Creates the runtime dataset::SequentialSampler from the stored settings.
std::shared_ptr<Sampler> SequentialSamplerObj::Build() {
  return std::make_shared<dataset::SequentialSampler>(num_samples_, start_index_);
}
// SubsetRandomSamplerObj: stores the settings for a subset random sampler.
SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples)
    : indices_(indices), num_samples_(num_samples) {}

// Valid only when num_samples is not negative; logs an error otherwise.
bool SubsetRandomSamplerObj::ValidateParams() {
  const bool samples_ok = num_samples_ >= 0;
  if (!samples_ok) {
    MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_;
  }
  return samples_ok;
}

// Creates the runtime dataset::SubsetRandomSampler from the stored settings.
std::shared_ptr<Sampler> SubsetRandomSamplerObj::Build() {
  return std::make_shared<dataset::SubsetRandomSampler>(num_samples_, indices_);
}
// WeightedRandomSamplerObj: stores the settings for a weighted random sampler.
WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples,
                                                   bool replacement)
    : weights_(weights), num_samples_(num_samples), replacement_(replacement) {}

// Valid only when num_samples is not negative; logs an error otherwise.
bool WeightedRandomSamplerObj::ValidateParams() {
  const bool samples_ok = num_samples_ >= 0;
  if (!samples_ok) {
    MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_;
  }
  return samples_ok;
}

// Creates the runtime dataset::WeightedRandomSampler from the stored settings.
std::shared_ptr<Sampler> WeightedRandomSamplerObj::Build() {
  return std::make_shared<dataset::WeightedRandomSampler>(num_samples_, weights_, replacement_);
}
} // namespace api
} // namespace dataset
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/transforms.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/kernels/image/normalize_op.h"
#include "dataset/kernels/image/decode_op.h"
#include "dataset/kernels/image/resize_op.h"
#include "dataset/kernels/image/random_crop_op.h"
#include "dataset/kernels/image/center_crop_op.h"
#include "dataset/kernels/image/uniform_aug_op.h"
#include "dataset/kernels/image/random_horizontal_flip_op.h"
#include "dataset/kernels/image/random_vertical_flip_op.h"
#include "dataset/kernels/image/random_rotation_op.h"
#include "dataset/kernels/image/cut_out_op.h"
#include "dataset/kernels/image/random_color_adjust_op.h"
#include "dataset/kernels/image/pad_op.h"
namespace mindspore {
namespace dataset {
namespace api {
// Base-class constructor: nothing to initialize.
TensorOperation::TensorOperation() = default;
// Transform operations for computer vision.
namespace vision {
// Creates a NormalizeOperation from the given mean and std vectors.
// Returns nullptr when NormalizeOperation::ValidateParams() rejects the arguments.
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std) {
  auto normalize = std::make_shared<NormalizeOperation>(mean, std);
  if (normalize->ValidateParams()) {
    return normalize;
  }
  return nullptr;
}
// Creates a DecodeOperation with the given rgb flag.
// Returns nullptr when DecodeOperation::ValidateParams() rejects the arguments.
std::shared_ptr<DecodeOperation> Decode(bool rgb) {
  auto decode = std::make_shared<DecodeOperation>(rgb);
  if (decode->ValidateParams()) {
    return decode;
  }
  return nullptr;
}
// Creates a ResizeOperation for the given size vector and interpolation mode.
// Returns nullptr when ResizeOperation::ValidateParams() rejects the arguments.
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size, InterpolationMode interpolation) {
  auto resize = std::make_shared<ResizeOperation>(size, interpolation);
  if (resize->ValidateParams()) {
    return resize;
  }
  return nullptr;
}
// Creates a RandomCropOperation from the crop size, padding, pad_if_needed flag and fill value.
// Returns nullptr when RandomCropOperation::ValidateParams() rejects the arguments.
std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding,
                                                bool pad_if_needed, std::vector<uint8_t> fill_value) {
  auto random_crop = std::make_shared<RandomCropOperation>(size, padding, pad_if_needed, fill_value);
  if (random_crop->ValidateParams()) {
    return random_crop;
  }
  return nullptr;
}
// Creates a CenterCropOperation for the given size vector.
// Returns nullptr when CenterCropOperation::ValidateParams() rejects the arguments.
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size) {
  auto center_crop = std::make_shared<CenterCropOperation>(size);
  if (center_crop->ValidateParams()) {
    return center_crop;
  }
  return nullptr;
}
// Creates a UniformAugOperation over the given list of operations and num_ops.
// Returns nullptr when UniformAugOperation::ValidateParams() rejects the arguments.
std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations,
                                                    int32_t num_ops) {
  auto uniform_aug = std::make_shared<UniformAugOperation>(operations, num_ops);
  if (uniform_aug->ValidateParams()) {
    return uniform_aug;
  }
  return nullptr;
}
// Creates a RandomHorizontalFlipOperation with the given probability.
// Returns nullptr when RandomHorizontalFlipOperation::ValidateParams() rejects the arguments.
std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob) {
  auto horizontal_flip = std::make_shared<RandomHorizontalFlipOperation>(prob);
  if (horizontal_flip->ValidateParams()) {
    return horizontal_flip;
  }
  return nullptr;
}
// Creates a RandomVerticalFlipOperation with the given probability.
// Returns nullptr when RandomVerticalFlipOperation::ValidateParams() rejects the arguments.
std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob) {
  auto vertical_flip = std::make_shared<RandomVerticalFlipOperation>(prob);
  if (vertical_flip->ValidateParams()) {
    return vertical_flip;
  }
  return nullptr;
}
// Creates a RandomRotationOperation from degrees, resample mode, expand flag, center and fill value.
// Returns nullptr when RandomRotationOperation::ValidateParams() rejects the arguments.
std::shared_ptr<RandomRotationOperation> RandomRotation(std::vector<float> degrees, InterpolationMode resample,
                                                        bool expand, std::vector<float> center,
                                                        std::vector<uint8_t> fill_value) {
  auto random_rotation = std::make_shared<RandomRotationOperation>(degrees, resample, expand, center, fill_value);
  if (random_rotation->ValidateParams()) {
    return random_rotation;
  }
  return nullptr;
}
// Creates a PadOperation from the padding vector, fill value and border type.
// Returns nullptr when PadOperation::ValidateParams() rejects the arguments.
std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value,
                                  BorderType padding_mode) {
  auto pad = std::make_shared<PadOperation>(padding, fill_value, padding_mode);
  if (pad->ValidateParams()) {
    return pad;
  }
  return nullptr;
}
// Creates a CutOutOperation with the given patch length and number of patches.
// Returns nullptr when CutOutOperation::ValidateParams() rejects the arguments.
std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches) {
  auto cut_out = std::make_shared<CutOutOperation>(length, num_patches);
  if (cut_out->ValidateParams()) {
    return cut_out;
  }
  return nullptr;
}
// Creates a RandomColorAdjustOperation from the brightness, contrast, saturation and hue vectors.
// Returns nullptr when RandomColorAdjustOperation::ValidateParams() rejects the arguments.
std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness,
                                                              std::vector<float> contrast,
                                                              std::vector<float> saturation, std::vector<float> hue) {
  auto color_adjust = std::make_shared<RandomColorAdjustOperation>(brightness, contrast, saturation, hue);
  if (color_adjust->ValidateParams()) {
    return color_adjust;
  }
  return nullptr;
}
/* ####################################### Derived TensorOperation classes ################################# */
// NormalizeOperation
// Keeps copies of the mean and std vectors; their sizes are checked in ValidateParams().
NormalizeOperation::NormalizeOperation(std::vector<float> mean, std::vector<float> std)
    : mean_(mean),
      std_(std) {}
bool NormalizeOperation::ValidateParams() {
if (mean_.size() != 3) {
MS_LOG(ERROR) << "Normalize: mean vector has incorrect size: " << mean_.size();
return false;
}
if (std_.size() != 3) {
MS_LOG(ERROR) << "Normalize: std vector has incorrect size: " << std_.size();
return false;
}
return true;
}
// Creates the runtime NormalizeOp from the three mean values followed by the three std values.
// Indexes mean_/std_ directly, so ValidateParams() must have passed first.
std::shared_ptr<TensorOp> NormalizeOperation::Build() {
  auto tensor_op = std::make_shared<NormalizeOp>(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]);
  return tensor_op;
}
// DecodeOperation
// Stores the rgb flag that is later handed to DecodeOp.
DecodeOperation::DecodeOperation(bool rgb) : rgb_(rgb) {}

// There is nothing to check for a single boolean flag, so validation always succeeds.
bool DecodeOperation::ValidateParams() {
  return true;
}

// Creates the runtime DecodeOp with the stored rgb flag.
std::shared_ptr<TensorOp> DecodeOperation::Build() {
  auto tensor_op = std::make_shared<DecodeOp>(rgb_);
  return tensor_op;
}
// ResizeOperation
// Keeps the requested size vector and interpolation mode; the size is checked in ValidateParams().
ResizeOperation::ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation)
    : size_(size),
      interpolation_(interpolation) {}
bool ResizeOperation::ValidateParams() {
if (size_.empty() || size_.size() > 2) {
MS_LOG(ERROR) << "Resize: size vector has incorrect size: " << size_.size();
return false;
}
return true;
}
// Creates the runtime ResizeOp.
// size_[0] is passed as the height; the width is size_[1] when two values were given and 0 otherwise.
std::shared_ptr<TensorOp> ResizeOperation::Build() {
  const bool width_given = (size_.size() == 2);
  const int32_t height = size_[0];
  const int32_t width = width_given ? size_[1] : 0;
  return std::make_shared<ResizeOp>(height, width, interpolation_);
}
// RandomCropOperation
// Keeps copies of all arguments; vector sizes are checked in ValidateParams().
RandomCropOperation::RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding, bool pad_if_needed,
                                         std::vector<uint8_t> fill_value)
    : size_(size),
      padding_(padding),
      pad_if_needed_(pad_if_needed),
      fill_value_(fill_value) {}
bool RandomCropOperation::ValidateParams() {
if (size_.empty() || size_.size() > 2) {
MS_LOG(ERROR) << "RandomCrop: size vector has incorrect size: " << size_.size();
return false;
}
if (padding_.empty() || padding_.size() != 4) {
MS_LOG(ERROR) << "RandomCrop: padding vector has incorrect size: padding.size()";
return false;
}
if (fill_value_.empty() || fill_value_.size() != 3) {
MS_LOG(ERROR) << "RandomCrop: fill_value vector has incorrect size: fill_value.size()";
return false;
}
return true;
}
// Creates the runtime RandomCropOp with a constant border.
// Indexes size_, padding_ and fill_value_ directly, so ValidateParams() must have passed first.
std::shared_ptr<TensorOp> RandomCropOperation::Build() {
  // size_[0] is the crop height; the crop width is size_[1] when given and 0 otherwise.
  const int32_t crop_height = size_[0];
  const int32_t crop_width = (size_.size() == 2) ? size_[1] : 0;

  // padding_ is read here as {top, bottom, left, right}.
  // NOTE(review): PadOperation::Build reads its padding vector as {left, top, right, bottom} - confirm which
  // order is intended for RandomCrop.
  const int32_t pad_top = padding_[0];
  const int32_t pad_bottom = padding_[1];
  const int32_t pad_left = padding_[2];
  const int32_t pad_right = padding_[3];

  const uint8_t fill_r = fill_value_[0];
  const uint8_t fill_g = fill_value_[1];
  const uint8_t fill_b = fill_value_[2];

  return std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right,
                                        BorderType::kConstant, pad_if_needed_, fill_r, fill_g, fill_b);
}
// CenterCropOperation
// Keeps a copy of the requested size vector; it is checked in ValidateParams().
CenterCropOperation::CenterCropOperation(std::vector<int32_t> size)
    : size_(size) {}
// Accepts a size vector with one or two entries; anything else is logged and rejected.
bool CenterCropOperation::ValidateParams() {
  const size_t count = size_.size();
  const bool size_ok = (count == 1 || count == 2);
  if (!size_ok) {
    MS_LOG(ERROR) << "CenterCrop: size vector has incorrect size.";
  }
  return size_ok;
}
std::shared_ptr<TensorOp> CenterCropOperation::Build() {
int32_t crop_height = size_[0];
int32_t crop_width = 0;
// User has specified crop_width.
if (size_.size() == 2) {
crop_width = size_[1];
}
std::shared_ptr<CenterCropOp> tensor_op = std::make_shared<CenterCropOp>(crop_height, crop_width);
return tensor_op;
}
// UniformAugOperation
// Keeps the list of candidate operations and the num_ops value.
UniformAugOperation::UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops)
    : operations_(operations),
      num_ops_(num_ops) {}
bool UniformAugOperation::ValidateParams() { return true; }
std::shared_ptr<TensorOp> UniformAugOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(operations_.begin(), operations_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
std::shared_ptr<UniformAugOp> tensor_op = std::make_shared<UniformAugOp>(tensor_ops, num_ops_);
return tensor_op;
}
// RandomHorizontalFlipOperation
// Stores the probability that is later handed to RandomHorizontalFlipOp.
RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability)
    : probability_(probability) {}

// No checks are performed on the probability; validation always succeeds.
bool RandomHorizontalFlipOperation::ValidateParams() {
  return true;
}

// Creates the runtime RandomHorizontalFlipOp with the stored probability.
std::shared_ptr<TensorOp> RandomHorizontalFlipOperation::Build() {
  return std::make_shared<RandomHorizontalFlipOp>(probability_);
}
// RandomVerticalFlipOperation
// Stores the probability that is later handed to RandomVerticalFlipOp.
RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability)
    : probability_(probability) {}

// No checks are performed on the probability; validation always succeeds.
bool RandomVerticalFlipOperation::ValidateParams() {
  return true;
}

// Creates the runtime RandomVerticalFlipOp with the stored probability.
std::shared_ptr<TensorOp> RandomVerticalFlipOperation::Build() {
  return std::make_shared<RandomVerticalFlipOp>(probability_);
}
// RandomRotationOperation
// Keeps copies of all arguments; vector sizes are checked in ValidateParams().
RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode,
                                                 bool expand, std::vector<float> center,
                                                 std::vector<uint8_t> fill_value)
    : degrees_(degrees), interpolation_mode_(interpolation_mode), expand_(expand), center_(center),
      fill_value_(fill_value) {}
bool RandomRotationOperation::ValidateParams() {
if (degrees_.empty() || degrees_.size() != 2) {
MS_LOG(ERROR) << "RandomRotation: degrees vector has incorrect size: degrees.size()";
return false;
}
if (center_.empty() || center_.size() != 2) {
MS_LOG(ERROR) << "RandomRotation: center vector has incorrect size: center.size()";
return false;
}
if (fill_value_.empty() || fill_value_.size() != 3) {
MS_LOG(ERROR) << "RandomRotation: fill_value vector has incorrect size: fill_value.size()";
return false;
}
return true;
}
// Creates the runtime RandomRotationOp from the two degree values, the two center values, the
// interpolation mode, the expand flag and the three fill values.
// Indexes the vectors directly, so ValidateParams() must have passed first.
std::shared_ptr<TensorOp> RandomRotationOperation::Build() {
  return std::make_shared<RandomRotationOp>(degrees_[0], degrees_[1], center_[0], center_[1], interpolation_mode_,
                                            expand_, fill_value_[0], fill_value_[1], fill_value_[2]);
}
// PadOperation
// Keeps copies of the padding vector, fill value and border type; sizes are checked in ValidateParams().
PadOperation::PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value, BorderType padding_mode)
    : padding_(padding),
      fill_value_(fill_value),
      padding_mode_(padding_mode) {}
bool PadOperation::ValidateParams() {
if (padding_.empty() || padding_.size() == 3 || padding_.size() > 4) {
MS_LOG(ERROR) << "Pad: padding vector has incorrect size: padding.size()";
return false;
}
if (fill_value_.empty() || (fill_value_.size() != 1 && fill_value_.size() != 3)) {
MS_LOG(ERROR) << "Pad: fill_value vector has incorrect size: fill_value.size()";
return false;
}
return true;
}
std::shared_ptr<TensorOp> PadOperation::Build() {
int32_t pad_top, pad_bottom, pad_left, pad_right;
switch (padding_.size()) {
case 1:
pad_left = padding_[0];
pad_top = padding_[0];
pad_right = padding_[0];
pad_bottom = padding_[0];
break;
case 2:
pad_left = padding_[0];
pad_top = padding_[1];
pad_right = padding_[0];
pad_bottom = padding_[1];
break;
default:
pad_left = padding_[0];
pad_top = padding_[1];
pad_right = padding_[2];
pad_bottom = padding_[3];
}
uint8_t fill_r, fill_g, fill_b;
fill_r = fill_value_[0];
fill_g = fill_value_[0];
fill_b = fill_value_[0];
if (fill_value_.size() == 3) {
fill_r = fill_value_[0];
fill_g = fill_value_[1];
fill_b = fill_value_[2];
}
std::shared_ptr<PadOp> tensor_op =
std::make_shared<PadOp>(pad_top, pad_bottom, pad_left, pad_right, padding_mode_, fill_r, fill_g, fill_b);
return tensor_op;
}
// CutOutOperation
// Stores the patch length and the number of patches; both are checked in ValidateParams().
CutOutOperation::CutOutOperation(int32_t length, int32_t num_patches)
    : length_(length),
      num_patches_(num_patches) {}
bool CutOutOperation::ValidateParams() {
if (length_ < 0) {
MS_LOG(ERROR) << "CutOut: length cannot be negative";
return false;
}
if (num_patches_ < 0) {
MS_LOG(ERROR) << "CutOut: number of patches cannot be negative";
return false;
}
return true;
}
// Creates the runtime CutOutOp. length_ is passed for both of the first two arguments, followed by
// num_patches_, a false flag and three zero values.
std::shared_ptr<TensorOp> CutOutOperation::Build() {
  return std::make_shared<CutOutOp>(length_, length_, num_patches_, false, 0, 0, 0);
}
// RandomColorAdjustOperation
// Keeps copies of the four range vectors; their sizes are checked in ValidateParams().
RandomColorAdjustOperation::RandomColorAdjustOperation(std::vector<float> brightness, std::vector<float> contrast,
                                                       std::vector<float> saturation, std::vector<float> hue)
    : brightness_(brightness),
      contrast_(contrast),
      saturation_(saturation),
      hue_(hue) {}
// Each of brightness_, contrast_, saturation_ and hue_ must hold one or two values.
// The vectors are checked in that order; the first bad one is logged and false is returned.
bool RandomColorAdjustOperation::ValidateParams() {
  // True when the vector holds exactly one or two entries.
  auto has_one_or_two = [](const std::vector<float> &values) { return values.size() == 1 || values.size() == 2; };

  if (!has_one_or_two(brightness_)) {
    MS_LOG(ERROR) << "RandomColorAdjust: brightness must be a vector of one or two values";
    return false;
  }
  if (!has_one_or_two(contrast_)) {
    MS_LOG(ERROR) << "RandomColorAdjust: contrast must be a vector of one or two values";
    return false;
  }
  if (!has_one_or_two(saturation_)) {
    MS_LOG(ERROR) << "RandomColorAdjust: saturation must be a vector of one or two values";
    return false;
  }
  if (!has_one_or_two(hue_)) {
    MS_LOG(ERROR) << "RandomColorAdjust: hue must be a vector of one or two values";
    return false;
  }
  return true;
}
std::shared_ptr<TensorOp> RandomColorAdjustOperation::Build() {
float brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub;
brightness_lb = brightness_[0];
brightness_ub = brightness_[0];
if (brightness_.size() == 2) brightness_ub = brightness_[1];
contrast_lb = contrast_[0];
contrast_ub = contrast_[0];
if (contrast_.size() == 2) contrast_ub = contrast_[1];
saturation_lb = saturation_[0];
saturation_ub = saturation_[0];
if (saturation_.size() == 2) saturation_ub = saturation_[1];
hue_lb = hue_[0];
hue_ub = hue_[0];
if (hue_.size() == 2) hue_ub = hue_[1];
std::shared_ptr<RandomColorAdjustOp> tensor_op = std::make_shared<RandomColorAdjustOp>(
brightness_lb, brightness_ub, contrast_lb, contrast_ub, saturation_lb, saturation_ub, hue_lb, hue_ub);
return tensor_op;
}
} // namespace vision
} // namespace api
} // namespace dataset
} // namespace mindspore
ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(core OBJECT
${EXAMPLE_SRCS}
${FEATURE_SRCS}
set(DATASET_CORE_SRC_FILES
client.cc
config_manager.cc
cv_tensor.cc
......@@ -13,6 +9,13 @@ add_library(core OBJECT
tensor.cc
tensor_row.cc
tensor_shape.cc
)
)
ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS})
add_dependencies(core mindspore::protobuf)
target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
if (ENABLE_PYTHON)
target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
endif()
......@@ -25,21 +25,25 @@
#include "dataset/core/tensor_shape.h"
#include "dataset/engine/data_schema.h"
#include "dataset/engine/dataset_iterator.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#ifdef ENABLE_PYTHON
#include "dataset/engine/datasetops/barrier_op.h"
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/build_vocab_op.h"
#endif
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/dataset_op.h"
#include "dataset/engine/datasetops/device_queue_op.h"
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/rename_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/skip_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#include "dataset/engine/datasetops/take_op.h"
#include "dataset/engine/datasetops/zip_op.h"
#include "dataset/engine/datasetops/concat_op.h"
......
......@@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf };
// Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
// Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
// Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
// convenience functions for 32bit int bitmask
inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; }
......
......@@ -14,11 +14,12 @@
* limitations under the License.
*/
#include "dataset/core/data_type.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "utils/log_adapter.h"
#include "dataset/core/pybind_support.h"
namespace mindspore {
namespace dataset {
......@@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const {
return 0;
}
#ifdef ENABLE_PYTHON
py::dtype DataType::AsNumpyType() const {
if (type_ < DataType::NUM_OF_TYPES)
return py::dtype(kTypeInfo[type_].pybindType_);
else
return py::dtype("unknown");
}
#endif
uint8_t DataType::AsCVType() const {
uint8_t res = kCVInvalidType;
......@@ -112,6 +115,7 @@ std::string DataType::ToString() const {
return "unknown";
}
#ifdef ENABLE_PYTHON
DataType DataType::FromNpArray(const py::array &arr) {
if (py::isinstance<py::array_t<bool>>(arr)) {
return DataType(DataType::DE_BOOL);
......@@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const {
}
return res;
}
#endif
} // namespace dataset
} // namespace mindspore
......@@ -19,14 +19,16 @@
#include <opencv2/core/hal/interface.h>
#include <string>
#ifdef ENABLE_PYTHON
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "dataset/core/constants.h"
#include "dataset/core/pybind_support.h"
namespace py = pybind11;
#else
#include "Eigen/Core"
using float16 = Eigen::half;
#endif
#include "dataset/core/constants.h"
namespace mindspore {
namespace dataset {
......@@ -59,6 +61,7 @@ class DataType {
const uint8_t cvType_; // OpenCv matching type
};
#ifdef ENABLE_PYTHON
static inline const TypeInfo kTypeInfo[] = {
// name, sizeInBytes, pybindType, formatDescriptor, openCV
{"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN
......@@ -76,19 +79,38 @@ class DataType {
{"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F}, // DE_FLOAT64
{"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING
};
#else
static inline const TypeInfo kTypeInfo[] = {
// name, sizeInBytes, pybindType, formatDescriptor, openCV
{"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN
{"bool", 1, "bool", "", CV_8U}, // DE_BOOL
{"int8", 1, "int8", "", CV_8S}, // DE_INT8
{"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8
{"int16", 2, "int16", "", CV_16S}, // DE_INT16
{"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16
{"int32", 4, "int32", "", CV_32S}, // DE_INT32
{"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32
{"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64
{"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64
{"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16
{"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32
{"float64", 8, "double", "", CV_64F}, // DE_FLOAT64
{"string", 0, "bytes", "", kCVInvalidType} // DE_STRING
};
#endif
// No arg constructor to create an unknown shape
DataType() : type_(DE_UNKNOWN) {}
// Create a type from a given string
// @param type_str
/// \param type_str
explicit DataType(const std::string &type_str);
// Default destructor
~DataType() = default;
// Create a type from a given enum
// @param d
/// \param d
constexpr explicit DataType(Type d) : type_(d) {}
constexpr bool operator==(const DataType a) const { return type_ == a.type_; }
......@@ -100,49 +122,49 @@ class DataType {
constexpr bool operator!=(const Type a) const { return type_ != a; }
// Disable this usage `if(d)` where d is of type DataType
// @return
/// \return
operator bool() = delete;
// To be used in Switch/case
// @return
/// \return
operator Type() const { return type_; }
// The number of bytes needed to store one value of this type
// @return
/// \return
uint8_t SizeInBytes() const;
// Convert from DataType to OpenCV type
// @return
/// \return
uint8_t AsCVType() const;
// Convert from OpenCV type to DataType
// @param cv_type
// @return
/// \param cv_type
/// \return
static DataType FromCVType(int cv_type);
// Returns a string representation of the type
// @return
/// \return
std::string ToString() const;
// returns true if the template type is the same as the Tensor type_
// @tparam T
// @return true or false
/// \tparam T
/// \return true or false
template <typename T>
bool IsCompatible() const {
return type_ == FromCType<T>();
}
// returns true if the template type is the same as the Tensor type_
// @tparam T
// @return true or false
/// \tparam T
/// \return true or false
template <typename T>
bool IsLooselyCompatible() const;
// << Stream output operator overload
// @notes This allows you to print the info using stream operators
// @param out - reference to the output stream being overloaded
// @param rO - reference to the DataType to display
// @return - the output stream must be returned
/// \notes This allows you to print the info using stream operators
/// \param out - reference to the output stream being overloaded
/// \param rO - reference to the DataType to display
/// \return - the output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const DataType &so) {
out << so.ToString();
return out;
......@@ -151,22 +173,24 @@ class DataType {
template <typename T>
static DataType FromCType();
#ifdef ENABLE_PYTHON
// Convert from DataType to Pybind type
// @return
/// \return
py::dtype AsNumpyType() const;
// Convert from NP type to DataType
// @param type
// @return
/// \param type
/// \return
static DataType FromNpType(const py::dtype &type);
// Convert from NP array to DataType
// @param py array
// @return
/// \param py array
/// \return
static DataType FromNpArray(const py::array &arr);
#endif
// Get the buffer string format of the current type. Used in pybind buffer protocol.
// @return
/// \return
std::string GetPybindFormat() const;
bool IsSignedInt() const {
......
......@@ -28,10 +28,12 @@
#include "dataset/core/constants.h"
#include "dataset/core/cv_tensor.h"
#include "dataset/core/global_context.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
namespace py = pybind11;
#endif
#include "dataset/core/tensor_shape.h"
namespace py = pybind11;
namespace mindspore {
namespace dataset {
// Helper macros for printing tensor elements
......@@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape
MS_ASSERT(num_bytes == 0);
if (shape.known()) Tensor::Reshape(shape);
}
Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
: Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) {
// total bytes needed = offset array + strings
......@@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape
MS_ASSERT(num_bytes == 0);
if (shape.known()) Tensor::Reshape(shape);
}
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
DataType type, const unsigned char *data) {
if (!shape.known()) {
......@@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
return Status::OK(); // returns base-class shared_ptr
}
#ifdef ENABLE_PYTHON
Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
std::vector<dsize_t> shape;
for (dsize_t i = 0; i < arr.ndim(); i++) {
......@@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
return Status::OK(); // returns base-class shared_ptr
}
#endif
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
const TensorShape &shape) {
......@@ -698,21 +704,24 @@ std::vector<dsize_t> Tensor::Strides() {
return strides;
}
Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) {
CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
#ifdef ENABLE_PYTHON
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
RETURN_UNEXPECTED_IF_NULL(t);
CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
std::string format_desc = t.type().GetPybindFormat();
std::string format_desc = t->type().GetPybindFormat();
if (format_desc.empty()) {
RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
}
*out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */
t.type().SizeInBytes(), /* Size of one scalar */
format_desc, /* Python struct-style format descriptor */
t.Rank(), /* Number of dimensions */
t.shape().AsVector(), /* Buffer dimensions */
t.Strides());
*out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */
t->type().SizeInBytes(), /* Size of one scalar */
format_desc, /* Python struct-style format descriptor */
t->Rank(), /* Number of dimensions */
t->shape().AsVector(), /* Buffer dimensions */
t->Strides());
return Status::OK();
}
#endif
template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
......@@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
o->swap(sv);
return Status::OK();
}
#ifdef ENABLE_PYTHON
// return data as numpy, should return status
Status Tensor::GetDataAsNumpy(py::array *data) {
RETURN_UNEXPECTED_IF_NULL(data_);
......@@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
return Status::OK();
}
#endif
void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
......
......@@ -26,20 +26,27 @@
#undef HAVE_STDDEF_H
#undef HAVE_STDLIB_H
#endif
#ifdef ENABLE_PYTHON
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#endif
#include "dataset/core/constants.h"
#include "dataset/core/data_type.h"
#include "dataset/core/tensor_shape.h"
#include "dataset/util/allocator.h"
#include "dataset/util/status.h"
#include "proto/example.pb.h"
#ifdef ENABLE_PYTHON
namespace py = pybind11;
#endif
namespace mindspore {
namespace dataset {
class Tensor;
template <typename T>
class Allocator;
using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors
......@@ -114,16 +121,17 @@ class Tensor {
static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
const unsigned char *data = nullptr);
/// Create a copy of the input tensor
/// \param out [out] output tensor to be generated
/// \param in [in] original tensor to be copied
/// \return Status
// Create a copy of the input tensor
// @param out [out] output tensor to be generated
// @param in [in] original tensor to be copied
// @return Status
static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
return Status::OK();
}
#ifdef ENABLE_PYTHON
// A static factory method to create a Tensor from a given py::array.
// @param ptr output argument to hold the created Tensor
// @param arr py::array
......@@ -132,6 +140,7 @@ class Tensor {
// Helper function to create a tensor from Numpy of strings
static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
#endif
// A static factory method to create a Tensor from a given list of strings.
// @param ptr output argument to hold the created Tensor
......@@ -170,6 +179,7 @@ class Tensor {
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
}
// Create tensor from protobuf bytelist with uint8 or int8 types
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
const TensorShape &shape, const DataType &type, dsize_t pad_size);
......@@ -346,12 +356,12 @@ class Tensor {
virtual void Squeeze();
/// Calculates the strides of the Tensor
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
/// The strides will be {6,2,1}.
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
/// The strides will be {24,8,4}.
/// @return vector of integers
// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
// The strides will be {6,2,1}.
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
// The strides will be {24,8,4}.
// @return vector of integers
std::vector<dsize_t> Strides();
std::string ToString() {
......@@ -376,6 +386,7 @@ class Tensor {
// Slice string tensors
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
#ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor
// @param data this data is the location of python data
// @return Status code
......@@ -383,7 +394,8 @@ class Tensor {
Status GetDataAsNumpyStrings(py::array *data);
static Status GetBufferInfo(Tensor &t, py::buffer_info *out);
static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
#endif
// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
......@@ -570,7 +582,7 @@ class Tensor {
// Return a TensorIterator that points to the start of the Tensor.
// It's the user responsibility to use the correct type that matches the Tensor type
// @tparam T The type of values in the Tensor
// @param T The type of values in the Tensor
// @return TensorIterator
template <typename T>
TensorIterator<T> begin() {
......
......@@ -18,7 +18,6 @@
#include "dataset/core/tensor_row.h"
namespace py = pybind11;
namespace mindspore {
namespace dataset {
......
......@@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape)
known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape.
}
#ifdef ENABLE_PYTHON
TensorShape::TensorShape(py::list l)
: raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
std::vector<dsize_t> list_c;
......@@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l)
}
AddListToShape(list_c);
}
#endif
TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type)
: raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
......@@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const {
return TensorShape(vec);
}
#ifdef ENABLE_PYTHON
py::list TensorShape::AsPyList() {
py::list list;
for (auto i : raw_shape_) {
......@@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() {
}
return list;
}
#endif
TensorShape TensorShape::Squeeze() const {
std::vector<dsize_t> new_shape;
......
......@@ -24,13 +24,16 @@
#include <opencv2/core/mat.hpp>
#ifdef ENABLE_PYTHON
#include "pybind11/pybind11.h"
namespace py = pybind11;
#endif
#include "dataset/core/constants.h"
#include "dataset/util/status.h"
#include "dataset/core/global_context.h"
#include "dataset/util/allocator.h"
namespace py = pybind11;
namespace mindspore {
namespace dataset {
// Class that represents a shape of a Tensor. A shape can be:
......@@ -43,7 +46,8 @@ namespace dataset {
// -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n
// Example: <3,?> (the 1st dim is unknown)\n
// <2,?,?,?> (all dims but the 0th dim are unknown)
// TensorShape supports any dim > 0 and < 2^31-1
/// \brief TensorShape supports any dim > 0 and < 2^31-1
class TensorShape {
public:
static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension
......@@ -51,57 +55,59 @@ class TensorShape {
// Force the compiler to not create a no-arg constructor
TensorShape() = delete;
// Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown
// @param list
/// \brief Create a Shape from an initialization list (e.g., TensorShape s({2, 2})).
/// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown.
/// \param[in] list Dimensions of the shape
explicit TensorShape(const std::initializer_list<dsize_t> &list);
// Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown
// @param list
/// \brief Create a Shape from a vector (e.g., TensorShape s(std::vector<dsize_t>({2, 2}))).
/// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown.
/// \param[in] list Dimensions of the shape
explicit TensorShape(const std::vector<dsize_t> &list);
// Copy constructor
// @param shape
/// \brief Copy constructor
/// \param[in] shape
TensorShape(const TensorShape &shape);
// construct a TensorShape via a python list
// @param py::list l - a list object from python
#ifdef ENABLE_PYTHON
/// \brief Construct a TensorShape via a python list
/// \param[in] l A list object from python
explicit TensorShape(py::list l);
#endif
~TensorShape() = default;
// Create a scalar Shape (i.e., empty shape with mKnown = true)
// @return TensorShape
/// \brief Create a scalar Shape (i.e., empty shape with mKnown = true)
/// \return TensorShape
static TensorShape CreateScalar() { return TensorShape({}); }
// Create a shape with an unknown rank.
// @return TensorShape
/// \brief Create a shape with an unknown rank.
/// \return TensorShape
static TensorShape CreateUnknownRankShape();
// Create a shape with a known rank .
// @return TensorShape
/// \brief Create a shape with a known rank.
/// \param[in] rank Rank of the shape to create
/// \return TensorShape
static TensorShape CreateUnknownShapeWithRank(dsize_t rank);
// Insert a new dim into a copy of the current shape.
// @param dim to be added
// @param axis the index where dim should be added
// @return New modified shape
/// \brief Insert a new dim into a copy of the current shape.
/// \param[in] axis the index where dim should be added
/// \param[in] dim to be added
/// \return New modified shape
TensorShape InsertDim(dsize_t axis, dsize_t dim) const;
// Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
// @param dim
// @return
/// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
/// \param[in] dim
/// \return
TensorShape PrependDim(dsize_t dim) const;
// Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
// @param dim
// @return
/// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
/// \param[in] dim
/// \return
TensorShape AppendDim(dsize_t dim) const;
// Create a shape based on OpenCV shape and type
// @param cv_size
// @param type int that represent the type in OpenCV, example CV_8U, CV_64S
/// \brief Create a shape based on OpenCV shape and type
/// \param[in] cv_size
/// \param[in] type int that represent the type in OpenCV, example CV_8U, CV_64S
TensorShape(cv::MatSize cv_size, uint32_t type);
dsize_t Size() const { return raw_shape_.size(); }
......@@ -123,47 +129,50 @@ class TensorShape {
return raw_shape_[index];
}
// Return the Shape as a vector
// @return
/// \brief Return the Shape as a vector
/// \return
std::vector<dsize_t> AsVector() const;
// Returns the class info as a string
// @return
/// \brief Returns the class info as a string
/// \return
std::string ToString() const {
std::stringstream ss;
ss << *this;
return ss.str();
}
// Actual print function used by operator<<
// @param out output string stream
/// \brief Actual print function used by operator<<
/// \param out output string stream
void Print(std::ostream &out) const;
// << Stream output operator overload
// @notes This allows you to print the info using stream operators
// @param out - reference to the output stream being overloaded
// @param rO - reference to the TensorShape to display
// @return - the output stream must be returned
/// \brief << Stream output operator overload
/// This allows you to print the info using stream operators
/// \param[in] out - reference to the output stream being overloaded
/// \param[in] so - reference to the TensorShape to display
/// \return - the output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) {
so.Print(out);
return out;
}
#ifdef ENABLE_PYTHON
py::list AsPyList();
#endif
// Checks if the given index is a valid index for this tensor.
// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
// @param index
// @return bool
/// \brief Checks if the given index is a valid index for this tensor.
/// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
/// \param[in] index
/// \return bool
bool IsValidIndex(const std::vector<dsize_t> &index) const;
TensorShape Squeeze() const;
std::vector<dsize_t> Strides() const;
// Returns the location of the item assuming row major memory layout.
// @param index
// @return
/// \brief Returns the location of the item assuming row major memory layout.
/// \param[in] index
/// \param[out] flat_index
/// \return
Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
private:
......@@ -174,11 +183,11 @@ class TensorShape {
// Vector to keep the strides of the shape. The size is rank+1
std::vector<dsize_t, IntAlloc> strides_;
// Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape.
// @tparam T list
// @param list Iterable list
// @return true if the shape is valid and no overflow would be generated when counting the number of elements.
// False otherwise.
/// \brief Internal utility function to iterate over a list,
/// check if the dim is valid and then insert it into the shape.
/// \tparam T Type of the iterable list
/// \param[in] list Iterable list
/// \note The function is declared void, so the outcome of the validity/overflow check
/// is not reported through a return value.
template <typename T>
void AddListToShape(const T &list);
};
......
......@@ -2,13 +2,12 @@ add_subdirectory(source)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-datasetops OBJECT
set(DATASET_ENGINE_DATASETOPS_SRC_FILES
dataset_op.cc
parallel_op.cc
pipeline_op.cc
barrier_op.cc
batch_op.cc
bucket_batch_by_length_op.cc
device_queue_op.cc
map_op.cc
project_op.cc
......@@ -18,8 +17,18 @@ add_library(engine-datasetops OBJECT
take_op.cc
shuffle_op.cc
zip_op.cc
concat_op.cc
filter_op.cc
build_vocab_op.cc
concat_op.cc
)
if (ENABLE_PYTHON)
set(DATASET_ENGINE_DATASETOPS_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SRC_FILES}
bucket_batch_by_length_op.cc
barrier_op.cc
filter_op.cc
build_vocab_op.cc
)
endif()
add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES})
......@@ -19,7 +19,9 @@
#include <iomanip>
#include "common/utils.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "dataset/engine/data_buffer.h"
#include "dataset/engine/db_connector.h"
#include "dataset/engine/opt/pass.h"
......@@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa
Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
#ifdef ENABLE_PYTHON
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_,
builder_batch_map_func_, builder_pad_map_);
#else
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
builder_num_workers_, builder_cols_to_map_, builder_pad_map_);
#endif
return Status::OK();
}
......@@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() {
return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err));
}
#ifdef ENABLE_PYTHON
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func,
PadInfo pad_map)
......@@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size,
pad_info_(pad_map) {
worker_queues_.Init(num_workers, op_queue_size);
}
#else
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &cols_to_map, PadInfo pad_map)
: ParallelOp(num_workers, op_queue_size),
start_batch_size_(batch_size),
drop_(drop),
pad_(pad),
pyfunc_column_names_(cols_to_map),
pad_info_(pad_map) {
worker_queues_.Init(num_workers, op_queue_size);
}
#endif
Status BatchOp::operator()() {
Status rc = LaunchThreadsAndInitOp();
......@@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
std::unique_ptr<DataBuffer> *db) {
RETURN_UNEXPECTED_IF_NULL(table_pair.first);
if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
#ifdef ENABLE_PYTHON
if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
#endif
if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed
(*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone);
std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>();
......@@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) {
return Status::OK();
}
#ifdef ENABLE_PYTHON
Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
TensorBatchTable input_table;
input_table.reserve(pyfunc_column_names_.size());
......@@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
}
return Status::OK();
}
#endif
Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
#ifdef ENABLE_PYTHON
if (batch_size_func_ != nullptr) {
RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info));
} else {
(*batch_size) = start_batch_size_;
}
#else
(*batch_size) = start_batch_size_;
#endif
return Status::OK();
}
#ifdef ENABLE_PYTHON
Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
{
// Acquire Python GIL
......@@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
}
return Status(StatusCode::kOK);
}
#endif
Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &pad_info,
const std::unordered_map<std::string, int32_t> &column_name_id_map) {
......
......@@ -89,6 +89,7 @@ class BatchOp : public ParallelOp {
return *this;
}
#ifdef ENABLE_PYTHON
// set columns to perform map on
// @param const std::vector<std::string> & cols_to_map - name of columns to perform map on
// @return Builder & reference to builder class object
......@@ -104,6 +105,7 @@ class BatchOp : public ParallelOp {
builder_batch_size_func_ = batch_size_func;
return *this;
}
#endif
// @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg
// @return Status - The error code return
......@@ -121,8 +123,10 @@ class BatchOp : public ParallelOp {
int32_t builder_op_connector_size_;
std::vector<std::string> builder_cols_to_map_;
PadInfo builder_pad_map_;
#ifdef ENABLE_PYTHON
py::function builder_batch_size_func_;
py::function builder_batch_map_func_;
#endif
};
enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 };
......@@ -144,6 +148,7 @@ class BatchOp : public ParallelOp {
const int64_t get_epoch_num() const { return epoch_num_; }
};
#ifdef ENABLE_PYTHON
// BatchOp constructor
// @param int32_t batch_size
// @param bool drop
......@@ -152,6 +157,10 @@ class BatchOp : public ParallelOp {
// @param int32_t num_workers
BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map);
#else
BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &, PadInfo pad_map);
#endif
// BatchOp destructor
~BatchOp() {}
......@@ -219,10 +228,13 @@ class BatchOp : public ParallelOp {
// @return Status - The error code return
Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
std::unique_ptr<DataBuffer> *db);
#ifdef ENABLE_PYTHON
// Function that calls pyfunc to perform map on batch
// @param (std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensor
// @return Status - The error code return
Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair);
#endif
// @param const PadInfo &pad_info pad info to unpack
// @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping
......@@ -247,6 +259,7 @@ class BatchOp : public ParallelOp {
// @return Status - The error code return
Status LaunchThreadsAndInitOp();
#ifdef ENABLE_PYTHON
// Invoke batch size function with current BatchInfo to generate batch size.
// @return Status - The error code return
Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info);
......@@ -254,6 +267,7 @@ class BatchOp : public ParallelOp {
// Invoke batch map function with current BatchInfo to generate tensors to batch.
// @return Status - The error code return
Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info);
#endif
int32_t start_batch_size_;
bool drop_; // bool for whether to drop remainder or not
......@@ -262,8 +276,10 @@ class BatchOp : public ParallelOp {
PadInfo pad_info_; // column names to perform padding on
std::unique_ptr<ChildIterator> child_iterator_; // child iterator for fetching TensorRows 1 by 1
QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_; // internal queue for syncing worker
#ifdef ENABLE_PYTHON
py::function batch_size_func_; // Function pointer of batch size function
py::function batch_map_func_; // Function pointer of per batch map function
#endif
};
} // namespace dataset
} // namespace mindspore
......
add_subdirectory(sampler)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-datasetops-source OBJECT
generator_op.cc
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
io_block.cc
mindrecord_op.cc
tf_reader_op.cc
image_folder_op.cc
mnist_op.cc
voc_op.cc
coco_op.cc
manifest_op.cc
cifar_op.cc
random_data_op.cc
celeba_op.cc
text_file_op.cc
clue_op.cc
)
\ No newline at end of file
)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
mindrecord_op.cc
tf_reader_op.cc
)
if (ENABLE_PYTHON)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
generator_op.cc
voc_op.cc
manifest_op.cc
)
endif()
add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES})
\ No newline at end of file
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-datasetops-source-sampler OBJECT
set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
distributed_sampler.cc
pk_sampler.cc
python_sampler.cc
random_sampler.cc
sampler.cc
sequential_sampler.cc
subset_random_sampler.cc
weighted_random_sampler.cc
)
if (ENABLE_PYTHON)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}
python_sampler.cc
)
endif()
add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES})
......@@ -89,6 +89,7 @@ void Sampler::Print(std::ostream &out, bool show_all) const {
}
}
#ifdef ENABLE_PYTHON
Status Sampler::GetAllIdsThenReset(py::array *data) {
std::unique_ptr<DataBuffer> db;
std::shared_ptr<Tensor> sample_ids;
......@@ -120,6 +121,7 @@ Status Sampler::GetAllIdsThenReset(py::array *data) {
RETURN_IF_NOT_OK(ResetSampler());
return Status::OK();
}
#endif
Status Sampler::SetNumSamples(int64_t num_samples) {
CHECK_FAIL_RETURN_UNEXPECTED(num_samples >= 0, "num_samples is negative");
......
......@@ -74,8 +74,11 @@ class Sampler {
// @return - The error code return
virtual Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) = 0;
// This function only called by python layer. Not needed by Android.
#ifdef ENABLE_PYTHON
// return all ids in one epoch as a numpy array, then call reset
Status GetAllIdsThenReset(py::array *data);
#endif
// for next epoch of sampleIds
// @return - The error code return
......@@ -155,5 +158,4 @@ class Sampler {
};
} // namespace dataset
} // namespace mindspore
#endif // DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SAMPLER_H_
......@@ -387,6 +387,7 @@ Status Graph::GetMetaInfo(MetaInfo *meta_info) {
return Status::OK();
}
#ifdef ENABLE_PYTHON
Status Graph::GraphInfo(py::dict *out) {
MetaInfo meta_info;
RETURN_IF_NOT_OK(GetMetaInfo(&meta_info));
......@@ -398,6 +399,7 @@ Status Graph::GraphInfo(py::dict *out) {
(*out)["edge_feature_type"] = py::cast(meta_info.edge_feature_type);
return Status::OK();
}
#endif
Status Graph::LoadNodeAndEdge() {
GraphLoader gl(dataset_file_, num_workers_);
......
......@@ -140,8 +140,10 @@ class Graph {
// @return Status - The error code return
Status GetMetaInfo(MetaInfo *meta_info);
#ifdef ENABLE_PYTHON
// Return meta information to python layer
Status GraphInfo(py::dict *out);
#endif
Status Init();
......
......@@ -21,13 +21,15 @@
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/rename_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/skip_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#ifdef ENABLE_PYTHON
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#endif
#include "dataset/engine/datasetops/source/image_folder_op.h"
#include "dataset/engine/datasetops/take_op.h"
#include "dataset/engine/datasetops/zip_op.h"
......@@ -111,35 +113,37 @@ Status NodePass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) {
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) {
Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) {
Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) {
#ifdef ENABLE_PYTHON
Status NodePass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) {
Status NodePass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
// Fallback to base class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
#endif
Status NodePass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) {
// Fallback to base class visitor by default
......
......@@ -33,18 +33,20 @@ class ProjectOp;
class RenameOp;
class FilterOp;
class SkipOp;
class ShuffleOp;
class GeneratorOp;
class MindRecordOp;
class TFReaderOp;
#ifdef ENABLE_PYTHON
class FilterOp;
class GeneratorOp;
#endif
class TakeOp;
class ZipOp;
......@@ -122,18 +124,20 @@ class NodePass : public Pass {
virtual Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified);
#ifdef ENABLE_PYTHON
virtual Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified);
#endif
virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified);
......
......@@ -50,12 +50,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) {
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting FilterOp" << '\n';
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting SkipOp" << '\n';
......@@ -67,11 +61,6 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) {
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting GeneratorOp" << '\n';
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting MindRecordOp" << '\n';
......@@ -84,6 +73,20 @@ Status PrinterPass::RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified)
return Status::OK();
}
#ifdef ENABLE_PYTHON
Status PrinterPass::RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting FilterOp" << '\n';
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting GeneratorOp" << '\n';
return Status::OK();
}
#endif
Status PrinterPass::RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting TakeOp" << '\n';
......
......@@ -35,18 +35,20 @@ class PrinterPass : public NodePass {
Status RunOnNode(std::shared_ptr<RenameOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<SkipOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<ShuffleOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<MindRecordOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<TFReaderOp> node, bool *modified) override;
#ifdef ENABLE_PYTHON
Status RunOnNode(std::shared_ptr<FilterOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<GeneratorOp> node, bool *modified) override;
#endif
Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified) override;
......
../../../core/constants.h
\ No newline at end of file
../../../core/data_type.h
\ No newline at end of file
../../../core/tensor_shape.h
\ No newline at end of file
../../../util/status.h
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_INCLUDE_DATASETS_H_
#define DATASET_INCLUDE_DATASETS_H_
#include <vector>
#include <memory>
#include <set>
#include <map>
#include <utility>
#include <string>
#include "dataset/include/tensor.h"
#include "dataset/include/iterator.h"
#include "dataset/include/samplers.h"
namespace mindspore {
namespace dataset {
// Forward declare
class DatasetOp;
class DataSchema;
class Tensor;
class TensorShape;
namespace api {
class TensorOperation;
class SamplerObj;
class ImageFolderDataset;
class MnistDataset;
class BatchDataset;
class RepeatDataset;
class MapDataset;
class ShuffleDataset;
class Cifar10Dataset;
class ProjectDataset;
/// \brief Function to create an ImageFolderDataset
/// \note A source dataset that reads images from a tree of directories
/// All images within one folder have the same label
/// The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] decode A flag to decode in ImageFolder (defaults to false)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr` (the default),
/// a `RandomSampler` will be used to randomly iterate the entire dataset
/// \param[in] extensions File extensions to be read (defaults to an empty set)
/// \param[in] class_indexing A class name to label map (defaults to an empty map)
/// \return Shared pointer to the current ImageFolderDataset
std::shared_ptr<ImageFolderDataset> ImageFolder(std::string dataset_dir, bool decode = false,
std::shared_ptr<SamplerObj> sampler = nullptr,
std::set<std::string> extensions = {},
std::map<std::string, int32_t> class_indexing = {});
/// \brief Function to create a MnistDataset
/// \note The generated dataset has two columns ['image', 'label']
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr` (the default),
/// a `RandomSampler` will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler = nullptr);
/// \brief Function to create a Cifar10 Dataset
/// \note The generated dataset has two columns ['image', 'label']
/// \note Unlike ImageFolder() and Mnist(), this declaration has no default arguments, so
/// num_samples and sampler must always be passed explicitly.
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] num_samples The number of images to be included in the dataset
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is `nullptr`, a `RandomSampler`
/// will be used to randomly iterate the entire dataset
/// \return Shared pointer to the current Dataset
std::shared_ptr<Cifar10Dataset> Cifar10(const std::string &dataset_dir, int32_t num_samples,
std::shared_ptr<SamplerObj> sampler);
/// \class Dataset datasets.h
/// \brief A base class to represent a dataset in the data pipeline.
class Dataset : public std::enable_shared_from_this<Dataset> {
public:
friend class Iterator;
/// \brief Constructor
Dataset();
/// \brief Destructor
~Dataset() = default;
/// \brief Pure virtual function to convert a Dataset class into a runtime dataset object
/// \return shared pointer to the list of newly created DatasetOps
virtual std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() = 0;
/// \brief Pure virtual function for derived class to implement parameters validation
/// \return bool True if all the params are valid
virtual bool ValidateParams() = 0;
/// \brief Setter function for runtime number of workers
/// \param[in] num_workers The number of threads in this operator
/// \return Shared pointer to the original object
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers) {
num_workers_ = num_workers;
return shared_from_this();
}
/// \brief Function to create an Iterator over the Dataset pipeline
/// \return Shared pointer to the Iterator
std::shared_ptr<Iterator> CreateIterator();
/// \brief Function to create a BatchDataset
/// \notes Combines batch_size number of consecutive rows into batches
/// \param[in] batch_size Path to the root directory that contains the dataset
/// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
/// batch. If true, and if there are less than batch_size rows
/// available to make the last batch, then those rows will
/// be dropped and not propagated to the next node
/// \return Shared pointer to the current BatchDataset
std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
/// \brief Function to create a RepeatDataset
/// \notes Repeats this dataset count times. Repeat indefinitely if count is -1
/// \param[in] count Number of times the dataset should be repeated
/// \return Shared pointer to the current Dataset
/// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset`
/// due to a limitation in the current implementation
std::shared_ptr<Dataset> Repeat(int32_t count = -1);
/// \brief Function to create a MapDataset
/// \notes Applies each operation in operations to this dataset
/// \param[in] operations Vector of operations to be applied on the dataset. Operations are
/// applied in the order they appear in this list
/// \param[in] input_columns Vector of the names of the columns that will be passed to the first
/// operation as input. The size of this list must match the number of
/// input columns expected by the first operator. The default input_columns
/// is the first column
/// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation
/// This parameter is mandatory if len(input_columns) != len(output_columns)
/// The size of this list must match the number of output columns of the
/// last operation. The default output_columns will have the same
/// name as the input columns, i.e., the columns will be replaced
/// \param[in] project_columns A list of column names to project
/// \return Shared pointer to the current MapDataset
std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
std::vector<std::string> input_columns = {},
std::vector<std::string> output_columns = {},
const std::vector<std::string> &project_columns = {});
/// \brief Function to create a Shuffle Dataset
/// \notes Randomly shuffles the rows of this dataset
/// \param[in] shuffle_size The size of the buffer (must be larger than 1) for shuffling
/// \return Shared pointer to the current ShuffleDataset
std::shared_ptr<ShuffleDataset> Shuffle(int32_t shuffle_size);
/// \brief Function to create a Project Dataset
/// \notes Applies project to the dataset
/// \param[in] columns The name of columns to project
/// \return Shared pointer to the current ProjectDataset
std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns);
protected:
// Links to neighbouring Dataset nodes: the child nodes and the parent node of this node.
std::vector<std::shared_ptr<Dataset>> children;
std::shared_ptr<Dataset> parent;
// Per-node settings shared by all derived datasets.
// NOTE(review): presumably worker count, rows per buffer and connector queue size used when
// the runtime ops are built -- confirm against the Build() implementations.
int32_t num_workers_;
int32_t rows_per_buffer_;
int32_t connector_que_size_;
};
/* ####################################### Derived Dataset classes ################################# */
/// \class ImageFolderDataset
/// \brief A Dataset derived class to represent ImageFolder dataset
class ImageFolderDataset : public Dataset {
public:
/// \brief Constructor
/// \note The arguments correspond to the private members below (`extensions` to `exts_`).
ImageFolderDataset(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing);
/// \brief Destructor
~ImageFolderDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
// Configuration of this dataset node; member names mirror the constructor parameters.
// Keep the declaration order: it determines the member initialisation order.
std::string dataset_dir_;
bool decode_;
bool recursive_;
std::shared_ptr<SamplerObj> sampler_;
std::map<std::string, int32_t> class_indexing_;
std::set<std::string> exts_;
};
/// \class MnistDataset
/// \brief A Dataset derived class to represent Mnist dataset
class MnistDataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] dataset_dir Path stored in dataset_dir_
/// \param[in] sampler Sampler object stored in sampler_
MnistDataset(std::string dataset_dir, std::shared_ptr<SamplerObj> sampler);
/// \brief Destructor
~MnistDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
// Configuration of this dataset node; member names mirror the constructor parameters.
std::string dataset_dir_;
std::shared_ptr<SamplerObj> sampler_;
};
/// \class BatchDataset
/// \brief A Dataset derived class to represent a Batch operation (the type returned by Dataset::Batch)
class BatchDataset : public Dataset {
public:
/// \brief Constructor
/// \note The arguments correspond one-to-one to the private members below.
BatchDataset(int32_t batch_size, bool drop_remainder, bool pad, std::vector<std::string> cols_to_map,
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map);
/// \brief Destructor
~BatchDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
// Configuration of this batch node; member names mirror the constructor parameters.
int32_t batch_size_;
bool drop_remainder_;
bool pad_;
std::vector<std::string> cols_to_map_;
// Keyed by a string; each entry pairs a TensorShape with a Tensor.
// NOTE(review): presumably column name -> (pad shape, pad value) -- confirm.
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;
};
/// \class RepeatDataset
/// \brief A Dataset derived class to represent a Repeat operation
class RepeatDataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] count Repeat count stored in repeat_count_
/// \note NOTE(review): `count` is unsigned here while Dataset::Repeat takes an int32_t whose
/// default is -1 (repeat indefinitely); confirm how -1 is expected to reach this constructor.
explicit RepeatDataset(uint32_t count);
/// \brief Destructor
~RepeatDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
uint32_t repeat_count_;
};
/// \class ShuffleDataset
/// \brief A Dataset derived class to represent a Shuffle operation (the type returned by Dataset::Shuffle)
class ShuffleDataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] shuffle_size Value stored in shuffle_size_
/// \param[in] reset_every_epoch Value stored in reset_every_epoch_
ShuffleDataset(int32_t shuffle_size, bool reset_every_epoch);
/// \brief Destructor
~ShuffleDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
int32_t shuffle_size_;
// Not a constructor parameter; how it is initialised is not visible in this header.
uint32_t shuffle_seed_;
bool reset_every_epoch_;
};
/// \class MapDataset
/// \brief A Dataset derived class to represent a Map operation (the type returned by Dataset::Map)
class MapDataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] operations Tensor operations stored in operations_
/// \param[in] input_columns Column names stored in input_columns_ (default empty)
/// \param[in] output_columns Column names stored in output_columns_ (default empty)
/// \param[in] columns Column names to project (default empty); the matching member is project_columns_
MapDataset(std::vector<std::shared_ptr<TensorOperation>> operations, std::vector<std::string> input_columns = {},
std::vector<std::string> output_columns = {}, const std::vector<std::string> &columns = {});
/// \brief Destructor
~MapDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
std::vector<std::shared_ptr<TensorOperation>> operations_;
std::vector<std::string> input_columns_;
std::vector<std::string> output_columns_;
std::vector<std::string> project_columns_;
};
/// \class Cifar10Dataset
/// \brief A Dataset derived class to represent Cifar10 dataset
class Cifar10Dataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] dataset_dir Path stored in dataset_dir_
/// \param[in] num_samples Value stored in num_samples_
/// \param[in] sampler Sampler object stored in sampler_
Cifar10Dataset(const std::string &dataset_dir, int32_t num_samples, std::shared_ptr<SamplerObj> sampler);
/// \brief Destructor
~Cifar10Dataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
// Configuration of this dataset node; member names mirror the constructor parameters.
std::string dataset_dir_;
int32_t num_samples_;
std::shared_ptr<SamplerObj> sampler_;
};
/// \class ProjectDataset
/// \brief A Dataset derived class to represent a Project operation (the type returned by Dataset::Project)
class ProjectDataset : public Dataset {
public:
/// \brief Constructor
/// \param[in] columns Column names stored in columns_
explicit ProjectDataset(const std::vector<std::string> &columns);
/// \brief Destructor
~ProjectDataset() = default;
/// \brief a base class override function to create the required runtime dataset op objects for this class
/// \return shared pointer to the list of newly created DatasetOps
std::shared_ptr<std::vector<std::shared_ptr<DatasetOp>>> Build() override;
/// \brief Parameters validation
/// \return bool true if all the params are valid
bool ValidateParams() override;
private:
std::vector<std::string> columns_;
};
} // namespace api
} // namespace dataset
} // namespace mindspore
#endif // DATASET_INCLUDE_DATASETS_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_INCLUDE_ITERATOR_H_
#define DATASET_INCLUDE_ITERATOR_H_
#include <unordered_map>
#include <memory>
#include <vector>
#include <string>
#include "dataset/include/status.h"
namespace mindspore {
namespace dataset {
// Forward declare
class ExecutionTree;
class DatasetIterator;
class DatasetOp;
class Tensor;
namespace api {
class Dataset;
using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;
// Abstract class for iterating over the dataset.
class Iterator {
public:
/// \brief Constructor
Iterator() = default;
/// \brief Destructor
~Iterator() = default;
/// \brief Method for building and launching the pipeline.
/// \param[in] ops - a vector of DatasetOp in the data pipeline.
/// \return - a Status error code, returns OK if no error encountered.
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);
/// \brief Function to get the next row from the data pipeline.
/// \param[out] row - the output tensor row.
void GetNextRow(TensorMap *row);
/// \brief Function to shut down the data pipeline.
void Stop();
class _Iterator {
public:
explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} {
if (lt_) {
cur_row_ = new TensorMap();
lt_->GetNextRow(cur_row_);
}
}
// Destructor
~_Iterator() {
if (cur_row_) {
delete cur_row_;
}
}
_Iterator &operator++() {
if (lt_) {
++ind_;
lt_->GetNextRow(cur_row_);
}
if (cur_row_ && cur_row_->size() == 0) {
delete cur_row_;
cur_row_ = nullptr;
}
return *this;
} // prefix ++ overload
TensorMap &operator*() { return *cur_row_; } // dereference operator
TensorMap *operator->() { return cur_row_; }
bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; }
private:
int ind_; // the cur node our Iterator points to
Iterator *lt_;
TensorMap *cur_row_;
};
_Iterator begin() { return _Iterator(this); }
_Iterator end() { return _Iterator(nullptr); }
private:
// Runtime tree.
// Use shared_ptr instead of unique_ptr because the DatasetIterator constructor takes in a shared_ptr type.
std::shared_ptr<ExecutionTree> tree_;
// Runtime iterator
std::unique_ptr<DatasetIterator> iterator_;
};
} // namespace api
} // namespace dataset
} // namespace mindspore
#endif // DATASET_INCLUDE_ITERATOR_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_API_SAMPLERS_H_
#define DATASET_API_SAMPLERS_H_
#include <vector>
#include <memory>
namespace mindspore {
namespace dataset {
// Internal Sampler class forward declaration
class Sampler;
namespace api {
/// \brief Abstract base class of the sampler description objects. Derived classes validate their
///     parameters and build the runtime Sampler.
class SamplerObj : public std::enable_shared_from_this<SamplerObj> {
 public:
  /// \brief Constructor
  SamplerObj();

  /// \brief Destructor. Virtual because derived objects are handled through SamplerObj pointers;
  ///     deleting one through a base pointer would otherwise be undefined behaviour.
  virtual ~SamplerObj() = default;

  /// \brief Pure virtual function to convert this object into a runtime Sampler.
  /// \return Shared pointer to the newly created Sampler.
  virtual std::shared_ptr<Sampler> Build() = 0;

  /// \brief Pure virtual function for parameters validation.
  /// \return bool true if all the params are valid
  virtual bool ValidateParams() = 0;
};
class DistributedSamplerObj;
class PKSamplerObj;
class RandomSamplerObj;
class SequentialSamplerObj;
class SubsetRandomSamplerObj;
class WeightedRandomSamplerObj;
/// Function to create a Distributed Sampler.
/// \notes A Sampler that accesses a shard of the dataset.
/// \param[in] num_shards - Number of shards to divide the dataset into.
/// \param[in] shard_id - Shard ID of the current shard within num_shards.
/// \param[in] shuffle - If true, the indices are shuffled (default true).
/// \param[in] num_samples - The number of samples to draw (default 0, documented as all elements).
/// \param[in] seed - The seed in use when shuffle is true (default 1).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true,
int64_t num_samples = 0, uint32_t seed = 1);
/// Function to create a PK Sampler.
/// \notes Samples K elements for each P class in the dataset.
/// This will sample all classes.
/// \param[in] num_val - Number of elements to sample for each class.
/// \param[in] shuffle - If true, the class IDs are shuffled (default false).
/// \param[in] num_samples - The number of samples to draw (default 0, documented as all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);
/// Function to create a Random Sampler.
/// \notes Samples the elements randomly.
/// \param[in] replacement - If true, put the sample ID back for the next draw (default false).
/// \param[in] num_samples - The number of samples to draw (default 0, documented as all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0);
/// Function to create a Sequential Sampler.
/// \notes Samples the dataset elements sequentially, same as not having a sampler.
/// \param[in] start_index - Index to start sampling at (default 0, i.e. start at the first id).
/// \param[in] num_samples - The number of samples to draw (default 0, documented as all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);
/// Function to create a Subset Random Sampler.
/// \notes Samples the elements randomly from a sequence of indices.
/// \param[in] indices - A vector sequence of indices.
/// \param[in] num_samples - The number of samples to draw (default 0, documented as all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices,
int64_t num_samples = 0);
/// Function to create a Weighted Random Sampler.
/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given
/// weights (probabilities).
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples - The number of samples to draw (default 0, documented as all elements).
/// \param[in] replacement - If true, put the sample ID back for the next draw (default true).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights,
int64_t num_samples = 0, bool replacement = true);
/* ####################################### Derived Sampler classes ################################# */
/// \brief SamplerObj describing a distributed sampler (the type returned by DistributedSampler()).
class DistributedSamplerObj : public SamplerObj {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed);
/// \brief Destructor
~DistributedSamplerObj() = default;
/// \brief Creates the runtime Sampler for this object.
std::shared_ptr<Sampler> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
int64_t num_shards_;
int64_t shard_id_;
bool shuffle_;
int64_t num_samples_;
uint32_t seed_;
};
/// \brief SamplerObj describing a PK sampler (the type returned by PKSampler()).
class PKSamplerObj : public SamplerObj {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples);
/// \brief Destructor
~PKSamplerObj() = default;
/// \brief Creates the runtime Sampler for this object.
std::shared_ptr<Sampler> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
int64_t num_val_;
bool shuffle_;
int64_t num_samples_;
};
/// \brief SamplerObj describing a random sampler (the type returned by RandomSampler()).
class RandomSamplerObj : public SamplerObj {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
RandomSamplerObj(bool replacement, int64_t num_samples);
/// \brief Destructor
~RandomSamplerObj() = default;
/// \brief Creates the runtime Sampler for this object.
std::shared_ptr<Sampler> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
bool replacement_;
int64_t num_samples_;
};
/// \brief SamplerObj describing a sequential sampler (the type returned by SequentialSampler()).
class SequentialSamplerObj : public SamplerObj {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
SequentialSamplerObj(int64_t start_index, int64_t num_samples);
/// \brief Destructor
~SequentialSamplerObj() = default;
/// \brief Creates the runtime Sampler for this object.
std::shared_ptr<Sampler> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
int64_t start_index_;
int64_t num_samples_;
};
/// \brief SamplerObj describing a subset random sampler (the type returned by SubsetRandomSampler()).
class SubsetRandomSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] indices - A vector sequence of indices; copied into this object.
  /// \param[in] num_samples - The number of samples to draw.
  SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples);

  /// \brief Destructor
  ~SubsetRandomSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this object.
  std::shared_ptr<Sampler> Build() override;

  /// \brief Parameters validation
  bool ValidateParams() override;

 private:
  // Stored by value: this object is handed out through a shared_ptr and routinely outlives the
  // caller's vector (e.g. a braced temporary), so keeping a reference would leave it dangling.
  const std::vector<int64_t> indices_;
  int64_t num_samples_;
};
/// \brief SamplerObj describing a weighted random sampler (the type returned by WeightedRandomSampler()).
class WeightedRandomSamplerObj : public SamplerObj {
 public:
  /// \brief Constructor
  /// \param[in] weights - A vector sequence of weights; copied into this object.
  /// \param[in] num_samples - The number of samples to draw (default 0).
  /// \param[in] replacement - If true, put the sample ID back for the next draw (default true).
  explicit WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples = 0,
                                    bool replacement = true);

  /// \brief Destructor
  ~WeightedRandomSamplerObj() = default;

  /// \brief Creates the runtime Sampler for this object.
  std::shared_ptr<Sampler> Build() override;

  /// \brief Parameters validation
  bool ValidateParams() override;

 private:
  // Stored by value: this object is handed out through a shared_ptr and routinely outlives the
  // caller's vector (e.g. a braced temporary), so keeping a reference would leave it dangling.
  const std::vector<double> weights_;
  int64_t num_samples_;
  bool replacement_;
};
} // namespace api
} // namespace dataset
} // namespace mindspore
#endif // DATASET_API_SAMPLERS_H_
../util/status.h
\ No newline at end of file
../core/tensor.h
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_API_TRANSFORMS_H_
#define DATASET_API_TRANSFORMS_H_
#include <vector>
#include <memory>
#include "dataset/core/constants.h"
namespace mindspore {
namespace dataset {
class TensorOp;
namespace api {
// Abstract class to represent a tensor operation (transform) in the data pipeline.
class TensorOperation : public std::enable_shared_from_this<TensorOperation> {
 public:
  /// \brief Constructor
  TensorOperation();

  /// \brief Destructor. Virtual because derived operations are handled through TensorOperation
  ///     pointers; deleting one through a base pointer would otherwise be undefined behaviour.
  virtual ~TensorOperation() = default;

  /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object.
  /// \return shared pointer to the newly created TensorOp.
  virtual std::shared_ptr<TensorOp> Build() = 0;

  /// \brief Pure virtual function for parameters validation.
  /// \return bool true if all the params are valid
  virtual bool ValidateParams() = 0;
};
// Transform operations for performing computer vision.
namespace vision {
class NormalizeOperation;
class DecodeOperation;
class ResizeOperation;
class RandomCropOperation;
class CenterCropOperation;
class UniformAugOperation;
class RandomHorizontalFlipOperation;
class RandomVerticalFlipOperation;
class RandomRotationOperation;
class PadOperation;
class CutOutOperation;
class RandomColorAdjustOperation;
/// \brief Function to create a Normalize TensorOperation.
/// \notes Normalize the input image with respect to mean and standard deviation.
/// \param[in] mean - a vector of mean values for each channel, w.r.t. channel order.
/// \param[in] std - a vector of standard deviations for each channel, w.r.t. channel order.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std);
/// \brief Function to create a Decode TensorOperation.
/// \notes Decode the input image in RGB mode.
/// \param[in] rgb - a boolean of whether to decode in RGB mode or not (default true).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<DecodeOperation> Decode(bool rgb = true);
/// \brief Function to create a Resize TensorOperation.
/// \notes Resize the input image to the given size.
/// \param[in] size - a vector representing the output size of the resized image.
/// If size is a single value, the image will be resized to this value with
/// the same image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] interpolation An enum for the mode of interpolation (default InterpolationMode::kLinear)
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size,
InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Function to create a RandomCrop TensorOperation.
/// \notes Crop the input image at a random location.
/// \param[in] size - a vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \param[in] padding - a vector with the value of pixels to pad the image. If 4 values are provided,
/// it pads the left, top, right and bottom respectively (default {0, 0, 0, 0}).
/// \param[in] pad_if_needed - a boolean whether to pad the image if either side is smaller than
/// the given output size (default false).
/// \param[in] fill_value - a vector representing the pixel intensity of the borders, it is used to
/// fill R, G, B channels respectively (default {0, 0, 0}).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomCropOperation> RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
bool pad_if_needed = false,
std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Function to create a CenterCrop TensorOperation.
/// \notes Crops the input image at the center to the given size.
/// \param[in] size - a vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \return Shared pointer to the current TensorOperation (a CenterCropOperation).
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size);
/// \brief Function to create a UniformAugment TensorOperation.
/// \notes Tensor operation to perform randomly selected augmentation.
/// \param[in] operations - a vector of TensorOperation operations.
/// \param[in] num_ops - integer representing the number of OPs to be selected and applied (default 2).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<UniformAugOperation> UniformAugment(std::vector<std::shared_ptr<TensorOperation>> operations,
int32_t num_ops = 2);
/// \brief Function to create a RandomHorizontalFlip TensorOperation.
/// \notes Tensor operation to perform random horizontal flip.
/// \param[in] prob - float representing the probability of flip (default 0.5).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomHorizontalFlipOperation> RandomHorizontalFlip(float prob = 0.5);
/// \brief Function to create a RandomVerticalFlip TensorOperation.
/// \notes Tensor operation to perform random vertical flip.
/// \param[in] prob - float representing the probability of flip (default 0.5).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomVerticalFlipOperation> RandomVerticalFlip(float prob = 0.5);
/// \brief Function to create a RandomRotation TensorOperation
/// \notes Rotates the image according to parameters
/// \param[in] degrees A float vector size 2, representing the starting and ending degree
/// \param[in] resample An enum for the mode of interpolation (default InterpolationMode::kNearestNeighbour)
/// \param[in] expand A boolean representing whether the image is expanded after rotation (default false)
/// \param[in] center A float vector size 2, representing the x and y center of rotation (default {-1, -1}).
/// \param[in] fill_value A uint8_t vector size 3, representing the rgb value of the fill color (default {0, 0, 0})
/// \return Shared pointer to the current TensorOperation
std::shared_ptr<RandomRotationOperation> RandomRotation(
std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
std::vector<float> center = {-1, -1}, std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Function to create a Pad TensorOperation
/// \notes Pads the image according to padding parameters
/// \param[in] padding A vector representing the number of pixels to pad the image
/// If vector has one value, it pads all sides of the image with that value
/// If vector has two values, it pads left and right with the first and
/// top and bottom with the second value
/// If vector has four values, it pads left, top, right, and bottom with
/// those values respectively
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
/// BorderType::kConstant. If 3 values are provided,
/// it is used to fill R, G, B channels respectively (default {0})
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant)
/// Can be any of
/// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric]
/// - BorderType::kConstant, means it fills the border with constant values
/// - BorderType::kEdge, means it pads with the last value on the edge
/// - BorderType::kReflect, means it reflects the values on the edge omitting the last value of edge
/// - BorderType::kSymmetric, means it reflects the values on the edge repeating the last value of edge
/// \return Shared pointer to the current TensorOperation
std::shared_ptr<PadOperation> Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Function to create a CutOut TensorOperation
/// \notes Randomly cut (mask) out a given number of square patches from the input image
/// \param[in] length Integer representing the side length of each square patch
/// \param[in] num_patches Integer representing the number of patches to be cut out of an image (default 1)
/// \return Shared pointer to the current TensorOperation
std::shared_ptr<CutOutOperation> CutOut(int32_t length, int32_t num_patches = 1);
/// \brief Function to create a RandomColorAdjust TensorOperation
/// \notes Randomly adjust the brightness, contrast, saturation, and hue of the input image
/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1}
/// \param[in] hue Hue adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
/// Default value is {0, 0}
/// \return Shared pointer to the current TensorOperation
std::shared_ptr<RandomColorAdjustOperation> RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0},
std::vector<float> contrast = {1.0, 1.0},
std::vector<float> saturation = {1.0, 1.0},
std::vector<float> hue = {0.0, 0.0});
/* ####################################### Derived TensorOperation classes ################################# */
/// \brief TensorOperation describing a Normalize transform (the type returned by Normalize()).
class NormalizeOperation : public TensorOperation {
public:
/// \brief Constructor; the arguments correspond to the private members below.
NormalizeOperation(std::vector<float> mean, std::vector<float> std);
/// \brief Destructor
~NormalizeOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<float> mean_;
std::vector<float> std_;
};
/// \brief TensorOperation describing a Decode transform (the type returned by Decode()).
class DecodeOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] rgb Value stored in rgb_ (default true)
explicit DecodeOperation(bool rgb = true);
/// \brief Destructor
~DecodeOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
bool rgb_;
};
/// \brief TensorOperation describing a Resize transform (the type returned by Resize()).
class ResizeOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] size Output size stored in size_
/// \param[in] interpolation_mode Interpolation mode (default InterpolationMode::kLinear); the matching
/// member is interpolation_
explicit ResizeOperation(std::vector<int32_t> size,
InterpolationMode interpolation_mode = InterpolationMode::kLinear);
/// \brief Destructor
~ResizeOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<int32_t> size_;
InterpolationMode interpolation_;
};
/// \brief TensorOperation describing a RandomCrop transform (the type returned by RandomCrop()).
class RandomCropOperation : public TensorOperation {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
/// Defaults: padding {0, 0, 0, 0}, pad_if_needed false, fill_value {0, 0, 0}.
RandomCropOperation(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Destructor
~RandomCropOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<int32_t> size_;
std::vector<int32_t> padding_;
bool pad_if_needed_;
std::vector<uint8_t> fill_value_;
};
/// \brief TensorOperation describing a CenterCrop transform (the type returned by CenterCrop()).
class CenterCropOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] size Output size stored in size_
explicit CenterCropOperation(std::vector<int32_t> size);
/// \brief Destructor
~CenterCropOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<int32_t> size_;
};
/// \brief TensorOperation describing a UniformAugment transform (the type returned by UniformAugment()).
class UniformAugOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] operations Candidate operations stored in operations_
/// \param[in] num_ops Value stored in num_ops_ (default 2)
explicit UniformAugOperation(std::vector<std::shared_ptr<TensorOperation>> operations, int32_t num_ops = 2);
/// \brief Destructor
~UniformAugOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<std::shared_ptr<TensorOperation>> operations_;
int32_t num_ops_;
};
/// \brief TensorOperation describing a RandomHorizontalFlip transform (the type returned by
/// RandomHorizontalFlip()).
class RandomHorizontalFlipOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] probability Value stored in probability_ (default 0.5)
explicit RandomHorizontalFlipOperation(float probability = 0.5);
/// \brief Destructor
~RandomHorizontalFlipOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
float probability_;
};
/// \brief TensorOperation describing a RandomVerticalFlip transform (the type returned by
/// RandomVerticalFlip()).
class RandomVerticalFlipOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] probability Value stored in probability_ (default 0.5)
explicit RandomVerticalFlipOperation(float probability = 0.5);
/// \brief Destructor
~RandomVerticalFlipOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
float probability_;
};
/// \brief TensorOperation describing a RandomRotation transform (the type returned by RandomRotation()).
class RandomRotationOperation : public TensorOperation {
public:
/// \brief Constructor; each argument has a same-named private member below.
RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand,
std::vector<float> center, std::vector<uint8_t> fill_value);
/// \brief Destructor
~RandomRotationOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
// Note: center_ is declared before expand_, which differs from the constructor parameter order.
// Members are initialised in declaration order, so keep this order in sync with the constructor.
std::vector<float> degrees_;
InterpolationMode interpolation_mode_;
std::vector<float> center_;
bool expand_;
std::vector<uint8_t> fill_value_;
};
/// \brief TensorOperation describing a Pad transform (the type returned by Pad()).
class PadOperation : public TensorOperation {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
/// Defaults: fill_value {0}, padding_mode BorderType::kConstant.
PadOperation(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor
~PadOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<int32_t> padding_;
std::vector<uint8_t> fill_value_;
BorderType padding_mode_;
};
/// \brief TensorOperation describing a CutOut transform (the type returned by CutOut()).
class CutOutOperation : public TensorOperation {
public:
/// \brief Constructor
/// \param[in] length Value stored in length_
/// \param[in] num_patches Value stored in num_patches_ (default 1)
explicit CutOutOperation(int32_t length, int32_t num_patches = 1);
/// \brief Destructor
~CutOutOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
int32_t length_;
int32_t num_patches_;
};
/// \brief TensorOperation describing a RandomColorAdjust transform (the type returned by
/// RandomColorAdjust()).
class RandomColorAdjustOperation : public TensorOperation {
public:
/// \brief Constructor; the arguments correspond one-to-one to the private members below.
/// Defaults: brightness, contrast and saturation {1.0, 1.0}; hue {0.0, 0.0}.
RandomColorAdjustOperation(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
/// \brief Destructor
~RandomColorAdjustOperation() = default;
/// \brief Creates the runtime TensorOp for this operation.
std::shared_ptr<TensorOp> Build() override;
/// \brief Parameters validation
bool ValidateParams() override;
private:
std::vector<float> brightness_;
std::vector<float> contrast_;
std::vector<float> saturation_;
std::vector<float> hue_;
};
} // namespace vision
} // namespace api
} // namespace dataset
} // namespace mindspore
#endif // DATASET_API_TRANSFORMS_H_
../../../utils/log_adapter.h
\ No newline at end of file
../../../utils/overload.h
\ No newline at end of file
......@@ -2,7 +2,13 @@ add_subdirectory(image)
add_subdirectory(data)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(kernels OBJECT
py_func_op.cc
tensor_op.cc)
target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS})
# Assemble the "kernels" object library. tensor_op.cc is always built; the
# Python-backed op and the pybind11 include path are added only when Python
# support is enabled.
set(_KERNELS_SRC_FILES tensor_op.cc)
if (ENABLE_PYTHON)
    # Keep py_func_op.cc first so the source order matches the Python-enabled build.
    list(INSERT _KERNELS_SRC_FILES 0 py_func_op.cc)
endif()
add_library(kernels OBJECT ${_KERNELS_SRC_FILES})
if (ENABLE_PYTHON)
    target_include_directories(kernels PRIVATE ${pybind11_INCLUDE_DIRS})
endif()
......@@ -23,7 +23,9 @@
#include "dataset/core/constants.h"
#include "dataset/core/data_type.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "dataset/core/tensor.h"
#include "dataset/core/tensor_shape.h"
#include "dataset/kernels/data/type_cast_op.h"
......
......@@ -729,7 +729,6 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2);
*output = std::static_pointer_cast<Tensor>(output_cv);
return Status::OK();
} catch (const cv::Exception &e) {
RETURN_STATUS_UNEXPECTED("Unexpected error in pad");
......
......@@ -35,10 +35,6 @@
namespace mindspore {
namespace dataset {
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
void JpegErrorExitCustom(j_common_ptr cinfo);
struct JpegErrorManagerCustom {
......
......@@ -16,6 +16,7 @@
#include "dataset/kernels/image/pad_op.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/core/constants.h"
#include "dataset/util/status.h"
namespace mindspore {
......
......@@ -21,7 +21,7 @@
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
#include "dataset/kernels/image/image_utils.h"
#include "dataset/core/constants.h"
#include "dataset/util/status.h"
namespace mindspore {
......
......@@ -18,7 +18,6 @@
#include "dataset/kernels/image/image_utils.h"
#include "dataset/util/status.h"
#include "dataset/core/cv_tensor.h"
#include "dataset/core/pybind_support.h"
namespace mindspore {
namespace dataset {
......
......@@ -16,8 +16,6 @@
#ifndef DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_
#define DATASET_KERNELS_IMAGE_RANDOM_HORIZONTAL_FLIP_BBOX_OP_H_
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <memory>
#include <random>
#include <cstdlib>
......@@ -26,8 +24,6 @@
#include "dataset/kernels/tensor_op.h"
#include "dataset/util/random.h"
#include "dataset/util/status.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl_bind.h"
namespace mindspore {
namespace dataset {
......
......@@ -27,7 +27,6 @@
namespace mindspore {
namespace dataset {
namespace py = pybind11;
class NgramOp : public TensorOp {
public:
......
......@@ -32,7 +32,15 @@ if(ENABLE_MINDDATA)
endif()
# fetch ut test files
if(ENABLE_MINDDATA)
file(GLOB_RECURSE UT_SRCS ./*.cc)
file(GLOB_RECURSE UT_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./*.cc)
if(NOT ENABLE_PYTHON)
set(PYTHON_RELATED_SRCS
dataset/filter_op_test.cc
dataset/voc_op_test.cc
dataset/manifest_op_test.cc
)
list(REMOVE_ITEM UT_SRCS ${PYTHON_RELATED_SRCS})
endif()
else()
file(GLOB_RECURSE TEMP_UT_SRCS ./*.cc)
foreach(OBJ ${TEMP_UT_SRCS})
......
......@@ -90,6 +90,7 @@ SET(DE_UT_SRCS
concatenate_op_test.cc
cyclic_array_test.cc
perf_data_test.cc
c_api_test.cc
)
add_executable(de_ut_tests ${DE_UT_SRCS})
......
此差异已折叠。
......@@ -23,8 +23,6 @@
using namespace mindspore::dataset;
namespace py = pybind11;
class MindDataTestDatatype : public UT::Common {
public:
MindDataTestDatatype() = default;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册