提交 dee8471d 编写于 作者: Y Yang

!1993 [Dataset] Fix codedex.

上级 fc746062
......@@ -35,13 +35,13 @@ class TensorRow {
static constexpr row_id_type kDefaultRowId = -1; // Default row id
// Type definitions
typedef dsize_t size_type;
typedef std::shared_ptr<Tensor> value_type;
typedef std::shared_ptr<Tensor> &reference;
typedef const std::shared_ptr<Tensor> &const_reference;
typedef std::vector<std::shared_ptr<Tensor>> vector_type;
typedef std::vector<std::shared_ptr<Tensor>>::iterator iterator;
typedef std::vector<std::shared_ptr<Tensor>>::const_iterator const_iterator;
using size_type = dsize_t;
using value_type = std::shared_ptr<Tensor>;
using reference = std::shared_ptr<Tensor> &;
using const_reference = const std::shared_ptr<Tensor> &;
using vector_type = std::vector<std::shared_ptr<Tensor>>;
using iterator = std::vector<std::shared_ptr<Tensor>>::iterator;
using const_iterator = std::vector<std::shared_ptr<Tensor>>::const_iterator;
TensorRow() noexcept;
......
......@@ -84,7 +84,12 @@ Status IteratorBase::FetchNextTensorRow(TensorRow *out_row) {
// Constructor of the DatasetIterator
DatasetIterator::DatasetIterator(std::shared_ptr<ExecutionTree> exe_tree)
: IteratorBase(), root_(exe_tree->root()), tracing_(nullptr), cur_batch_num_(0), cur_connector_size_(0) {
: IteratorBase(),
root_(exe_tree->root()),
tracing_(nullptr),
cur_batch_num_(0),
cur_connector_size_(0),
cur_connector_capacity_(0) {
std::shared_ptr<Tracing> node;
Status s = exe_tree->GetProfilingManager()->GetTracingNode(kDatasetIteratorTracingName, &node);
if (s.IsOk()) {
......
......@@ -237,6 +237,5 @@ Status BucketBatchByLengthOp::Reset() {
return Status::OK();
}
} // namespace dataset
} // namespace mindspore
......@@ -146,7 +146,6 @@ class BucketBatchByLengthOp : public PipelineOp {
std::unique_ptr<ChildIterator> child_iterator_;
std::vector<std::unique_ptr<TensorQTable>> buckets_;
};
} // namespace dataset
} // namespace mindspore
......
......@@ -112,6 +112,8 @@ class BuildVocabOp : public ParallelOp {
BuildVocabOp(std::shared_ptr<Vocab> vocab, std::vector<std::string> col_names, std::pair<int64_t, int64_t> freq_range,
int64_t top_k, int32_t num_workers, int32_t op_connector_size);
~BuildVocabOp() = default;
Status WorkerEntry(int32_t worker_id) override;
// collect the work product from each worker
......
......@@ -30,7 +30,6 @@
namespace mindspore {
namespace dataset {
ClueOp::Builder::Builder()
: builder_device_id_(0), builder_num_devices_(1), builder_num_samples_(0), builder_shuffle_files_(false) {
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
......@@ -545,6 +544,5 @@ Status ClueOp::CountAllFileRows(const std::vector<std::string> &files, int64_t *
}
return Status::OK();
}
} // namespace dataset
} // namespace mindspore
......@@ -264,7 +264,6 @@ class ClueOp : public ParallelOp {
bool load_jagged_connector_;
ColKeyMap cols_to_keyword_;
};
} // namespace dataset
} // namespace mindspore
#endif // DATASET_ENGINE_DATASETOPS_SOURCE_CLUE_OP_H_
......@@ -59,8 +59,8 @@ CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
if (builder_sampler_ == nullptr) {
int64_t num_samples = 0;
int64_t start_index = 0;
const int64_t num_samples = 0;
const int64_t start_index = 0;
builder_sampler_ = std::make_shared<SequentialSampler>(start_index, num_samples);
}
builder_schema_ = std::make_unique<DataSchema>();
......
......@@ -44,6 +44,8 @@ class ConnectorSize : public Sampling {
public:
explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {}
~ConnectorSize() = default;
// Driver function for connector size sampling.
// This function samples the connector size of every nodes within the ExecutionTree
Status Sample() override;
......@@ -54,7 +56,7 @@ class ConnectorSize : public Sampling {
// @return Status - The error code return
Status SaveToFile() override;
Status Init(const std::string &dir_path, const std::string &device_id);
Status Init(const std::string &dir_path, const std::string &device_id) override;
// Parse op infomation and transform to json format
json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size);
......
......@@ -28,7 +28,7 @@ class DatasetIteratorTracing : public Tracing {
DatasetIteratorTracing() = default;
// Destructor
~DatasetIteratorTracing() = default;
~DatasetIteratorTracing() override = default;
// Record tracing data
// @return Status - The error code return
......@@ -40,7 +40,7 @@ class DatasetIteratorTracing : public Tracing {
// @return Status - The error code return
Status SaveToFile() override;
Status Init(const std::string &dir_path, const std::string &device_id);
Status Init(const std::string &dir_path, const std::string &device_id) override;
private:
std::vector<std::string> value_;
......
......@@ -29,7 +29,7 @@ class DeviceQueueTracing : public Tracing {
DeviceQueueTracing() = default;
// Destructor
~DeviceQueueTracing() = default;
~DeviceQueueTracing() override = default;
// Record tracing data
// @return Status - The error code return
......@@ -41,7 +41,7 @@ class DeviceQueueTracing : public Tracing {
// @return Status - The error code return
Status SaveToFile() override;
Status Init(const std::string &dir_path, const std::string &device_id);
Status Init(const std::string &dir_path, const std::string &device_id) override;
private:
std::vector<std::string> value_;
......
......@@ -25,6 +25,7 @@ namespace dataset {
Monitor::Monitor(ExecutionTree *tree) : tree_(tree) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
sampling_interval_ = cfg->monitor_sampling_interval();
max_samples_ = 0;
}
Status Monitor::operator()() {
......
......@@ -33,6 +33,8 @@ class Monitor {
Monitor() = default;
~Monitor() = default;
// Functor for Perf Monitor main loop.
// This function will be the entry point of Mindspore::Dataset::Task
Status operator()();
......
......@@ -99,7 +99,7 @@ class ProfilingManager {
// If profiling is enabled.
bool IsProfilingEnable() const;
std::unordered_map<std::string, std::shared_ptr<Sampling>> &GetSamplingNodes() { return sampling_nodes_; }
const std::unordered_map<std::string, std::shared_ptr<Sampling>> &GetSamplingNodes() { return sampling_nodes_; }
private:
std::unordered_map<std::string, std::shared_ptr<Tracing>> tracing_nodes_;
......
......@@ -119,7 +119,8 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i
data_item.tensorShape_ = dataShapes;
data_item.tensorType_ = datatype;
data_item.dataLen_ = ts->SizeInBytes();
data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](void *elem) {});
data_item.dataPtr_ =
std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](const void *elem) {});
items.emplace_back(data_item);
MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is "
<< ts->Size() << ".";
......
......@@ -21,7 +21,6 @@
namespace mindspore {
namespace dataset {
Status FillOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
Status s = Fill(input, output, fill_value_);
......
......@@ -26,7 +26,6 @@
namespace mindspore {
namespace dataset {
class FillOp : public TensorOp {
public:
explicit FillOp(std::shared_ptr<Tensor> value) : fill_value_(value) {}
......@@ -39,9 +38,7 @@ class FillOp : public TensorOp {
private:
std::shared_ptr<Tensor> fill_value_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_FILL_OP_H
......@@ -351,7 +351,7 @@ vector<uint8_t> ShardColumn::CompressInt(const vector<uint8_t> &src_bytes, const
// Write this int to destination blob
uint64_t u_n = *reinterpret_cast<uint64_t *>(&i_n);
auto temp_bytes = UIntToBytesLittle(u_n, dst_int_type);
for (uint64_t j = 0; j < (kUnsignedOne << dst_int_type); j++) {
for (uint64_t j = 0; j < (kUnsignedOne << static_cast<uint8_t>(dst_int_type)); j++) {
dst_bytes[i_dst++] = temp_bytes[j];
}
......@@ -406,7 +406,10 @@ MSRStatus ShardColumn::UncompressInt(const uint64_t &column_id, std::unique_ptr<
auto data = reinterpret_cast<const unsigned char *>(array_data.get());
*data_ptr = std::make_unique<unsigned char[]>(*num_bytes);
memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes);
int ret_code = memcpy_s(data_ptr->get(), *num_bytes, data, *num_bytes);
if (ret_code != 0) {
MS_LOG(ERROR) << "Failed to copy data!";
}
return SUCCESS;
}
......@@ -444,7 +447,8 @@ int64_t ShardColumn::BytesLittleToMinIntType(const std::vector<uint8_t> &bytes_a
const IntegerType &src_i_type, IntegerType *dst_i_type) {
uint64_t u_temp = 0;
for (uint64_t i = 0; i < (kUnsignedOne << static_cast<uint8_t>(src_i_type)); i++) {
u_temp = (u_temp << kBitsOfByte) + bytes_array[pos + (kUnsignedOne << src_i_type) - kUnsignedOne - i];
u_temp = (u_temp << kBitsOfByte) +
bytes_array[pos + (kUnsignedOne << static_cast<uint8_t>(src_i_type)) - kUnsignedOne - i];
}
int64_t i_out;
......
......@@ -554,26 +554,28 @@ def adjust_hue(img, hue_factor):
Returns:
img (PIL Image), Hue adjusted image.
"""
if not -0.5 <= hue_factor <= 0.5:
raise ValueError('hue_factor {} is not in [-0.5, 0.5].'.format(hue_factor))
image = img
image_hue_factor = hue_factor
if not -0.5 <= image_hue_factor <= 0.5:
raise ValueError('image_hue_factor {} is not in [-0.5, 0.5].'.format(image_hue_factor))
if not is_pil(img):
raise TypeError(augment_error_message.format(type(img)))
if not is_pil(image):
raise TypeError(augment_error_message.format(type(image)))
input_mode = img.mode
if input_mode in {'L', '1', 'I', 'F'}:
return img
mode = image.mode
if mode in {'L', '1', 'I', 'F'}:
return image
h, s, v = img.convert('HSV').split()
hue, saturation, value = img.convert('HSV').split()
np_h = np.array(h, dtype=np.uint8)
np_hue = np.array(hue, dtype=np.uint8)
with np.errstate(over='ignore'):
np_h += np.uint8(hue_factor * 255)
h = Image.fromarray(np_h, 'L')
np_hue += np.uint8(image_hue_factor * 255)
hue = Image.fromarray(np_hue, 'L')
img = Image.merge('HSV', (h, s, v)).convert(input_mode)
return img
image = Image.merge('HSV', (hue, saturation, value)).convert(mode)
return image
def to_type(img, output_type):
......
# Copyright 2020 Huawei Technologies Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# import jsbeautifier
import os
import urllib
import urllib.request
def create_data_cache_dir():
cwd = os.getcwd()
target_directory = os.path.join(cwd, "data_cache")
try:
if not os.path.exists(target_directory):
os.mkdir(target_directory)
except OSError:
print("Creation of the directory %s failed" % target_directory)
return target_directory
def download_and_uncompress(files, source_url, target_directory, is_tar=False):
for f in files:
url = source_url + f
target_file = os.path.join(target_directory, f)
##check if file already downloaded
if not (os.path.exists(target_file) or os.path.exists(target_file[:-3])):
urllib.request.urlretrieve(url, target_file)
if is_tar:
print("extracting from local tar file " + target_file)
rc = os.system("tar -C " + target_directory + " -xvf " + target_file)
else:
print("unzipping " + target_file)
rc = os.system("gunzip -f " + target_file)
if rc != 0:
print("Failed to uncompress ", target_file, " removing")
os.system("rm " + target_file)
##exit with error so that build script will fail
raise SystemError
else:
print("Using cached dataset at ", target_file)
def download_mnist(target_directory=None):
if target_directory is None:
target_directory = create_data_cache_dir()
##create mnst directory
target_directory = os.path.join(target_directory, "mnist")
try:
if not os.path.exists(target_directory):
os.mkdir(target_directory)
except OSError:
print("Creation of the directory %s failed" % target_directory)
MNIST_URL = "http://yann.lecun.com/exdb/mnist/"
files = ['train-images-idx3-ubyte.gz',
'train-labels-idx1-ubyte.gz',
't10k-images-idx3-ubyte.gz',
't10k-labels-idx1-ubyte.gz']
download_and_uncompress(files, MNIST_URL, target_directory, is_tar=False)
return target_directory, os.path.join(target_directory, "datasetSchema.json")
CIFAR_URL = "https://www.cs.toronto.edu/~kriz/"
def download_cifar(target_directory, files, directory_from_tar):
if target_directory is None:
target_directory = create_data_cache_dir()
download_and_uncompress([files], CIFAR_URL, target_directory, is_tar=True)
##if target dir was specify move data from directory created by tar
##and put data into target dir
if target_directory is not None:
tar_dir_full_path = os.path.join(target_directory, directory_from_tar)
all_files = os.path.join(tar_dir_full_path, "*")
cmd = "mv " + all_files + " " + target_directory
if os.path.exists(tar_dir_full_path):
print("copy files back to target_directory")
print("Executing: ", cmd)
rc1 = os.system(cmd)
rc2 = os.system("rm -r " + tar_dir_full_path)
if rc1 != 0 or rc2 != 0:
print("error when running command: ", cmd)
download_file = os.path.join(target_directory, files)
print("removing " + download_file)
os.system("rm " + download_file)
##exit with error so that build script will fail
raise SystemError
##change target directory to directory after tar
return os.path.join(target_directory, directory_from_tar)
def download_cifar10(target_directory=None):
return download_cifar(target_directory, "cifar-10-binary.tar.gz", "cifar-10-batches-bin")
def download_cifar100(target_directory=None):
return download_cifar(target_directory, "cifar-100-binary.tar.gz", "cifar-100-binary")
def download_all_for_test(cwd):
download_mnist(os.path.join(cwd, "testMnistData"))
##Download all datasets to existing test directories
if __name__ == "__main__":
download_all_for_test(os.getcwd())
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册