提交 55249032 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!3572 [MD] fix save pydoc and log

Merge pull request !3572 from liyong126/r0.6_fix_save_pydoc_log
......@@ -410,6 +410,7 @@ Status DEPipeline::SaveDataset(const std::vector<std::string> &file_names, const
std::vector<std::string> index_fields;
s = FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields);
RETURN_IF_NOT_OK(s);
MS_LOG(DEBUG) << "Schema of saved mindrecord: " << mr_json.dump();
if (mindrecord::SUCCESS !=
mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
......@@ -569,6 +570,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
if (column_name_id_map.empty()) {
RETURN_STATUS_UNEXPECTED("Error: column not found.");
}
json dataset_schema;
for (auto &col : column_name_id_map) {
auto idx = col.second;
auto column_name = col.first;
......@@ -580,6 +582,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
auto shapes = column_shape.AsVector();
std::vector<int> mr_shape(shapes.begin(), shapes.end());
std::string el = column_type.ToString();
dataset_schema[column_name] = el;
if (mindrecord::kTypesMap.find(el) == mindrecord::kTypesMap.end()) {
std::string err_msg("Error: can not support data type: " + el);
RETURN_STATUS_UNEXPECTED(err_msg);
......@@ -605,6 +608,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
if (mr_type == "bytes" || !mr_shape.empty()) continue;
index_fields->emplace_back(column_name); // candidate of index fields
}
MS_LOG(DEBUG) << "Schema of dataset: " << dataset_schema.dump();
return Status::OK();
}
Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle,
......
......@@ -83,7 +83,7 @@ MSRStatus ShardWriter::OpenDataFiles(bool append) {
// if not append and mindrecord file exist, return FAILED
fs->open(common::SafeCStr(file), std::ios::in | std::ios::binary);
if (fs->good()) {
MS_LOG(ERROR) << "MindRecord file already existed.";
MS_LOG(ERROR) << "MindRecord file already existed, please delete file: " << common::SafeCStr(file);
fs->close();
return FAILED;
}
......
......@@ -1041,12 +1041,61 @@ class Dataset:
"""
Save the dynamic data processed by the dataset pipeline in a common dataset format. Supported format: mindrecord.
Implicit type casting is applied when saving data as mindrecord. The table below shows how each type is cast.
.. list-table:: Implicit Type Casting of Saving as mindrecord
:widths: 25 25 50
:header-rows: 1
* - type in 'dataset'
- type in 'mindrecord'
- detail
* - DE_BOOL
- None
- Not support
* - DE_INT8
- int32
-
* - DE_UINT8
- bytes(1D uint8)
- Drop dimension
* - DE_INT16
- int32
-
* - DE_UINT16
- int32
-
* - DE_INT32
- int32
-
* - DE_UINT32
- int64
-
* - DE_INT64
- int64
-
* - DE_UINT64
- None
- Not support
* - DE_FLOAT16
- None
- Not support
* - DE_FLOAT32
- float32
-
* - DE_FLOAT64
- float64
-
* - DE_STRING
- string
- Not support multi-dimensional DE_STRING
Note:
1. To save the samples in order, set the dataset's shuffle to false and num_files to 1.
2. Before calling the function, do not use batch or repeat operators, or data augmentation operators
with random attributes in the map operator.
3. Mindreocrd do not support np.uint64, multi-dimensional np.uint8(drop dimension) and
multi-dimensional string.
3. Mindrecord does not support DE_UINT64, multi-dimensional DE_UINT8(drop dimension) and
multi-dimensional DE_STRING.
Args:
file_name (str): Path to dataset file.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册