diff --git a/mindspore/ccsrc/dataset/core/CMakeLists.txt b/mindspore/ccsrc/dataset/core/CMakeLists.txt
index 8141009bf6202c1561d8db3042a8f3b7b5105222..0b9f08d0702e1a08caa11b97d758b0895a44bab1 100644
--- a/mindspore/ccsrc/dataset/core/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/core/CMakeLists.txt
@@ -1,6 +1,10 @@
+ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
+ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
 file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
 add_library(core OBJECT
+  ${EXAMPLE_SRCS}
+  ${FEATURE_SRCS}
   client.cc
   config_manager.cc
   cv_tensor.cc
@@ -9,4 +13,5 @@ add_library(core OBJECT
   tensor.cc
   tensor_shape.cc
   )
+add_dependencies(core mindspore::protobuf)
 target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
diff --git a/mindspore/ccsrc/dataset/core/data_type.cc b/mindspore/ccsrc/dataset/core/data_type.cc
index 4420f78e2dce94f48bb30d80d2c86304560dfd98..744c8c1ca043cce0ca7aaa89754c2f68b444cbbd 100644
--- a/mindspore/ccsrc/dataset/core/data_type.cc
+++ b/mindspore/ccsrc/dataset/core/data_type.cc
@@ -25,14 +25,14 @@ namespace dataset {
 
 uint8_t DataType::SizeInBytes() const {
   if (type_ < DataType::NUM_OF_TYPES)
-    return SIZE_IN_BYTES[type_];
+    return kTypeInfo[type_].sizeInBytes_;  // per-type table row selected by type_
   else
     return 0;
 }
 
 py::dtype DataType::AsNumpyType() const {
   if (type_ < DataType::NUM_OF_TYPES)
-    return py::dtype(PYBIND_TYPES[type_]);
+    return py::dtype(kTypeInfo[type_].pybindType_);  // dtype built from the table's Python type name
   else
     return py::dtype("unknown");
 }
@@ -40,7 +40,7 @@ py::dtype DataType::AsNumpyType() const {
 uint8_t DataType::AsCVType() const {
   uint8_t res = kCVInvalidType;
   if (type_ < DataType::NUM_OF_TYPES) {
-    res = CV_TYPES[type_];
+    res = kTypeInfo[type_].cvType_;
   }
 
   if (res == kCVInvalidType) {
@@ -108,7 +108,7 @@ DataType::DataType(const std::string &type_str) {
 
 std::string DataType::ToString() const {
   if (type_ < DataType::NUM_OF_TYPES)
-    return TO_STRINGS[type_];
+    return kTypeInfo[type_].name_;  // printable name from the per-type table
   else
     return "unknown";
 }
@@ -149,7 +149,7 @@ DataType DataType::FromNpArray(const py::array &arr) {
 std::string DataType::GetPybindFormat() const {
   std::string res;
   if (type_ < DataType::NUM_OF_TYPES) {
-    res = PYBIND_FORMAT_DESCRIPTOR[type_];
+    res = kTypeInfo[type_].pybindFormatDescriptor_;
   }
 
   if (res.empty()) {
diff --git a/mindspore/ccsrc/dataset/core/data_type.h b/mindspore/ccsrc/dataset/core/data_type.h
index eb4bc24c77c8976f10430756b5ad662c9757a01c..f1f0bb2ebbb10b4992b66c3a4b9de99133e047c2 100644
--- a/mindspore/ccsrc/dataset/core/data_type.h
+++ b/mindspore/ccsrc/dataset/core/data_type.h
@@ -51,56 +51,31 @@ class DataType {
     NUM_OF_TYPES
   };
 
-  inline static constexpr uint8_t SIZE_IN_BYTES[] = {0,   // DE_UNKNOWN
-                                                     1,   // DE_BOOL
-                                                     1,   // DE_INT8
-                                                     1,   // DE_UINT8
-                                                     2,   // DE_INT16
-                                                     2,   // DE_UINT16
-                                                     4,   // DE_INT32
-                                                     4,   // DE_UINT32
-                                                     8,   // DE_INT64
-                                                     8,   // DE_UINT64
-                                                     2,   // DE_FLOAT16
-                                                     4,   // DE_FLOAT32
-                                                     8,   // DE_FLOAT64
-                                                     0};  // DE_STRING
-
-  inline static const char *TO_STRINGS[] = {"unknown", "bool",  "int8",   "uint8",   "int16",   "uint16",  "int32",
-                                            "uint32",  "int64", "uint64", "float16", "float32", "float64", "string"};
-
-  inline static const char *PYBIND_TYPES[] = {"object", "bool",  "int8",   "uint8",   "int16",   "uint16", "int32",
-                                              "uint32", "int64", "uint64", "float16", "float32", "double", "bytes"};
-
-  inline static const std::string PYBIND_FORMAT_DESCRIPTOR[] = {"",                                        // DE_UNKNOWN
-                                                                py::format_descriptor<bool>::format(),     // DE_BOOL
-                                                                py::format_descriptor<int8_t>::format(),   // DE_INT8
-                                                                py::format_descriptor<uint8_t>::format(),  // DE_UINT8
-                                                                py::format_descriptor<int16_t>::format(),  // DE_INT16
-                                                                py::format_descriptor<uint16_t>::format(),  // DE_UINT16
-                                                                py::format_descriptor<int32_t>::format(),   // DE_INT32
-                                                                py::format_descriptor<uint32_t>::format(),  // DE_UINT32
-                                                                py::format_descriptor<int64_t>::format(),   // DE_INT64
-                                                                py::format_descriptor<uint64_t>::format(),  // DE_UINT64
-                                                                "e",                                      // DE_FLOAT16
-                                                                py::format_descriptor<float>::format(),   // DE_FLOAT32
-                                                                py::format_descriptor<double>::format(),  // DE_FLOAT64
-                                                                "S"};                                     // DE_STRING
-
-  inline static constexpr uint8_t CV_TYPES[] = {kCVInvalidType,   // DE_UNKNOWN
-                                                CV_8U,            // DE_BOOL
-                                                CV_8S,            // DE_INT8
-                                                CV_8U,            // DE_UINT8
-                                                CV_16S,           // DE_INT16
-                                                CV_16U,           // DE_UINT16
-                                                CV_32S,           // DE_INT32
-                                                kCVInvalidType,   // DE_UINT32
-                                                kCVInvalidType,   // DE_INT64
-                                                kCVInvalidType,   // DE_UINT64
-                                                CV_16F,           // DE_FLOAT16
-                                                CV_32F,           // DE_FLOAT32
-                                                CV_64F,           // DE_FLOAT64
-                                                kCVInvalidType};  // DE_STRING
+  struct TypeInfo {
+    const char *name_;                          // name used to represent the type when printing
+    const uint8_t sizeInBytes_;                 // number of bytes needed for this type (0 in the unknown/string rows)
+    const char *pybindType_;                    // Python matching type, used in get_output_types
+    const std::string pybindFormatDescriptor_;  // pybind format used for numpy types
+    const uint8_t cvType_;                      // OpenCV matching type (kCVInvalidType when there is none)
+  };
+
+  static inline const TypeInfo kTypeInfo[] = {
+    // indexed by the type value (one row per DE_* entry): name, sizeInBytes, pybindType, formatDescriptor, openCV
+    {"unknown", 0, "object", "", kCVInvalidType},                                        // DE_UNKNOWN
+    {"bool", 1, "bool", py::format_descriptor<bool>::format(), CV_8U},                   // DE_BOOL
+    {"int8", 1, "int8", py::format_descriptor<int8_t>::format(), CV_8S},                 // DE_INT8
+    {"uint8", 1, "uint8", py::format_descriptor<uint8_t>::format(), CV_8U},              // DE_UINT8
+    {"int16", 2, "int16", py::format_descriptor<int16_t>::format(), CV_16S},             // DE_INT16
+    {"uint16", 2, "uint16", py::format_descriptor<uint16_t>::format(), CV_16U},          // DE_UINT16
+    {"int32", 4, "int32", py::format_descriptor<int32_t>::format(), CV_32S},             // DE_INT32
+    {"uint32", 4, "uint32", py::format_descriptor<uint32_t>::format(), kCVInvalidType},  // DE_UINT32
+    {"int64", 8, "int64", py::format_descriptor<int64_t>::format(), kCVInvalidType},     // DE_INT64
+    {"uint64", 8, "uint64", py::format_descriptor<uint64_t>::format(), kCVInvalidType},  // DE_UINT64
+    {"float16", 2, "float16", "e", CV_16F},                                              // DE_FLOAT16
+    {"float32", 4, "float32", py::format_descriptor<float>::format(), CV_32F},           // DE_FLOAT32
+    {"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F},           // DE_FLOAT64
+    {"string", 0, "bytes", "S", kCVInvalidType}                                          // DE_STRING
+  };
 
   // No arg constructor to create an unknown shape
   DataType() : type_(DE_UNKNOWN) {}
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/example.proto b/mindspore/ccsrc/dataset/core/example.proto
similarity index 100%
rename from mindspore/ccsrc/dataset/engine/datasetops/source/example.proto
rename to mindspore/ccsrc/dataset/core/example.proto
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/feature.proto b/mindspore/ccsrc/dataset/core/feature.proto
similarity index 100%
rename from mindspore/ccsrc/dataset/engine/datasetops/source/feature.proto
rename to mindspore/ccsrc/dataset/core/feature.proto
diff --git a/mindspore/ccsrc/dataset/core/tensor.cc b/mindspore/ccsrc/dataset/core/tensor.cc
index 54e11ca0fbe74cfec98241b604bf03be39b6330f..629daefc61646ed84a8cd347d177e241dd768afc 100644
--- a/mindspore/ccsrc/dataset/core/tensor.cc
+++ b/mindspore/ccsrc/dataset/core/tensor.cc
@@ -57,18 +57,40 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape),
 }
 
 Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data) : Tensor(shape, type) {
+  if (type.IsNumeric()) {  // byte size below comes from shape/type, so only numeric types are accepted
+    // If the data pointer was given, then we can also populate the tensor with data
+    if (data != nullptr) {
+      // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
+      int64_t byte_size = this->SizeInBytes();
+      Status s = this->AllocateBuffer(byte_size);  // Allocates data_ inside itself
+      if (s.IsOk() && data_ != nullptr) {
+        int ret_code = memcpy_s(data_, byte_size, data, byte_size);  // copy byte_size bytes from the input
+        if (ret_code != 0) {
+          MS_LOG(ERROR) << "Failed to copy data into Tensor!";
+        }
+      } else {
+        MS_LOG(ERROR) << "Failed to create memory for Tensor!";
+      }
+    }
+  } else {
+    MS_LOG(ERROR) << "Type should be numeric to use this constructor.";  // errors are only logged, not returned
+  }
+}
+
+Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length)
+    : Tensor(shape, type) {  // delegate shape/type setup; `length` is the caller-supplied buffer size in bytes
   // If the data pointer was given, then we can also populate the tensor with data
   if (data != nullptr) {
-    // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
-    int64_t byte_size = this->SizeInBytes();
-    static_cast<void>(this->GetMutableBuffer());  // Allocates data_ inside itself
+    // Allocates `length` bytes for data_ (the size is taken from the caller, not from shape/type)
+    Status s = AllocateBuffer(length);
+    if (s.IsError()) {
+      MS_LOG(ERROR) << "Failed to create memory for Tensor!";
+    }
     if (data_ != nullptr) {
-      int ret_code = memcpy_s(data_, byte_size, data, byte_size);
+      int ret_code = memcpy_s(data_, length, data, length);  // copy exactly `length` bytes
       if (ret_code != 0) {
         MS_LOG(ERROR) << "Failed to copy data into Tensor!";
       }
-    } else {
-      MS_LOG(ERROR) << "Failed to create memory for Tensor!";
     }
   }
 }
@@ -98,32 +120,79 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape
   auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
   dsize_t total_length = std::accumulate(strings.begin(), strings.end(), 0, length_sum);
 
-  dsize_t num_bytes = (kOffsetSize + 1) * shape_.NumOfElements() + total_length;
+  // total bytes needed = offset array + strings
+  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
+  // strings will be null-terminated --> need 1 extra byte per element
+  dsize_t num_bytes = (kOffsetSize + 1) * shape_.NumOfElements() + kOffsetSize + total_length;
 
   data_ = data_allocator_->allocate(num_bytes);
 
   auto offset_arr = reinterpret_cast<offset_t *>(data_);
   uchar *buf = GetStringsBuffer();
 
-  offset_t offset = -1;
+  offset_t offset = buf - data_;  // the first string will start here
   uint32_t i = 0;
   for (const auto &str : strings) {
-    //  insert the end index of the string
-    //  end index of a string is the end index of previous string +  the length (including \0)
-    offset = offset + str.length() + 1;
+    //  insert the start index of the string.
     offset_arr[i++] = offset;
     // total bytes are reduced by kOffsetSize
     num_bytes -= kOffsetSize;
     // insert actual string
-    memcpy_s(buf, num_bytes, str.c_str(), str.length() + 1);
-    buf += str.length() + 1;
+    int ret_code = memcpy_s(data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
+    if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
+    //  next string will be stored right after the current one.
+    offset = offset + str.length() + 1;
+    // total bytes are reduced by the length of the string
     num_bytes -= str.length() + 1;
   }
-  this->data_end_ = buf;
+  // store one more offset value so we can get the length of the last string
+  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
+  offset_arr[i] = offset;
+
+  this->data_end_ = data_ + offset_arr[i];
+
   DS_ASSERT(num_bytes == 0);
   if (shape.known()) Tensor::Reshape(shape);
 }
+Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
+    : Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) {
+  // total bytes needed = offset array (one offset per element + 1 extra for the last string's length) + strings
+  // strings are stored null-terminated, so each one occupies str.length() + 1 bytes in the copy loop below
+  // NOTE(review): ByteSizeLong() looks like the serialized message size, not sum(length + 1) -- confirm sizing
+  dsize_t num_bytes = (kOffsetSize)*shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
+
+  data_ = data_allocator_->allocate(num_bytes);
+
+  auto offset_arr = reinterpret_cast<offset_t *>(data_);
+  uchar *buf = GetStringsBuffer();
+
+  offset_t offset = buf - data_;  // the first string will start here
+  uint32_t i = 0;
+  for (; i < bytes_list.value_size(); i++) {
+    const std::string &str = bytes_list.value(i);
+    //  insert the start index of the string.
+    offset_arr[i] = offset;
+    // total bytes are reduced by kOffsetSize
+    num_bytes -= kOffsetSize;
+    // insert actual string
+    int ret_code = memcpy_s(data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
+    if (ret_code != 0) {
+      MS_LOG(ERROR) << "Cannot copy string into Tensor";
+    }
+    //  next string will be stored right after the current one.
+    offset = offset + str.length() + 1;
+    // total bytes are reduced by the length of the string
+    num_bytes -= str.length() + 1;
+  }
+  // store one more offset value so we can get the length of the last string
+  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
+  offset_arr[i] = offset;
+
+  data_end_ = data_ + offset_arr[i];
 
+  DS_ASSERT(num_bytes == 0);
+  if (shape.known()) Tensor::Reshape(shape);
+}
 Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
                             DataType type, const unsigned char *data) {
   if (!shape.known()) {
@@ -152,20 +221,17 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
   }
   return Status::OK();  // returns base-class shared_ptr
 }
-std::string to(std::string x) { return x; }
+
 Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
   }
-  arr.resize({arr.size()});
-  auto itr = arr.begin();
+  arr.resize({arr.size()});  // flatten the py::array so we can iterate once
   std::vector<std::string> strings;
-  for (; itr != arr.end(); itr++) {
-    std::string s = to(py::cast<py::bytes>(*itr));
-    strings.push_back(s);
-  }
-  arr.resize(shape);
+  std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
+
+  arr.resize(shape);  // resize arr back to the original shape
 
   return CreateTensor(ptr, strings, TensorShape{shape});
 }
@@ -190,8 +256,9 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
 
   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
   (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
-  static_cast<void>((*ptr)->GetMutableBuffer());
   int64_t byte_size = (*ptr)->SizeInBytes();
+  RETURN_IF_NOT_OK((*ptr)->AllocateBuffer(byte_size));
+
   unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
   if ((*ptr)->data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
@@ -232,6 +299,13 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std:
   return Status::OK();
 }
 
+Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
+                            const TensorShape &shape) {  // ptr is the output; it is dereferenced without a null check
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *ptr = std::allocate_shared<Tensor>(*alloc, bytes_list, shape);  // runs the BytesList constructor
+  return Status::OK();  // unconditional: no failure is reported from here
+}
+
 // Memcpy the given strided array's used part to consecutive memory
 // Consider a 3-d array
 // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
@@ -370,25 +444,20 @@ void Tensor::Print(std::ostream &out) const {
     out << "[Data area is null]";
   }
 }
-
-// Name: ToFlatIndex()
-// Description: convert a vector style index to number, used to access memory internal use only
-Status Tensor::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const {
-  if (!shape_.IsValidIndex(index)) {
-    std::string err = "Not a valid index";
-    RETURN_STATUS_UNEXPECTED(err);
-  }
-  *flat_index = 0;
-  for (size_t k = 0; k < index.size(); k++) {
-    dsize_t product = 1;
-    for (size_t l = k + 1; l < index.size(); l++) {
-      product *= shape_[l];
+Status Tensor::AllocateBuffer(const dsize_t &length) {  // allocates `length` bytes for data_ if it is unset
+  if (data_ == nullptr) {  // an existing buffer is kept as-is, whatever its size
+    if (data_allocator_ != nullptr) {  // prefer the tensor's allocator; fall back to malloc when none is set
+      data_ = data_allocator_->allocate(length);
+      RETURN_UNEXPECTED_IF_NULL(data_);
+      data_end_ = data_ + length;
+    } else {
+      data_ = static_cast<unsigned char *>(malloc(length));
+      RETURN_UNEXPECTED_IF_NULL(data_);  // check first so a failed malloc never reaches the pointer arithmetic
+      data_end_ = data_ + length;
     }
-    *flat_index += index[k] * product;
   }
   return Status::OK();
 }
-
 const unsigned char *Tensor::GetBuffer() const {
   // This version cannot modify anything.  data_ could possibly be null.
   return data_;
@@ -404,17 +473,11 @@ unsigned char *Tensor::GetMutableBuffer() {
   } else {
     // If the data area is not created, then identify the memory size based
     // on the shape and type and allocate it.
-    if (data_allocator_ != nullptr) {
-      data_ = data_allocator_->allocate(this->SizeInBytes());
-      data_end_ = data_ + SizeInBytes();
+    if (this->AllocateBuffer(this->SizeInBytes()).IsOk()) {
+      return data_;
     } else {
-      data_ = static_cast<unsigned char *>(malloc(this->SizeInBytes()));
-      data_end_ = data_ + SizeInBytes();
-      if (data_ == nullptr) {
-        return nullptr;
-      }
+      return nullptr;
     }
-    return data_;
   }
 }
 
@@ -444,7 +507,7 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
       RETURN_STATUS_UNEXPECTED(err);
     }
     dsize_t flat_idx;
-    RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx));
+    RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
     *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
 
     return Status::OK();
@@ -461,7 +524,7 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
       RETURN_STATUS_UNEXPECTED(err);
     }
     dsize_t flat_idx;
-    RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx));
+    RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
     offset_t length_temp = 0;
     RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
     if (length != nullptr) *length = length_temp;
@@ -481,7 +544,7 @@ Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_
   std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
   *remaining = TensorShape(r);
   ind.resize(this->Rank(), 0);  //  same as -> while (ind.size() < this->Rank()) ind.push_back(0);
-  RETURN_IF_NOT_OK(ToFlatIndex(ind, &flat_ind));
+  RETURN_IF_NOT_OK(shape_.ToFlatIndex(ind, &flat_ind));
   // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only
   // be true is the tensor failed to allocate memory.
   if (GetMutableBuffer() == nullptr) {
@@ -588,10 +651,10 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
   RETURN_UNEXPECTED_IF_NULL(o);
   CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not DE_STRING");
 
-  uchar *buf = nullptr;
+  uchar *start = nullptr;
   offset_t length = 0;
-  RETURN_IF_NOT_OK(GetItemPtr(&buf, index, &length));
-  std::string_view sv{reinterpret_cast<const char *>(buf), length};
+  RETURN_IF_NOT_OK(GetItemPtr(&start, index, &length));
+  std::string_view sv{reinterpret_cast<const char *>(start)};
   o->swap(sv);
   return Status::OK();
 }
@@ -778,13 +841,11 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length
   RETURN_UNEXPECTED_IF_NULL(string_start);
   RETURN_UNEXPECTED_IF_NULL(length);
   auto *offset_ptr = reinterpret_cast<offset_t *>(data_);  // offsets starts here
-  offset_t end = offset_ptr[index];
-  offset_t start = 0;
-  if (index != 0) start = offset_ptr[index - 1] + 1;  // string starts at where the previous string ends + 1
-  uchar *buf = GetStringsBuffer();                    // string data starts here
-  *string_start = buf + start;
-  *length = end - start;
+  offset_t start = offset_ptr[index];
+  *string_start = data_ + start;
+  *length = offset_ptr[index + 1] - start - 1;  // -1 to skip the \0 from the string length
   return Status::OK();
 }
+
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/dataset/core/tensor.h
index 1f3a2a40f8e9adfac9b1cd945f1b321b74c995e2..5efd989fc9c397a452c3edabcb7fdd3a5c800c96 100644
--- a/mindspore/ccsrc/dataset/core/tensor.h
+++ b/mindspore/ccsrc/dataset/core/tensor.h
@@ -35,6 +35,7 @@
 #include "dataset/util/allocator.h"
 #include "dataset/util/de_error.h"
 #include "dataset/util/status.h"
+#include "proto/example.pb.h"
 
 namespace py = pybind11;
 namespace mindspore {
@@ -64,6 +65,8 @@ class Tensor {
   // @param data unsigned char*, pointer to the data.
   Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data);
 
+  Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length);
+
   Tensor(const Tensor &other) = delete;
 
   Tensor &operator=(const Tensor &other) = delete;
@@ -72,6 +75,8 @@ class Tensor {
 
   Tensor &operator=(Tensor &&other) noexcept;
 
+  Status AllocateBuffer(const dsize_t &length);
+
   // type of offest values to store strings information
   using offset_t = uint32_t;
   // const of the size of the offset variable
@@ -84,15 +89,24 @@ class Tensor {
   // Construct a tensor from  a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
   // the size of the vector `strings`.
   // The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
-  // OFFSET1, OFFSET2, ... String1, String2, ...
-  // The value of each offset is the end index of the corresponding string
+  // The offset array stores one extra value so that the length of the last string can be computed.
+  // OFFSET1, OFFSET2, ..., OFFSETn+1, STRING1, STRING2, ..., STRINGn
+  // The value of each offset is the start index of the corresponding string
   // Offsets is of type offest_t
   // strings will ne null-terminated
   // example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
-  // 3 6 a b c \0 d e \0
+  // |----------------------------------------------------------------|
+  // |             OFFSET ARRAY           |            STRINGS        |
+  // | bytes 0-3 | bytes 4-7 | bytes 8-11 | bytes 12-15 | bytes 16-18 |
+  // |     12    |    16     |     19     |     abc\0   |      de\0   |
+  // |----------------------------------------------------------------|
   explicit Tensor(const std::vector<std::string> &strings,
                   const TensorShape &shape = TensorShape::CreateUnknownRankShape());
 
+  // Same as Tensor(vector<string>) but the input is protobuf bytelist
+  explicit Tensor(const dataengine::BytesList &bytes_list,
+                  const TensorShape &shape = TensorShape::CreateUnknownRankShape());
+
   // A static factory method to create the given flavour of derived Tensor
   // Returns the base class reference for the Tensor.
   // @param ptr output argument to hold the created Tensor of given tensor_impl
@@ -121,6 +135,9 @@ class Tensor {
   static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
                              const TensorShape &shape = TensorShape::CreateUnknownRankShape());
 
+  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
+                             const TensorShape &shape);
+
   // Copy raw data of a array based on shape and strides to the destination pointer
   // @param dst Pointer to the destination array where the content is to be copied
   // @param src Pointer to the source of strided array to be copied
@@ -166,7 +183,7 @@ class Tensor {
   // @param value of type `T`
   template <typename T>
   Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
-    static_cast<void>(GetMutableBuffer());
+    RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
     T *ptr = nullptr;
     RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
     *ptr = value;
@@ -203,7 +220,7 @@ class Tensor {
   template <typename T>
   Status Fill(const T &value) {
     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
-    static_cast<void>(GetMutableBuffer());
+    RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
     int64_t cellSize = type_.SizeInBytes();
     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
       for (dsize_t i = 0; i < Size(); i++) {
@@ -418,32 +435,28 @@ class Tensor {
     using pointer = std::string_view *;
     using reference = std::string_view &;
 
-    explicit TensorIterator(uchar *offset = nullptr, const uchar *buf = nullptr, dsize_t index = 0) {
-      offset_ = reinterpret_cast<offset_t *>(offset);
-      buf_ = reinterpret_cast<const char *>(buf);
+    explicit TensorIterator(uchar *data = nullptr, dsize_t index = 0) {
+      data_ = reinterpret_cast<const char *>(data);
       index_ = index;
     }
 
     TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) {
-      offset_ = raw_iterator.offset_;
-      buf_ = raw_iterator.buf_;
+      data_ = raw_iterator.data_;
       index_ = raw_iterator.index_;
     }
 
     ~TensorIterator() = default;
 
-    bool operator==(const TensorIterator<std::string_view> &rhs) {
-      return buf_ == rhs.buf_ && offset_ == rhs.offset_ && index_ == rhs.index_;
-    }
+    bool operator==(const TensorIterator<std::string_view> &rhs) { return data_ == rhs.data_ && index_ == rhs.index_; }
 
     bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); }
 
-    operator bool() const { return offset_ != nullptr; }
+    operator bool() const { return data_ != nullptr; }
 
     std::string_view operator*() const {
-      offset_t start = 0;
-      if (index_ != 0) start = offset_[index_ - 1] + 1;
-      return std::string_view{buf_ + start};
+      auto offset_ = reinterpret_cast<const offset_t *>(data_);
+      offset_t start = offset_[index_];
+      return std::string_view{data_ + start};
     }
 
     TensorIterator<std::string_view> &operator+=(const dsize_t &inc) {
@@ -496,8 +509,7 @@ class Tensor {
 
    protected:
     dsize_t index_;
-    offset_t *offset_;
-    const char *buf_;
+    const char *data_;
   };
 
   // Return a TensorIterator that points to the start of the Tensor.
@@ -518,11 +530,6 @@ class Tensor {
   }
 
  protected:
-  // Returns the location of the item assuming row major memory layout.
-  // @param index
-  // @return
-  Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
-
   // A function that prints Tensor recursively, first called by print
   // @param out
   // @param cur_dim
@@ -559,7 +566,7 @@ class Tensor {
   // Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if the
   // tensor's type is a string, otherwise undefined address would be returned.
   // @return address of the first string of the tensor.
-  uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements(); }
+  uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
 
   // all access to shape_ should be via shape
   TensorShape shape_;
@@ -573,14 +580,8 @@ class Tensor {
   unsigned char *data_end_ = nullptr;
 };
 template <>
-inline Tensor::TensorIterator<std::string_view> Tensor::begin<std::string_view>() {
-  uchar *buf = GetStringsBuffer();
-  return TensorIterator<std::string_view>(data_, buf);
-}
-template <>
 inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
-  uchar *buf = GetStringsBuffer();
-  return TensorIterator<std::string_view>(data_, buf, shape_.NumOfElements());
+  return TensorIterator<std::string_view>(data_, shape_.NumOfElements());  // end: index == number of elements
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.cc b/mindspore/ccsrc/dataset/core/tensor_shape.cc
index e24b2bc12ba350e10169e0b69ba9d4e84828c3b1..30afdf38bc87400bee4b29a2363c24220a180501 100644
--- a/mindspore/ccsrc/dataset/core/tensor_shape.cc
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.cc
@@ -40,16 +40,7 @@ dsize_t TensorShape::NumOfElements() const {
   if (!known()) {
     return 0;
   }
-  dsize_t num = 1;
-  for (auto i : raw_shape_) {
-    if (multi_ok(num, i)) {
-      num *= i;
-    } else {
-      // dsize_t can wrap since it is signed int, we double check here
-      MS_LOG(ERROR) << "Tensor shape larger than maximum allowed value!";
-    }
-  }
-  return num;
+  return strides_[0];
 }
 
 void TensorShape::Print(std::ostream &out) const {
@@ -72,20 +63,23 @@ void TensorShape::Print(std::ostream &out) const {
 }
 
 TensorShape::TensorShape(const std::initializer_list<dsize_t> &list)
-    : raw_shape_(*GlobalContext::Instance()->int_allocator()) {
+    : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
   AddListToShape(list);
 }
 
-TensorShape::TensorShape(const std::vector<dsize_t> &list) : raw_shape_(*GlobalContext::Instance()->int_allocator()) {
+TensorShape::TensorShape(const std::vector<dsize_t> &list)
+    : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
   AddListToShape(list);
 }
 
-TensorShape::TensorShape(const TensorShape &shape) : raw_shape_(*GlobalContext::Instance()->int_allocator()) {
+TensorShape::TensorShape(const TensorShape &shape)
+    : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
   AddListToShape(shape.AsVector());
   known_ = shape.known_;  // override with the input shape in case of unknown-rank tensor shape.
 }
 
-TensorShape::TensorShape(py::list l) : raw_shape_(*GlobalContext::Instance()->int_allocator()) {
+TensorShape::TensorShape(py::list l)
+    : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
   std::vector<dsize_t> list_c;
   for (auto &i : l) {
     if (!i.is_none()) {
@@ -97,6 +91,18 @@ TensorShape::TensorShape(py::list l) : raw_shape_(*GlobalContext::Instance()->in
   AddListToShape(list_c);
 }
 
+TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type)
+    : raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
+  std::vector<dsize_t> dims;  // cv dims, plus a trailing channel dim for multi-channel cv types
+  for (int i = 0; i < cv_size.dims(); i++) {
+    dims.push_back(cv_size[i]);
+  }
+  auto channels = static_cast<uint8_t>(1 + (type >> static_cast<uint8_t>(CV_CN_SHIFT)));
+  if (channels != 1) dims.push_back(channels);
+  // Route through AddListToShape so strides_ is filled in as well; NumOfElements() reads strides_[0].
+  AddListToShape(dims);
+}
+
 TensorShape TensorShape::CreateUnknownRankShape() {
   TensorShape s({});
   s.known_ = false;
@@ -109,17 +115,6 @@ TensorShape TensorShape::InsertDim(dsize_t axis, dsize_t dim) const {
   return TensorShape(tmp);
 }
 
-TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type) : raw_shape_(*GlobalContext::Instance()->int_allocator()) {
-  for (int i = 0; i < cv_size.dims(); i++) {
-    raw_shape_.push_back(cv_size[i]);
-  }
-  auto channels = static_cast<uint8_t>(1 + (type >> static_cast<uint8_t>(CV_CN_SHIFT)));
-  if (channels != 1) {
-    raw_shape_.push_back(channels);
-  }
-  known_ = true;
-}
-
 std::vector<dsize_t> TensorShape::AsVector() const {
   return std::vector<dsize_t>(raw_shape_.begin(), raw_shape_.end());
 }
@@ -139,23 +134,28 @@ bool TensorShape::IsValidIndex(const std::vector<dsize_t> &index) const {
 
 template <typename T>
 void TensorShape::AddListToShape(const T &list) {
+  raw_shape_.resize(list.size());
+  strides_.resize(list.size() + 1);
+  strides_[list.size()] = 1;
   known_ = true;
-  dsize_t num = 1;
   dsize_t size = 0;
-  for (const auto &itr : list) {
-    if (itr > 0) {
-      if (num > std::numeric_limits<int64_t>::max() / itr) {
+  auto itr = std::rbegin(list);  // iterate over the list in reverse order
+  auto s = list.size() - 1;      // to compute strides while adding dims
+  for (; itr != std::rend(list); itr++, s--) {
+    dsize_t dim = *itr;
+    if (dim > 0) {
+      if (strides_[s + 1] > std::numeric_limits<int64_t>::max() / dim) {
         MS_LOG(ERROR) << "Invalid shape data, overflow occurred!";
         known_ = false;
         raw_shape_.clear();
         return;
       }
-      num *= itr;
+      strides_[s] = dim * strides_[s + 1];
     }
-    if (itr < 0) {
+    if (dim < 0) {
       known_ = false;
     }
-    if (itr > kDeMaxDim) {
+    if (dim > kDeMaxDim) {
       std::stringstream ss;
       ss << "Invalid shape data, dim (" << size << ") is larger than the maximum dim size(" << kDeMaxDim << ")!";
       MS_LOG(ERROR) << ss.str().c_str();
@@ -163,7 +163,7 @@ void TensorShape::AddListToShape(const T &list) {
       raw_shape_.clear();
       return;
     }
-    raw_shape_.push_back(itr);
+    raw_shape_[s] = dim;
     size++;
   }
   if (size > kDeMaxRank) {
@@ -215,17 +215,18 @@ TensorShape TensorShape::Squeeze() const {
   }
   return TensorShape(new_shape);
 }
-std::vector<dsize_t> TensorShape::Strides() {
-  std::vector<dsize_t> strides(Rank());
-  dsize_t count = NumOfElements();
-  for (dsize_t i = 0; i < Rank(); i++) {
-    if (raw_shape_[i] != 0)
-      count /= raw_shape_[i];
-    else
-      count = 0;
-    strides[i] = count;
+// Returns a copy of the strides, one per dimension; strides_[0] (the number of elements) is left out.
+std::vector<dsize_t> TensorShape::Strides() const { return std::vector<dsize_t>{strides_.begin() + 1, strides_.end()}; }
+
+// Name: ToFlatIndex(). Description: convert a vector style index to a flat offset, to access memory (internal use)
+Status TensorShape::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const {
+  CHECK_FAIL_RETURN_UNEXPECTED(flat_index != nullptr && index.size() < strides_.size(), "Not a valid index");
+  *flat_index = 0;  // index.size() < strides_.size() was checked above, so strides_[k + 1] stays in bounds
+  for (size_t k = 0; k < index.size(); k++) {
+    *flat_index += index[k] * strides_[k + 1];  // skip the first element of strides_ which is numOfElements
   }
-  return strides;
+  CHECK_FAIL_RETURN_UNEXPECTED(*flat_index >= 0 && *flat_index < NumOfElements(), "Not a valid index");
+  return Status::OK();
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.h b/mindspore/ccsrc/dataset/core/tensor_shape.h
index 33dd0a81ee9ad5871e6513720e62ff7f594bccc9..6cfb007b561da2c7440008b437c9fd4e1a5f3e56 100644
--- a/mindspore/ccsrc/dataset/core/tensor_shape.h
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.h
@@ -156,13 +156,20 @@ class TensorShape {
 
   TensorShape Squeeze() const;
 
-  std::vector<dsize_t> Strides();
+  std::vector<dsize_t> Strides() const;
+
+  // Computes the flat location of the item at the given index, assuming row major memory layout.
+  // @param index - vector style index of the item. @param flat_index - output, receives the computed location
+  // @return Status - Status::OK() unless the validity check on the computed location fails
+  Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
 
  private:
   // True if known and valid shape, false otherwise
   bool known_;
   // Vector to keep the dims of the shape.
   std::vector<dsize_t, IntAlloc> raw_shape_;
+  // Strides of the shape; size is rank+1: strides_[0] is the number of elements and strides_[rank] is 1.
+  std::vector<dsize_t, IntAlloc> strides_;
 
   // Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape.
   // @tparam T list
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
index b909a76ee1212a0b19b6a23670cfc5dd7e1dc6dc..a1d0b22f152e955fb4eef1fcbddd7b48598f2519 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
@@ -1,5 +1,3 @@
-ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
-ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
 add_subdirectory(sampler)
 file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
@@ -15,13 +13,9 @@ add_library(engine-datasetops-source OBJECT
     image_folder_op.cc
     mnist_op.cc
     voc_op.cc
-    ${EXAMPLE_SRCS}
-    ${FEATURE_SRCS}
     manifest_op.cc
     cifar_op.cc
     random_data_op.cc
     celeba_op.cc
     text_file_op.cc
-    )
-
-add_dependencies(engine-datasetops-source mindspore::protobuf)
+    )
\ No newline at end of file
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
index 49c7e78a60938f79afa0ff130edc4e4ef2b2a5f7..358dd07872bca150898092729010dc7ed55da392 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
@@ -127,8 +127,10 @@ Status MindRecordOp::Init() {
     std::string type_str = mindrecord::ColumnDataTypeNameNormalized[col_data_types[i]];
     DataType t_dtype = DataType(type_str);  // valid types: {"bytes", "string", "int32", "int64", "float32", "float64"}
 
-    if (col_data_types[i] == mindrecord::ColumnBytes || col_data_types[i] == mindrecord::ColumnString) {  // rank = 1
+    if (col_data_types[i] == mindrecord::ColumnBytes) {  // rank = 1
       col_desc = ColDescriptor(colname, t_dtype, TensorImpl::kFlexible, 1);
+    } else if (col_data_types[i] == mindrecord::ColumnString) {  // rank = 0
+      col_desc = ColDescriptor(colname, t_dtype, TensorImpl::kFlexible, 0);
     } else if (col_shapes[i].size() > 0) {
       std::vector<dsize_t> vec(col_shapes[i].size());  // temporary vector to hold shape
       (void)std::copy(col_shapes[i].begin(), col_shapes[i].end(), vec.begin());
@@ -309,7 +311,10 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
 
     // Set shape
     auto num_elements = n_bytes / column_data_type_size;
-    if (column.hasShape()) {
+    if (type == DataType::DE_STRING) {
+      std::string s{data, data + n_bytes};
+      RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {s}, TensorShape::CreateScalar()));
+    } else if (column.hasShape()) {
       auto new_shape = TensorShape(column.shape());
       RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast<int32_t>(num_elements), &new_shape));
       RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, column.tensorImpl(), new_shape, type, data));
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
index 90e950fceefe4af1cd620f85ad96c4bbc162faea..695f364b7f22e75a01a4751542156d3231fa1eff 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
@@ -63,7 +63,8 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t
   }
   TensorShape shape(std::vector<dsize_t>(1, num_elements));
   RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type()));
-  (void)(*sample_ids)->GetMutableBuffer();  // allocate memory in case user forgets!
+  RETURN_IF_NOT_OK(
+    (*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes()));  // allocate memory in case user forgets!
   return Status::OK();
 }
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
index 8b679388a7b04610f5e4dd5ac265906bd4e3de37..1335344e6dccad57ec60f0568c95e6139893a5e0 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -724,18 +724,26 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
   // kBytesList can map to the following DE types ONLY!
   // DE_UINT8, DE_INT8
   // Must be single byte type for each element!
-  if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8) {
+  if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 &&
+      current_col.type() != DataType::DE_STRING) {
     std::string err_msg = "Invalid datatype for Tensor at column: " + current_col.name();
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
 
   const dataengine::BytesList &bytes_list = column_values_list.bytes_list();
 
+  *num_elements = bytes_list.value_size();
+
+  if (current_col.type() == DataType::DE_STRING) {
+    TensorShape shape = TensorShape::CreateScalar();
+    RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape));
+    RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, shape));
+    return Status::OK();
+  }
+
   uint64_t max_size = 0;
   for (uint32_t i = 0; i < bytes_list.value_size(); ++i) max_size = std::max(max_size, bytes_list.value(i).size());
 
-  *num_elements = bytes_list.value_size();
-
   int64_t pad_size = max_size;
 
   // if user provides a shape in the form of [-1, d1, 2d, ... , dn], we need to pad to d1 * d2 * ... * dn
@@ -879,7 +887,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengin
   RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
 
   // Tensors are lazily allocated, this eagerly allocates memory for the tensor.
-  (void)(*tensor)->GetMutableBuffer();
+  RETURN_IF_NOT_OK((*tensor)->AllocateBuffer((*tensor)->SizeInBytes()));
 
   int64_t i = 0;
   auto it = (*tensor)->begin<T>();
diff --git a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc b/mindspore/ccsrc/dataset/kernels/data/data_utils.cc
index 03f1b99e2ac34b2657625f206744d56c7123a98b..f2635c1fe343867ea1702fe120f035901edb74ab 100644
--- a/mindspore/ccsrc/dataset/kernels/data/data_utils.cc
+++ b/mindspore/ccsrc/dataset/kernels/data/data_utils.cc
@@ -162,7 +162,7 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
 Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
   RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type));
 
-  static_cast<void>((*output)->GetMutableBuffer());
+  RETURN_IF_NOT_OK((*output)->AllocateBuffer((*output)->SizeInBytes()));
   switch (input->type().value()) {
     case DataType::DE_BOOL:
       CastFrom<bool>(input, output);
@@ -211,7 +211,7 @@ Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
   // initiate new tensor for type cast
   DataType new_type = DataType("float16");
   RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type));
-  static_cast<void>((*output)->GetMutableBuffer());
+  RETURN_IF_NOT_OK((*output)->AllocateBuffer((*output)->SizeInBytes()));
 
   auto in_itr = input->begin<float>();
   auto out_itr = (*output)->begin<float16>();
diff --git a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc
index a166f863b53bcfb5f189d6168c506568c8a39eb0..bf470173d9a693985094f76003905264bce0d3a0 100644
--- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc
+++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc
@@ -64,7 +64,8 @@ Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int
 
   std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
   RETURN_UNEXPECTED_IF_NULL(output_cv);
-  (void)output_cv->GetMutableBuffer();
+  RETURN_IF_NOT_OK(output_cv->AllocateBuffer(output_cv->SizeInBytes()));
+
   if (input_cv->mat().data) {
     try {
       cv::flip(input_cv->mat(), output_cv->mat(), flip_code);
diff --git a/mindspore/ccsrc/mindrecord/include/shard_column.h b/mindspore/ccsrc/mindrecord/include/shard_column.h
index e327ef511a906e012102eb707f267f376bf1822e..496e7ec3ea39f8260c18093d4e1d1ed9925c87da 100644
--- a/mindspore/ccsrc/mindrecord/include/shard_column.h
+++ b/mindspore/ccsrc/mindrecord/include/shard_column.h
@@ -51,7 +51,7 @@ enum ColumnDataType {
 // mapping as {"bytes", "string", "int32", "int64", "float32", "float64"};
 const uint32_t ColumnDataTypeSize[kDataTypes] = {1, 1, 4, 8, 4, 8};
 
-const std::vector<std::string> ColumnDataTypeNameNormalized = {"uint8", "uint8",   "int32",
+const std::vector<std::string> ColumnDataTypeNameNormalized = {"uint8", "string",  "int32",
                                                                "int64", "float32", "float64"};
 
 const std::unordered_map<std::string, ColumnDataType> ColumnDataTypeMap = {
diff --git a/mindspore/dataset/core/datatypes.py b/mindspore/dataset/core/datatypes.py
index a8411d729a815c168b26109520be2b82ffb9b2d3..292af67e8ad4e9e95e47b3634be86369c9e78df4 100644
--- a/mindspore/dataset/core/datatypes.py
+++ b/mindspore/dataset/core/datatypes.py
@@ -48,6 +48,7 @@ def mstype_to_detype(type_):
         mstype.float16: cde.DataType("float16"),
         mstype.float32: cde.DataType("float32"),
         mstype.float64: cde.DataType("float64"),
+        mstype.string: cde.DataType("string"),
     }[type_]
 
 
diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index f5005e688cdb855e6c588ae9452aa80fc12493c4..c9c06e559ca0a0e13c80e906654ea54b16ec3319 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -26,7 +26,7 @@ from . import datasets
 INT32_MAX = 2147483647
 valid_detype = [
     "bool", "int8", "int16", "int32", "int64", "uint8", "uint16",
-    "uint32", "uint64", "float16", "float32", "float64"
+    "uint32", "uint64", "float16", "float32", "float64", "string"
 ]
 
 
diff --git a/tests/ut/cpp/dataset/datatype_test.cc b/tests/ut/cpp/dataset/datatype_test.cc
index 82843d4285d79e9281405a5bf6fb79099e96b262..a55853c4c599b9c1c511b3c97929851ba2439550 100644
--- a/tests/ut/cpp/dataset/datatype_test.cc
+++ b/tests/ut/cpp/dataset/datatype_test.cc
@@ -32,47 +32,47 @@ class MindDataTestDatatype : public UT::Common {
 
 
 TEST_F(MindDataTestDatatype, TestSizes) {
-  uint8_t x = DataType::SIZE_IN_BYTES[DataType::DE_BOOL];
+  uint8_t x = DataType::kTypeInfo[DataType::DE_BOOL].sizeInBytes_;
   DataType d = DataType(DataType::DE_BOOL);
   ASSERT_EQ(x, 1);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_INT8];
+  x = DataType::kTypeInfo[DataType::DE_INT8].sizeInBytes_;
   d = DataType(DataType::DE_INT8);
   ASSERT_EQ(x, 1);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT8];
+  x = DataType::kTypeInfo[DataType::DE_UINT8].sizeInBytes_;
   d = DataType(DataType::DE_UINT8);
   ASSERT_EQ(x, 1);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_INT16];
+  x = DataType::kTypeInfo[DataType::DE_INT16].sizeInBytes_;
   d = DataType(DataType::DE_INT16);
   ASSERT_EQ(x, 2);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT16];
+  x = DataType::kTypeInfo[DataType::DE_UINT16].sizeInBytes_;
   d = DataType(DataType::DE_UINT16);
   ASSERT_EQ(x, 2);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_INT32];
+  x = DataType::kTypeInfo[DataType::DE_INT32].sizeInBytes_;
   d = DataType(DataType::DE_INT32);
   ASSERT_EQ(x, 4);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT32];
+  x = DataType::kTypeInfo[DataType::DE_UINT32].sizeInBytes_;
   d = DataType(DataType::DE_UINT32);
   ASSERT_EQ(x, 4);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_INT64];
+  x = DataType::kTypeInfo[DataType::DE_INT64].sizeInBytes_;
   d = DataType(DataType::DE_INT64);
   ASSERT_EQ(x, 8);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_UINT64];
+  x = DataType::kTypeInfo[DataType::DE_UINT64].sizeInBytes_;
   d = DataType(DataType::DE_UINT64);
   ASSERT_EQ(x, 8);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT32];
+  x = DataType::kTypeInfo[DataType::DE_FLOAT32].sizeInBytes_;
   d = DataType(DataType::DE_FLOAT32);
   ASSERT_EQ(x, 4);
   ASSERT_EQ(d.SizeInBytes(), x);
-  x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT64];
+  x = DataType::kTypeInfo[DataType::DE_FLOAT64].sizeInBytes_;
   d = DataType(DataType::DE_FLOAT64);
   ASSERT_EQ(x, 8);
   ASSERT_EQ(d.SizeInBytes(), x);
diff --git a/tests/ut/cpp/dataset/one_hot_op_test.cc b/tests/ut/cpp/dataset/one_hot_op_test.cc
index 4b8bbc1bdd2b0b529c27ecc5846fb5105b3c498b..c414e371e5ab9f49ff6f4593204f3c52f62aefb6 100644
--- a/tests/ut/cpp/dataset/one_hot_op_test.cc
+++ b/tests/ut/cpp/dataset/one_hot_op_test.cc
@@ -14,9 +14,7 @@
  * limitations under the License.
  */
 #include "common/common.h"
-#include "common/cvop_common.h"
 #include "dataset/kernels/data/one_hot_op.h"
-#include "dataset/core/cv_tensor.h"
 #include "utils/log_adapter.h"
 
 using namespace mindspore::dataset;
@@ -24,9 +22,9 @@ using mindspore::MsLogLevel::INFO;
 using mindspore::ExceptionType::NoExceptionType;
 using mindspore::LogStream;
 
-class MindDataTestOneHotOp : public UT::CVOP::CVOpCommon {
+class MindDataTestOneHotOp : public UT::Common {
  protected:
-    MindDataTestOneHotOp() : CVOpCommon() {}
+    MindDataTestOneHotOp() {}
 };
 
 TEST_F(MindDataTestOneHotOp, TestOp) {
diff --git a/tests/ut/cpp/dataset/tensor_string_test.cc b/tests/ut/cpp/dataset/tensor_string_test.cc
index 8c58f68982040f577e3fb32866c68139a45da5a3..a440a93c15fa6a6c92063be3681c91c1f670c878 100644
--- a/tests/ut/cpp/dataset/tensor_string_test.cc
+++ b/tests/ut/cpp/dataset/tensor_string_test.cc
@@ -65,14 +65,14 @@ TEST_F(MindDataTestStringTensorDE, Basics) {
 TEST_F(MindDataTestStringTensorDE, Basics2) {
   std::shared_ptr<Tensor> t =
     std::make_shared<Tensor>(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3}));
-  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20);
-  std::vector<uint32_t> offsets = {3, 8, 11, 17, 21, 25};
+  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20 + 4);
+  std::vector<uint32_t> offsets = {0, 4, 9, 12, 18, 22, 26};
   uint32_t ctr = 0;
   for (auto i : offsets) {
-    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i);
+    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i + 28);
     ctr += 4;
   }
-  const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4;
+  const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4 + 4;
   std::vector<uint32_t> starts = {0, 4, 9, 12, 18, 22};
 
   uint32_t index = 0;
@@ -90,14 +90,14 @@ TEST_F(MindDataTestStringTensorDE, Empty) {
   std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
   //  abc_defg___123__
   //  0123456789012345
-  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10);
-  std::vector<uint32_t> offsets = {3, 8, 9, 10, 14, 15};
+  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10 + 4);
+  std::vector<uint32_t> offsets = {0, 4, 9, 10, 11, 15, 16};
   uint32_t ctr = 0;
   for (auto i : offsets) {
-    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i);
+    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(t->GetMutableBuffer() + ctr)) == i + 28);
     ctr += 4;
   }
-  const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4;
+  const char *buf = reinterpret_cast<char *>(t->GetMutableBuffer()) + 6 * 4 + 4;
   std::vector<uint32_t> starts = {0, 4, 9, 10, 11, 15};
 
   uint32_t index = 0;
diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc
index 615427ab9247cb66c2f5416a1486871829e85869..b36f71f4ef88c9ab0c9dcfc7391ef8204db881d7 100644
--- a/tests/ut/cpp/dataset/tensor_test.cc
+++ b/tests/ut/cpp/dataset/tensor_test.cc
@@ -41,6 +41,7 @@ class MindDataTestTensorDE : public UT::Common {
 
 TEST_F(MindDataTestTensorDE, Basics) {
   std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
+  ASSERT_TRUE((t->AllocateBuffer(t->SizeInBytes())).IsOk());
   ASSERT_EQ(t->shape(), TensorShape({2, 3}));
   ASSERT_EQ(t->type(), DataType::DE_UINT64);
   ASSERT_EQ(t->SizeInBytes(), 2 * 3 * 8);
diff --git a/tests/ut/data/dataset/testTextMindRecord/test.mindrecord b/tests/ut/data/dataset/testTextMindRecord/test.mindrecord
new file mode 100644
index 0000000000000000000000000000000000000000..1a3bb4a12db47643ba910dc320c9bb8cb247db78
Binary files /dev/null and b/tests/ut/data/dataset/testTextMindRecord/test.mindrecord differ
diff --git a/tests/ut/data/dataset/testTextMindRecord/test.mindrecord.db b/tests/ut/data/dataset/testTextMindRecord/test.mindrecord.db
new file mode 100644
index 0000000000000000000000000000000000000000..8f0fa403f69c0e7d99a97b4803ba34d136f8267f
Binary files /dev/null and b/tests/ut/data/dataset/testTextMindRecord/test.mindrecord.db differ
diff --git a/tests/ut/data/dataset/testTextTFRecord/datasetSchema.json b/tests/ut/data/dataset/testTextTFRecord/datasetSchema.json
new file mode 100644
index 0000000000000000000000000000000000000000..d0493c524294280e7f6cdb837cc72b572889be97
--- /dev/null
+++ b/tests/ut/data/dataset/testTextTFRecord/datasetSchema.json
@@ -0,0 +1,18 @@
+{
+  "datasetType": "TF",
+  "numRows": 3,
+  "columns": {
+    "line": {
+      "type": "string",
+      "rank": 0
+    },
+    "words": {
+      "type": "string",
+      "rank": 1
+    },
+    "chinese": {
+      "type": "string",
+      "rank": 0
+    }
+  }
+}
diff --git a/tests/ut/data/dataset/testTextTFRecord/text.tfrecord b/tests/ut/data/dataset/testTextTFRecord/text.tfrecord
new file mode 100644
index 0000000000000000000000000000000000000000..e33a1c4b91323bae7da34726b6c6e7ba5abf739b
Binary files /dev/null and b/tests/ut/data/dataset/testTextTFRecord/text.tfrecord differ
diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py
index a882dc6bcb1ef10b0dafddda190a4c462e6d8be6..79a98491c2d3b56e2afbaafd87cfdff46eb1e127 100644
--- a/tests/ut/python/dataset/test_minddataset.py
+++ b/tests/ut/python/dataset/test_minddataset.py
@@ -584,7 +584,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file):
 
 def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
-    columns_list = ["data", "file_name", "label"]
+    columns_list = ["data", "label"]
     num_readers = 4
     data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers)
 
@@ -948,8 +948,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset():
     data_value_to_list = []
     for item in data:
         new_data = {}
-        new_data['file_name'] = np.asarray(
-            list(bytes(item["file_name"], encoding='utf-8')), dtype=np.uint8)
+        new_data['file_name'] = np.asarray(item["file_name"], dtype='S')
         new_data['label'] = np.asarray(list([item["label"]]), dtype=np.int32)
         new_data['image1'] = np.asarray(list(item["image1"]), dtype=np.uint8)
         new_data['image2'] = np.asarray(list(item["image2"]), dtype=np.uint8)
@@ -1153,8 +1152,7 @@ def test_write_with_multi_bytes_and_MindDataset():
     data_value_to_list = []
     for item in data:
         new_data = {}
-        new_data['file_name'] = np.asarray(
-            list(bytes(item["file_name"], encoding='utf-8')), dtype=np.uint8)
+        new_data['file_name'] = np.asarray(item["file_name"], dtype='S')
         new_data['label'] = np.asarray(list([item["label"]]), dtype=np.int32)
         new_data['image1'] = np.asarray(list(item["image1"]), dtype=np.uint8)
         new_data['image2'] = np.asarray(list(item["image2"]), dtype=np.uint8)
diff --git a/tests/ut/python/dataset/test_minddataset_sampler.py b/tests/ut/python/dataset/test_minddataset_sampler.py
index 100d2d1e16c30edb57813aed8c27f0f65a7f9e49..c1affcea01e6ae758cdae25cee962e48ba17468f 100644
--- a/tests/ut/python/dataset/test_minddataset_sampler.py
+++ b/tests/ut/python/dataset/test_minddataset_sampler.py
@@ -27,6 +27,7 @@ import mindspore.dataset as ds
 import mindspore.dataset.transforms.vision.c_transforms as vision
 from mindspore import log as logger
 from mindspore.dataset.transforms.vision import Inter
+from mindspore.dataset.transforms.text import as_text
 from mindspore.mindrecord import FileWriter
 
 FILES_NUM = 4
@@ -72,7 +73,7 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
     for item in data_set.create_dict_iterator():
         logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
         logger.info("-------------- item[file_name]: \
-                {}------------------------".format("".join([chr(x) for x in item["file_name"]])))
+                {}------------------------".format(as_text(item["file_name"])))
         logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
         num_iter += 1
 
@@ -92,7 +93,7 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file):
         logger.info("-------------- item[data]: \
                 {}------------------------".format(item["data"][:10]))
         logger.info("-------------- item[file_name]: \
-                {}------------------------".format("".join([chr(x) for x in item["file_name"]])))
+                {}------------------------".format(as_text(item["file_name"])))
         logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
         num_iter += 1
 
@@ -110,7 +111,7 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file):
     for item in data_set.create_dict_iterator():
         logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
         logger.info("-------------- item[file_name]: \
-                {}------------------------".format("".join([chr(x) for x in item["file_name"]])))
+                {}------------------------".format(as_text(item["file_name"])))
         logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
         num_iter += 1
 
@@ -127,7 +128,7 @@ def test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file):
     for item in data_set.create_dict_iterator():
         logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter))
         logger.info("-------------- item[file_name]: \
-                {}------------------------".format("".join([chr(x) for x in item["file_name"]])))
+                {}------------------------".format(as_text(item["file_name"])))
         logger.info("-------------- item[label]: {} ----------------------------".format(item["label"]))
         num_iter += 1
 
diff --git a/tests/ut/python/dataset/test_tensor_string.py b/tests/ut/python/dataset/test_tensor_string.py
index 7f905d61e5847932a16cac0ed42909bea0a22ee4..e5e2be865b13fcb4d1edb47a8b7a96dd76a7a91e 100644
--- a/tests/ut/python/dataset/test_tensor_string.py
+++ b/tests/ut/python/dataset/test_tensor_string.py
@@ -17,17 +17,15 @@ import numpy as np
 import pytest
 
 import mindspore.dataset as ds
+import mindspore.common.dtype as mstype
 
 
 # pylint: disable=comparison-with-itself
 def test_basic():
     x = np.array([["ab", "cde", "121"], ["x", "km", "789"]], dtype='S')
-    # x = np.array(["ab", "cde"], dtype='S')
     n = cde.Tensor(x)
     arr = n.as_array()
-    y = np.array([1, 2])
-    assert all(y == y)
-    # assert np.testing.assert_array_equal(y,y)
+    np.testing.assert_array_equal(x, arr)
 
 
 def compare(strings):
@@ -60,7 +58,125 @@ def test_batching_strings():
     assert "[Batch ERROR] Batch does not support" in str(info)
 
 
+def test_map():
+    def gen():
+        yield np.array(["ab cde 121"], dtype='S'),
+
+    data = ds.GeneratorDataset(gen, column_names=["col"])
+
+    def split(b):
+        splits = b.item().decode("utf8").split()
+        return np.array(splits, dtype='S')
+
+    data = data.map(input_columns=["col"], operations=split)
+    expected = np.array(["ab", "cde", "121"], dtype='S')
+    for d in data:
+        np.testing.assert_array_equal(d[0], expected)
+
+
+def as_str(arr):
+    """Decode every element of `arr` from UTF-8 bytes to str using np.vectorize."""
+    def decode(s):
+        return s.decode("utf8")
+    return np.vectorize(decode)(arr)
+
+
+line = np.array(["This is a text file.",
+                 "Be happy every day.",
+                 "Good luck to everyone."])
+
+words = np.array([["This", "text", "file", "a"],
+                  ["Be", "happy", "day", "b"],
+                  ["女", "", "everyone", "c"]])
+
+chinese = np.array(["今天天气太好了我们一起去外面玩吧",
+                    "男默女泪",
+                    "江州市长江大桥参加了长江大桥的通车仪式"])
+
+
+def test_tfrecord1():
+    s = ds.Schema()
+    s.add_column("line", "string", [])
+    s.add_column("words", "string", [-1])
+    s.add_column("chinese", "string", [])
+
+    data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema=s)
+
+    for i, d in enumerate(data.create_dict_iterator()):
+        assert d["line"].shape == line[i].shape
+        assert d["words"].shape == words[i].shape
+        assert d["chinese"].shape == chinese[i].shape
+        np.testing.assert_array_equal(line[i], as_str(d["line"]))
+        np.testing.assert_array_equal(words[i], as_str(d["words"]))
+        np.testing.assert_array_equal(chinese[i], as_str(d["chinese"]))
+
+
+def test_tfrecord2():
+    data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False,
+                              schema='../data/dataset/testTextTFRecord/datasetSchema.json')
+    for i, d in enumerate(data.create_dict_iterator()):
+        assert d["line"].shape == line[i].shape
+        assert d["words"].shape == words[i].shape
+        assert d["chinese"].shape == chinese[i].shape
+        np.testing.assert_array_equal(line[i], as_str(d["line"]))
+        np.testing.assert_array_equal(words[i], as_str(d["words"]))
+        np.testing.assert_array_equal(chinese[i], as_str(d["chinese"]))
+
+
+def test_tfrecord3():
+    s = ds.Schema()
+    s.add_column("line", mstype.string, [])
+    s.add_column("words", mstype.string, [-1, 2])
+    s.add_column("chinese", mstype.string, [])
+
+    data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema=s)
+
+    for i, d in enumerate(data.create_dict_iterator()):
+        assert d["line"].shape == line[i].shape
+        assert d["words"].shape == words[i].reshape([2, 2]).shape
+        assert d["chinese"].shape == chinese[i].shape
+        np.testing.assert_array_equal(line[i], as_str(d["line"]))
+        np.testing.assert_array_equal(words[i].reshape([2, 2]), as_str(d["words"]))
+        np.testing.assert_array_equal(chinese[i], as_str(d["chinese"]))
+
+
+def create_text_mindrecord():
+    # method to create mindrecord with string data, used to generate testTextMindRecord/test.mindrecord
+    from mindspore.mindrecord import FileWriter
+
+    mindrecord_file_name = "test.mindrecord"
+    data = [{"english": "This is a text file.",
+             "chinese": "今天天气太好了我们一起去外面玩吧"},
+            {"english": "Be happy every day.",
+             "chinese": "男默女泪"},
+            {"english": "Good luck to everyone.",
+             "chinese": "江州市长江大桥参加了长江大桥的通车仪式"},
+            ]
+    writer = FileWriter(mindrecord_file_name)
+    schema = {"english": {"type": "string"},
+              "chinese": {"type": "string"},
+              }
+    writer.add_schema(schema)
+    writer.write_raw_data(data)
+    writer.commit()
+
+
+def test_mindrecord():
+    data = ds.MindDataset("../data/dataset/testTextMindRecord/test.mindrecord", shuffle=False)
+
+    for i, d in enumerate(data.create_dict_iterator()):
+        assert d["english"].shape == line[i].shape
+        assert d["chinese"].shape == chinese[i].shape
+        np.testing.assert_array_equal(line[i], as_str(d["english"]))
+        np.testing.assert_array_equal(chinese[i], as_str(d["chinese"]))
+
+
 if __name__ == '__main__':
     test_generator()
     test_basic()
     test_batching_strings()
+    test_map()
+    test_tfrecord1()
+    test_tfrecord2()
+    test_tfrecord3()
+    test_mindrecord()