Python 3.7 Compatibility: Py_UnicodeAsUTF8AndSize() returns a "const char*"

instead of a "char*". (See https://docs.python.org/3/whatsnew/3.7.html#c-api-changes) There are additional changes needed for Python 3.7 compatibility, this change just pulls out one of them (and subsumes a related attempt in #21202 and #20766) Helps with #20517 PiperOrigin-RevId: 207008013

Python 3.7 Compatibility: Py_UnicodeAsUTF8AndSize() returns a "const char*"
instead of a "char*". (See https://docs.python.org/3/whatsnew/3.7.html#c-api-changes) There are additional changes needed for Python 3.7 compatibility, this change just pulls out one of them (and subsumes a related attempt in #21202 and #20766) Helps with #20517 PiperOrigin-RevId: 207008013
abb903df · Asim Shankar · TensorFlower Gardener · 72691c24 · abb903df · abb903df
隐藏空白更改
内联并排

Showing with 64 addition and 54 deletion

tensorflow/python/lib/core/ndarray_tensor.cc tensorflow/python/lib/core/ndarray_tensor.cc +34 -31

tensorflow/python/lib/core/py_func.cc tensorflow/python/lib/core/py_func.cc +30 -23

未找到文件。
--- a/tensorflow/python/lib/core/ndarray_tensor.cc
+++ b/tensorflow/python/lib/core/ndarray_tensor.cc
@@ -136,6 +136,31 @@ Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array,
  return Status::OK();
 }

+Status PyObjectToString(PyObject* obj, const char** ptr, Py_ssize_t* len) {
+  if (!PyUnicode_Check(obj)) {
+    char* buf;
+    if (PyBytes_AsStringAndSize(obj, &buf, len) != 0) {
+      return errors::Internal("Unable to get element as bytes.");
+    }
+    *ptr = buf;
+    return Status::OK();
+  }
+#if (PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3))
+  *ptr = PyUnicode_AsUTF8AndSize(obj, len);
+  if (*ptr != nullptr) return Status::OK();
+#else
+  PyObject* utemp = PyUnicode_AsUTF8String(obj);
+  char* buf;
+  if (utemp != nullptr && PyBytes_AsStringAndSize(utemp, &buf, len) != -1) {
+    *ptr = buf;
+    Py_DECREF(utemp);
+    return Status::OK();
+  }
+  Py_XDECREF(utemp);
+#endif
+  return errors::Internal("Unable to convert element to UTF-8.");
+}
+
 // Iterate over the string array 'array', extract the ptr and len of each string
 // element and call f(ptr, len).
 template <typename F>
@@ -148,33 +173,10 @@ Status PyBytesArrayMap(PyArrayObject* array, F f) {
    if (!item) {
      return errors::Internal("Unable to get element from the feed - no item.");
    }
-    char* ptr;
    Py_ssize_t len;
-
-    if (PyUnicode_Check(item.get())) {
-#if PY_VERSION_HEX >= 0x03030000
-      // Accept unicode by converting to UTF-8 bytes.
-      ptr = PyUnicode_AsUTF8AndSize(item.get(), &len);
-      if (!ptr) {
-        return errors::Internal("Unable to get element as UTF-8.");
-      }
-      f(ptr, len);
-#else
-      PyObject* utemp = PyUnicode_AsUTF8String(item.get());
-      if (!utemp || PyBytes_AsStringAndSize(utemp, &ptr, &len) == -1) {
-        Py_XDECREF(utemp);
-        return errors::Internal("Unable to convert element to UTF-8.");
-      }
-      f(ptr, len);
-      Py_DECREF(utemp);
-#endif
-    } else {
-      int success = PyBytes_AsStringAndSize(item.get(), &ptr, &len);
-      if (success != 0) {
-        return errors::Internal("Unable to get element as bytes.");
-      }
-      f(ptr, len);
-    }
+    const char* ptr;
+    TF_RETURN_IF_ERROR(PyObjectToString(item.get(), &ptr, &len));
+    f(ptr, len);
    PyArray_ITER_NEXT(iter.get());
  }
  return Status::OK();
@@ -186,10 +188,11 @@ Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
                          size_t* size, void** buffer) {
  // Compute bytes needed for encoding.
  *size = 0;
-  TF_RETURN_IF_ERROR(PyBytesArrayMap(array, [&size](char* ptr, Py_ssize_t len) {
-    *size +=
-        sizeof(tensorflow::uint64) + tensorflow::core::VarintLength(len) + len;
-  }));
+  TF_RETURN_IF_ERROR(
+      PyBytesArrayMap(array, [&size](const char* ptr, Py_ssize_t len) {
+        *size += sizeof(tensorflow::uint64) +
+                 tensorflow::core::VarintLength(len) + len;
+      }));
  // Encode all strings.
  std::unique_ptr<char[]> base_ptr(new char[*size]);
  char* base = base_ptr.get();
@@ -198,7 +201,7 @@ Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
  tensorflow::uint64* offsets = reinterpret_cast<tensorflow::uint64*>(base);

  TF_RETURN_IF_ERROR(PyBytesArrayMap(
-      array, [&base, &data_start, &dst, &offsets](char* ptr, Py_ssize_t len) {
+      array, [&data_start, &dst, &offsets](const char* ptr, Py_ssize_t len) {
        *offsets = (dst - data_start);
        offsets++;
        dst = tensorflow::core::EncodeVarint64(dst, len);

--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -333,6 +333,35 @@ class NumpyTensorBuffer : public TensorBuffer {
  void* data_;
 };

+Status PyObjectToString(PyObject* obj, string* str) {
+  char* py_bytes;
+  Py_ssize_t size;
+  if (PyBytes_AsStringAndSize(obj, &py_bytes, &size) != -1) {
+    str->assign(py_bytes, size);
+    return Status::OK();
+  }
+#if PY_MAJOR_VERSION >= 3
+  const char* ptr = PyUnicode_AsUTF8AndSize(obj, &size);
+  if (ptr != nullptr) {
+    str->assign(ptr, size);
+    return Status::OK();
+  }
+#else
+  if (PyUnicode_Check(obj)) {
+    PyObject* unicode = PyUnicode_AsUTF8String(obj);
+    char* ptr;
+    if (unicode && PyString_AsStringAndSize(unicode, &ptr, &size) != -1) {
+      str->assign(ptr, size);
+      Py_DECREF(unicode);
+      return Status::OK();
+    }
+    Py_XDECREF(unicode);
+  }
+#endif
+  return errors::Unimplemented("Unsupported object type ",
+                               obj->ob_type->tp_name);
+}
+
 Status ConvertNdarrayToTensor(PyObject* obj, Tensor* ret) {
  PyArrayObject* input = reinterpret_cast<PyArrayObject*>(obj);
  DataType dtype = DT_INVALID;
@@ -348,29 +377,7 @@ Status ConvertNdarrayToTensor(PyObject* obj, Tensor* ret) {
      auto tflat = t.flat<string>();
      PyObject** input_data = reinterpret_cast<PyObject**>(PyArray_DATA(input));
      for (int i = 0; i < tflat.dimension(0); ++i) {
-        char* el;
-        Py_ssize_t el_size;
-        if (PyBytes_AsStringAndSize(input_data[i], &el, &el_size) == -1) {
-#if PY_MAJOR_VERSION >= 3
-          el = PyUnicode_AsUTF8AndSize(input_data[i], &el_size);
-#else
-          el = nullptr;
-          if (PyUnicode_Check(input_data[i])) {
-            PyObject* unicode = PyUnicode_AsUTF8String(input_data[i]);
-            if (unicode) {
-              if (PyString_AsStringAndSize(unicode, &el, &el_size) == -1) {
-                Py_DECREF(unicode);
-                el = nullptr;
-              }
-            }
-          }
-#endif
-          if (!el) {
-            return errors::Unimplemented("Unsupported object type ",
-                                         input_data[i]->ob_type->tp_name);
-          }
-        }
-        tflat(i) = string(el, el_size);
+        TF_RETURN_IF_ERROR(PyObjectToString(input_data[i], &tflat(i)));
      }
      *ret = t;
      break;