提交 abb903df 编写于 作者: A Asim Shankar 提交者: TensorFlower Gardener

Python 3.7 Compatibility: PyUnicode_AsUTF8AndSize() returns a "const char*"

instead of a "char*".

(See https://docs.python.org/3/whatsnew/3.7.html#c-api-changes)

There are additional changes needed for Python 3.7 compatibility;
this change just pulls out one of them
(and subsumes related attempts in #21202 and #20766).

Helps with #20517

PiperOrigin-RevId: 207008013
上级 72691c24
......@@ -136,6 +136,31 @@ Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array,
return Status::OK();
}
// Points '*ptr' at the string data of 'obj' and stores its length in bytes in
// '*len'.
//
// Unicode objects are converted to UTF-8; every other object is read through
// PyBytes_AsStringAndSize. Returns errors::Internal when the conversion fails.
//
// NOTE(review): in the pre-3.3 branch '*ptr' appears to point into the
// temporary UTF-8 bytes object whose reference is dropped before returning --
// confirm the buffer is still valid when callers read it.
Status PyObjectToString(PyObject* obj, const char** ptr, Py_ssize_t* len) {
  if (PyUnicode_Check(obj)) {
#if (PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3))
    // The return type is "char*" before Python 3.7 and "const char*" from 3.7
    // on; a "const char*" local accepts both.
    const char* utf8 = PyUnicode_AsUTF8AndSize(obj, len);
    *ptr = utf8;
    if (utf8 != nullptr) {
      return Status::OK();
    }
#else
    // Older interpreters: encode into a temporary bytes object and read its
    // buffer.
    PyObject* encoded = PyUnicode_AsUTF8String(obj);
    if (encoded != nullptr) {
      char* data = nullptr;
      const int rc = PyBytes_AsStringAndSize(encoded, &data, len);
      Py_DECREF(encoded);
      if (rc != -1) {
        *ptr = data;
        return Status::OK();
      }
    }
#endif
    return errors::Internal("Unable to convert element to UTF-8.");
  }

  // Not unicode: treat the object as bytes.
  char* bytes = nullptr;
  if (PyBytes_AsStringAndSize(obj, &bytes, len) != 0) {
    return errors::Internal("Unable to get element as bytes.");
  }
  *ptr = bytes;
  return Status::OK();
}
// Iterate over the string array 'array', extract the ptr and len of each string
// element and call f(ptr, len).
template <typename F>
......@@ -148,33 +173,10 @@ Status PyBytesArrayMap(PyArrayObject* array, F f) {
if (!item) {
return errors::Internal("Unable to get element from the feed - no item.");
}
char* ptr;
Py_ssize_t len;
if (PyUnicode_Check(item.get())) {
#if PY_VERSION_HEX >= 0x03030000
// Accept unicode by converting to UTF-8 bytes.
ptr = PyUnicode_AsUTF8AndSize(item.get(), &len);
if (!ptr) {
return errors::Internal("Unable to get element as UTF-8.");
}
f(ptr, len);
#else
PyObject* utemp = PyUnicode_AsUTF8String(item.get());
if (!utemp || PyBytes_AsStringAndSize(utemp, &ptr, &len) == -1) {
Py_XDECREF(utemp);
return errors::Internal("Unable to convert element to UTF-8.");
}
f(ptr, len);
Py_DECREF(utemp);
#endif
} else {
int success = PyBytes_AsStringAndSize(item.get(), &ptr, &len);
if (success != 0) {
return errors::Internal("Unable to get element as bytes.");
}
f(ptr, len);
}
const char* ptr;
TF_RETURN_IF_ERROR(PyObjectToString(item.get(), &ptr, &len));
f(ptr, len);
PyArray_ITER_NEXT(iter.get());
}
return Status::OK();
......@@ -186,10 +188,11 @@ Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
size_t* size, void** buffer) {
// Compute bytes needed for encoding.
*size = 0;
TF_RETURN_IF_ERROR(PyBytesArrayMap(array, [&size](char* ptr, Py_ssize_t len) {
*size +=
sizeof(tensorflow::uint64) + tensorflow::core::VarintLength(len) + len;
}));
TF_RETURN_IF_ERROR(
PyBytesArrayMap(array, [&size](const char* ptr, Py_ssize_t len) {
*size += sizeof(tensorflow::uint64) +
tensorflow::core::VarintLength(len) + len;
}));
// Encode all strings.
std::unique_ptr<char[]> base_ptr(new char[*size]);
char* base = base_ptr.get();
......@@ -198,7 +201,7 @@ Status EncodePyBytesArray(PyArrayObject* array, tensorflow::int64 nelems,
tensorflow::uint64* offsets = reinterpret_cast<tensorflow::uint64*>(base);
TF_RETURN_IF_ERROR(PyBytesArrayMap(
array, [&base, &data_start, &dst, &offsets](char* ptr, Py_ssize_t len) {
array, [&data_start, &dst, &offsets](const char* ptr, Py_ssize_t len) {
*offsets = (dst - data_start);
offsets++;
dst = tensorflow::core::EncodeVarint64(dst, len);
......
......@@ -333,6 +333,35 @@ class NumpyTensorBuffer : public TensorBuffer {
void* data_;
};
// Copies the string content of 'obj' into '*str'.
//
// The object is first read with PyBytes_AsStringAndSize. If that fails, it is
// treated as unicode and its UTF-8 encoding is copied instead. Returns
// errors::Unimplemented, naming the object's type, when neither conversion
// succeeds.
Status PyObjectToString(PyObject* obj, string* str) {
  char* py_bytes;
  Py_ssize_t size;
  if (PyBytes_AsStringAndSize(obj, &py_bytes, &size) != -1) {
    str->assign(py_bytes, size);
    return Status::OK();
  }
  // The failed bytes conversion set the Python error indicator. That failure
  // is handled here by falling back to the unicode path, so clear it;
  // otherwise a successful fallback would return OK while a stale exception
  // is still pending in the interpreter.
  PyErr_Clear();
#if PY_MAJOR_VERSION >= 3
  // Declared "const char*" because Python 3.7 changed the return type of
  // PyUnicode_AsUTF8AndSize from "char*" to "const char*".
  const char* ptr = PyUnicode_AsUTF8AndSize(obj, &size);
  if (ptr != nullptr) {
    str->assign(ptr, size);
    return Status::OK();
  }
#else
  if (PyUnicode_Check(obj)) {
    // Encode into a temporary bytes object, copy the data out, then release
    // the temporary on every path.
    PyObject* unicode = PyUnicode_AsUTF8String(obj);
    char* ptr;
    if (unicode && PyString_AsStringAndSize(unicode, &ptr, &size) != -1) {
      str->assign(ptr, size);
      Py_DECREF(unicode);
      return Status::OK();
    }
    Py_XDECREF(unicode);
  }
#endif
  return errors::Unimplemented("Unsupported object type ",
                               obj->ob_type->tp_name);
}
Status ConvertNdarrayToTensor(PyObject* obj, Tensor* ret) {
PyArrayObject* input = reinterpret_cast<PyArrayObject*>(obj);
DataType dtype = DT_INVALID;
......@@ -348,29 +377,7 @@ Status ConvertNdarrayToTensor(PyObject* obj, Tensor* ret) {
auto tflat = t.flat<string>();
PyObject** input_data = reinterpret_cast<PyObject**>(PyArray_DATA(input));
for (int i = 0; i < tflat.dimension(0); ++i) {
char* el;
Py_ssize_t el_size;
if (PyBytes_AsStringAndSize(input_data[i], &el, &el_size) == -1) {
#if PY_MAJOR_VERSION >= 3
el = PyUnicode_AsUTF8AndSize(input_data[i], &el_size);
#else
el = nullptr;
if (PyUnicode_Check(input_data[i])) {
PyObject* unicode = PyUnicode_AsUTF8String(input_data[i]);
if (unicode) {
if (PyString_AsStringAndSize(unicode, &el, &el_size) == -1) {
Py_DECREF(unicode);
el = nullptr;
}
}
}
#endif
if (!el) {
return errors::Unimplemented("Unsupported object type ",
input_data[i]->ob_type->tp_name);
}
}
tflat(i) = string(el, el_size);
TF_RETURN_IF_ERROR(PyObjectToString(input_data[i], &tflat(i)));
}
*ret = t;
break;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册