Unverified commit b189e83f, authored by Chen Weihang, committed by GitHub

[Eager] Adapt faster tokenizer op (#42718)

* adapt faster tokenizer op

* add eager test

* add unittest
Parent 353ede5a
...@@ -21,24 +21,176 @@
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/utils/tensor_utils.h"
#include "paddle/phi/core/compat/convert_utils.h"
namespace egr {
/**
* VariableCompatTensor is used by Eager mode for now. It's painful to do this
* in Eager Mode; the better choice is to design the special Tensor directly in
* phi and use it in paddle::experimental::Tensor.
* However, we have some special operators that use special input variable
* types, such as vector<string> and unordered_map<wstring, int>, and these
* types cannot be covered by DenseTensor or SparseTensor. So we have to
* provide a Variable-like compatible Tensor type to support these special
* input types. We should remove this as soon as we finish the ResourceTensor
* in phi.
*
* Note: Keep this class as clean as possible.
* This class should only support the methods declared in framework::Variable
* and the necessary overridden methods.
*
* Note: This class is only used to temporarily support types that cannot be
* handled by the phi Tensor system. You CANNOT use this class to handle types
* such as DenseTensor, SelectedRows, etc.
**/
class VariableCompatTensor
: public phi::TensorBase,
public phi::TypeInfoTraits<phi::TensorBase, VariableCompatTensor> {
public:
template <typename T>
const T& Get() const {
static_assert(
paddle::framework::IsRegisteredVarType<T>(),
"Not registered type. Please register T inside var_type_traits.h");
PADDLE_ENFORCE_NOT_NULL(holder_, paddle::platform::errors::NotFound(
"Variable is not initialized."));
PADDLE_ENFORCE_EQ(
holder_->Type(), paddle::framework::VarTypeTrait<T>::kId,
paddle::platform::errors::InvalidArgument(
"The Variable type must be %s, but the type it holds is %s.",
paddle::framework::ToTypeName(
paddle::framework::VarTypeTrait<T>::kId),
paddle::framework::ToTypeName(holder_->Type())));
return *static_cast<const T*>(holder_->Ptr());
}
bool IsInitialized() const { return holder_ != nullptr; }
template <typename T>
T* GetMutable() {
if (!holder_) {
holder_.reset(new PlaceholderImpl<T>());
} else {
PADDLE_ENFORCE_EQ(
holder_->Type(), paddle::framework::VarTypeTrait<T>::kId,
paddle::platform::errors::InvalidArgument(
"The Variable type must be %s, but the type it holds is %s.",
paddle::framework::ToTypeName(
paddle::framework::VarTypeTrait<T>::kId),
paddle::framework::ToTypeName(holder_->Type())));
}
return static_cast<T*>(holder_->Ptr());
}
template <typename T>
bool IsType() const {
return holder_ &&
holder_->Type() == paddle::framework::VarTypeTrait<T>::kId;
}
void Clear() { holder_.reset(); }
int Type() const {
PADDLE_ENFORCE_NOT_NULL(holder_, paddle::platform::errors::NotFound(
"Variable is not initialized."));
return holder_->Type();
}
// necessary overridden methods
static const char* name() { return "VariableCompatTensor"; }
~VariableCompatTensor() override = default;
int64_t numel() const override {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor does not support `numel` method."));
}
const phi::DDim& dims() const override {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor does not support `dims` method."));
}
phi::DataType dtype() const override {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor does not support `dtype` method."));
}
phi::DataLayout layout() const override {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor does not support `layout` method."));
}
const phi::Place& place() const override {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor does not support `place` method."));
}
bool valid() const override { return IsInitialized(); }
bool initialized() const override { return IsInitialized(); }
void* AllocateFrom(phi::Allocator* allocator, phi::DataType dtype,
size_t requested_size = 0) override {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor does not support `AllocateFrom` method."));
}
private:
struct Placeholder {
virtual ~Placeholder() PADDLE_MAY_THROW {}
inline int Type() const { return type_; }
inline const void* Ptr() const { return ptr_; }
inline void* Ptr() { return ptr_; }
protected:
inline void Init(void* p, int type) {
ptr_ = p;
type_ = type;
}
void* ptr_;
int type_;
};
// Placeholder hides type T, so it doesn't appear as a template
// parameter of VariableCompatTensor.
template <typename T>
struct PlaceholderImpl : public Placeholder {
static_assert(
paddle::framework::IsRegisteredVarType<T>(),
"Not registered type. Please register T inside var_type_traits.h");
PlaceholderImpl() {
this->Init(&obj_, paddle::framework::VarTypeTrait<T>::kId);
}
private:
T obj_;
};
// `holder_` actually points to a PlaceholderImpl object.
std::shared_ptr<Placeholder> holder_;
};
inline bool IsVariableCompatTensor(const paddle::experimental::Tensor& tensor) {
return VariableCompatTensor::classof(tensor.impl().get());
}
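For reference, a minimal usage sketch of VariableCompatTensor, mirroring the unit test added later in this commit; the include path is an assumption based on where this class lives in the Paddle tree, and the snippet only builds inside a Paddle checkout:

// Sketch only: Vocab is paddle::framework::Vocab (unordered_map<wstring, int>).
#include <cassert>
#include "paddle/fluid/eager/eager_tensor.h"  // assumed header path

int main() {
  egr::VariableCompatTensor var_tensor;

  // GetMutable<T>() default-constructs the held value on first use.
  auto* vocab = var_tensor.GetMutable<paddle::framework::Vocab>();
  (*vocab)[L"token"] = 1;

  // Get<T>() reads it back; IsType<T>()/Type() report the held var type.
  assert(var_tensor.Get<paddle::framework::Vocab>().size() == 1UL);
  assert(var_tensor.IsType<paddle::framework::Vocab>());
  assert(var_tensor.IsInitialized() && var_tensor.valid());

  // Clear() drops the holder, returning the tensor to the uninitialized state.
  var_tensor.Clear();
  assert(!var_tensor.IsInitialized());
  return 0;
}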
/**
* This class is used by Eager mode for now. It's painful to do this in Eager
* Mode; the better choice is to use paddle::experimental::Tensor directly.
* However, we have a bunch of nested kernel code, and they use
* paddle::framework::Variable in inner logic code. So, we have to provide
* variable in paddle::framework::ExecutionContext to support it. We should
* remove this as soon as we finish our latest Phi Lib, and use
* paddle::experimental::Tensor instead.
*
* Note: Keep this class as clean as possible.
* This class should only support methods declared in
* paddle::experimental::Tensor with access methods of
* paddle::framework::Variable; no more members are acceptable.
* **/
class EagerVariable final {
public:
/* Default constructor and name constructor should only be used for construct
...@@ -54,6 +206,14 @@ class EagerVariable final {
ConstructVariableFromTensor<phi::DenseTensor>(tensor);
} else if (tensor.is_selected_rows()) {
ConstructVariableFromTensor<phi::SelectedRows>(tensor);
ConstructVariableFromTensor<phi::SelectedRows>(tensor); ConstructVariableFromTensor<phi::SelectedRows>(tensor);
} else if (IsVariableCompatTensor(tensor) &&
static_cast<const VariableCompatTensor*>(tensor.impl().get())
->IsType<paddle::framework::Vocab>()) {
ConstructVariableFromCompatTensor<paddle::framework::Vocab>(tensor);
} else if (IsVariableCompatTensor(tensor) &&
static_cast<const VariableCompatTensor*>(tensor.impl().get())
->IsType<paddle::framework::Strings>()) {
ConstructVariableFromCompatTensor<paddle::framework::Strings>(tensor);
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Unrecognized egr::EagerVariable type, only "
...@@ -119,6 +279,22 @@ class EagerVariable final {
*framework_tensor = *tensor_dense;
}
template <typename VarType>
void ConstructVariableFromCompatTensor(
const paddle::experimental::Tensor& tensor) {
auto* framework_holder = var_.GetMutable<VarType>();
// Construct framework::Variable content from egr::VariableCompatTensor
auto* compat_tensor =
static_cast<VariableCompatTensor*>(tensor.impl().get());
PADDLE_ENFORCE_NOT_NULL(compat_tensor,
paddle::platform::errors::Fatal(
"Tensor %s holds empty impl, this should not "
"happend since we should "
"treat all kinds of tensor as what they are.",
tensor.name()));
*framework_holder = compat_tensor->Get<VarType>();
}
private:
std::string name_{""};
paddle::framework::Variable var_;
......
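A hedged sketch of how these pieces are meant to fit together: a Vocab is wrapped in a VariableCompatTensor, attached to a paddle::experimental::Tensor via set_impl(), and detected with IsVariableCompatTensor() before being unpacked, which is the same flow EagerVariable's constructor and the pybind methods below follow. The header path and the free function AttachVocabDemo are illustrative assumptions:

#include <memory>
#include "paddle/fluid/eager/eager_tensor.h"  // assumed header path

void AttachVocabDemo() {
  // Wrap a Vocab in a VariableCompatTensor and hand it to an API Tensor.
  auto compat = std::make_shared<egr::VariableCompatTensor>();
  (*compat->GetMutable<paddle::framework::Vocab>())[L"[PAD]"] = 0;

  paddle::experimental::Tensor tensor;
  tensor.set_impl(compat);

  // Consumers dispatch on IsVariableCompatTensor() plus the held type, as
  // EagerVariable's constructor does before calling
  // ConstructVariableFromCompatTensor<paddle::framework::Vocab>().
  if (egr::IsVariableCompatTensor(tensor)) {
    auto* impl = static_cast<egr::VariableCompatTensor*>(tensor.impl().get());
    if (impl->IsType<paddle::framework::Vocab>()) {
      const auto& vocab = impl->Get<paddle::framework::Vocab>();
      (void)vocab;  // copied into a framework::Variable in the real code path
    }
  }
}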
...@@ -233,3 +233,88 @@ TEST(EagerVariable, DataLayout) {
layout = paddle::imperative::GetDataLayout(eager_var);
CHECK_EQ(layout, paddle::experimental::DataLayout::NCHW);
}
TEST(VariableCompatTensor, MemberFunction) {
egr::VariableCompatTensor var_tensor;
// test GetMutable and Get
var_tensor.GetMutable<paddle::framework::Vocab>();
auto& vocab = var_tensor.Get<paddle::framework::Vocab>();
EXPECT_EQ(vocab.size(), 0UL);
bool caught_exception = false;
try {
var_tensor.GetMutable<paddle::framework::Strings>();
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("The Variable type must be") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
// test Type and IsType
EXPECT_TRUE(var_tensor.IsType<paddle::framework::Vocab>());
EXPECT_EQ(var_tensor.Type(),
static_cast<int>(paddle::framework::proto::VarType::VOCAB));
// test valid and initialized
EXPECT_TRUE(var_tensor.IsInitialized());
EXPECT_TRUE(var_tensor.valid());
EXPECT_TRUE(var_tensor.initialized());
// test name
EXPECT_EQ(var_tensor.name(), "VariableCompatTensor");
// test other throw error methods
caught_exception = false;
try {
var_tensor.numel();
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("numel") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
var_tensor.dims();
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("dims") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
var_tensor.dtype();
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("dtype") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
var_tensor.layout();
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("layout") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
var_tensor.place();
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("place") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
caught_exception = false;
try {
var_tensor.AllocateFrom(nullptr, phi::DataType::UNDEFINED);
} catch (paddle::platform::EnforceNotMet& error) {
caught_exception = true;
std::string ex_msg = error.what();
EXPECT_TRUE(ex_msg.find("AllocateFrom") != std::string::npos);
}
EXPECT_TRUE(caught_exception);
// test Clear
var_tensor.Clear();
EXPECT_FALSE(var_tensor.IsInitialized());
}
...@@ -18,6 +18,7 @@ typedef SSIZE_T ssize_t;
#include <Python.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "pybind11/numpy.h"
...@@ -675,7 +676,9 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self,
PyObject* kwargs) {
EAGER_TRY
if (!self->tensor.defined()) {
// The original `get_tensor` method of Variable will create an empty tensor
phi::DenseTensor empty_tensor;
return ToPyObject(&empty_tensor);
}
if (self->tensor.is_dense_tensor()) {
auto* tensor =
...@@ -1275,6 +1278,47 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_set_vocab(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
using Vocab = std::unordered_map<std::wstring, int>;
auto vocab = CastPyArg2Vocab(PyTuple_GET_ITEM(args, 0), 0);
auto var_tensor = std::make_shared<egr::VariableCompatTensor>();
*var_tensor->GetMutable<Vocab>() = vocab;
self->tensor.set_impl(var_tensor);
RETURN_PY_NONE
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_set_string_list(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
EAGER_TRY
using Strings = std::vector<std::string>;
auto strings = CastPyArg2Strings(PyTuple_GET_ITEM(args, 0), 0);
auto var_tensor = std::make_shared<egr::VariableCompatTensor>();
*var_tensor->GetMutable<Strings>() = strings;
self->tensor.set_impl(var_tensor);
RETURN_PY_NONE
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_get_map_tensor(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
EAGER_TRY
PADDLE_ENFORCE_EQ(
egr::IsVariableCompatTensor(self->tensor), true,
paddle::platform::errors::Fatal(
"this method is only effective for VariableCompatTensor"));
using Vocab = std::unordered_map<std::wstring, int>;
auto* var_tensor =
static_cast<const egr::VariableCompatTensor*>(self->tensor.impl().get());
return ToPyObject(var_tensor->Get<Vocab>());
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor_method_get_non_zero_indices(TensorObject* self,
PyObject* args,
PyObject* kwargs) {
...@@ -1655,6 +1699,15 @@ PyMethodDef variable_methods[] = {
{"_copy_gradient_from",
(PyCFunction)(void (*)(void))tensor__copy_gradient_from,
METH_VARARGS | METH_KEYWORDS, NULL},
/** the methods to adapt old dygraph, will be removed in the future **/
{"set_string_list",
(PyCFunction)(void (*)(void))tensor_method_set_string_list,
METH_VARARGS | METH_KEYWORDS, NULL},
{"set_vocab", (PyCFunction)(void (*)(void))tensor_method_set_vocab,
METH_VARARGS | METH_KEYWORDS, NULL},
{"get_map_tensor",
(PyCFunction)(void (*)(void))tensor_method_get_map_tensor,
METH_VARARGS | METH_KEYWORDS, NULL},
/***the method of sparse tensor****/
{"indices", (PyCFunction)(void (*)(void))tensor_method_get_non_zero_indices,
METH_VARARGS | METH_KEYWORDS, NULL},
......
...@@ -58,6 +58,10 @@ PyObject* tensor_properties_get_type(TensorObject* self, void* closure) {
return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR);
} else if (self->tensor.is_selected_rows()) {
return ToPyObject(paddle::framework::proto::VarType::SELECTED_ROWS);
} else if (egr::IsVariableCompatTensor(self->tensor)) {
return ToPyObject(static_cast<paddle::framework::proto::VarType::Type>(
static_cast<const egr::VariableCompatTensor*>(self->tensor.impl().get())
->Type()));
} else {
RETURN_PY_NONE
}
...@@ -152,12 +156,28 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
if (!self->tensor.defined()) {
return ToPyObject(value);
}
if (egr::IsVariableCompatTensor(self->tensor)) {
auto* var_tensor = static_cast<const egr::VariableCompatTensor*>(
self->tensor.impl().get());
if (var_tensor->IsType<paddle::framework::Vocab>()) {
value.emplace_back(static_cast<int64_t>(
var_tensor->Get<paddle::framework::Vocab>().size()));
} else if (var_tensor->IsType<paddle::framework::Strings>()) {
value.emplace_back(static_cast<int64_t>(
var_tensor->Get<paddle::framework::Strings>().size()));
} else {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor only supports getting the shape from Vocab or "
"Strings."));
}
} else {
auto ddim = self->tensor.shape();
size_t rank = static_cast<size_t>(ddim.size());
value.resize(rank);
for (size_t i = 0; i < rank; i++) {
value[i] = ddim[i];
}
}
return ToPyObject(value);
EAGER_CATCH_AND_THROW_RETURN_NULL
...@@ -183,8 +203,22 @@ PyObject* tensor_properties_get_dtype(TensorObject* self, void* closure) {
// be same to old dygraph
return ToPyObject(framework::proto::VarType::FP32);
}
if (egr::IsVariableCompatTensor(self->tensor)) {
auto* var_tensor = static_cast<const egr::VariableCompatTensor*>(
self->tensor.impl().get());
if (var_tensor->IsType<paddle::framework::Vocab>()) {
return ToPyObject(framework::proto::VarType::RAW);
} else if (var_tensor->IsType<paddle::framework::Strings>()) {
return ToPyObject(framework::proto::VarType::STRING);
} else {
PADDLE_THROW(paddle::platform::errors::Unavailable(
"VariableCompatTensor only supports getting the dtype from Vocab or "
"Strings."));
}
} else {
return ToPyObject(
paddle::framework::TransToProtoVarType(self->tensor.type()));
}
EAGER_CATCH_AND_THROW_RETURN_NULL
}
......
...@@ -472,6 +472,28 @@ paddle::framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
return dtype;
}
std::unordered_map<std::wstring, int> CastPyArg2Vocab(PyObject* obj,
ssize_t arg_pos) {
if (PyDict_Check(obj)) {
return ::pybind11::handle(obj)
.cast<std::unordered_map<std::wstring, int>>();
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"argument (position %d) must be dict, but got %s", arg_pos + 1,
reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
}
}
std::vector<std::string> CastPyArg2Strings(PyObject* obj, ssize_t arg_pos) {
if (PyList_Check(obj)) {
return ::pybind11::handle(obj).cast<std::vector<std::string>>();
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"argument (position %d) must be list, but got %s", arg_pos + 1,
reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
}
}
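Both helpers above rely on pybind11's built-in type casters. A small standalone sketch of the same conversions against an embedded interpreter (no Paddle code involved):

#include <pybind11/embed.h>  // also pulls in pybind11/eval.h
#include <pybind11/stl.h>
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

namespace py = pybind11;

int main() {
  py::scoped_interpreter guard{};

  // dict[str, int] -> std::unordered_map<std::wstring, int>, as in CastPyArg2Vocab.
  auto vocab = py::eval("{'hello': 1, 'world': 2}")
                   .cast<std::unordered_map<std::wstring, int>>();
  assert(vocab.at(L"hello") == 1);

  // list[str] -> std::vector<std::string>, as in CastPyArg2Strings.
  auto texts = py::eval("['foo', 'bar']").cast<std::vector<std::string>>();
  assert(texts.size() == 2UL);
  return 0;
}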
paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj,
ssize_t arg_pos) {
if (PyObject_IsInstance(
...@@ -719,6 +741,28 @@ PyObject* ToPyObject(
return dict;
}
PyObject* ToPyObject(const std::unordered_map<std::wstring, int>& value) {
PyObject* dict = PyDict_New();
for (const auto& map_iter : value) {
// Convert Key
PyObject* key_string =
PyUnicode_FromWideChar(map_iter.first.c_str(), map_iter.first.size());
if (!key_string) {
PADDLE_THROW(platform::errors::Fatal(
"Unable to convert std::wstring to PyObject"));
}
// Convert Val
PyObject* py_int = PyLong_FromLong(map_iter.second);
if (PyDict_SetItem(dict, key_string, py_int) != 0) {
PADDLE_THROW(
platform::errors::Fatal("Unable to set key:value for py_dict"));
}
}
return dict;
}
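One detail worth noting in the overload above: PyDict_SetItem does not steal references, so the freshly created key and value objects retain an extra reference after insertion. A standalone sketch of the same conversion with explicit cleanup, using only the CPython C API (the helper name is illustrative):

#include <Python.h>

#include <string>
#include <unordered_map>

// Convert a wide-string vocab to a Python dict, releasing local references.
static PyObject* VocabToPyDict(
    const std::unordered_map<std::wstring, int>& vocab) {
  PyObject* dict = PyDict_New();
  for (const auto& item : vocab) {
    PyObject* key =
        PyUnicode_FromWideChar(item.first.c_str(), item.first.size());
    PyObject* val = PyLong_FromLong(item.second);
    if (key == nullptr || val == nullptr ||
        PyDict_SetItem(dict, key, val) != 0) {
      Py_XDECREF(key);
      Py_XDECREF(val);
      Py_DECREF(dict);
      return nullptr;
    }
    // PyDict_SetItem does not steal references; drop our own.
    Py_DECREF(key);
    Py_DECREF(val);
  }
  return dict;
}

int main() {
  Py_Initialize();
  PyObject* d = VocabToPyDict({{L"你好", 1}, {L"world", 2}});
  PyObject_Print(d, stdout, 0);
  Py_DECREF(d);
  return Py_FinalizeEx();
}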
// For Final State Dygraph,
// We directly use paddle::optional(Tensor) as dispensable Tensor
paddle::optional<const paddle::experimental::Tensor&> GetOptionalTensorFromArgs(
......
...@@ -65,6 +65,9 @@ std::vector<std::vector<size_t>> CastPyArg2VectorOfVectorOfSize_t(
PyObject* obj, size_t arg_pos);
framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
ssize_t arg_pos);
std::unordered_map<std::wstring, int> CastPyArg2Vocab(PyObject* obj,
ssize_t arg_pos);
std::vector<std::string> CastPyArg2Strings(PyObject* obj, ssize_t arg_pos);
PyObject* ToPyObject(int value);
PyObject* ToPyObject(uint32_t value);
...@@ -96,6 +99,7 @@ PyObject* ToPyObject(const paddle::framework::proto::VarType& type);
PyObject* ToPyObject(const void* value);
PyObject* ToPyObject(
const std::unordered_map<std::string, std::vector<std::string>>& value);
PyObject* ToPyObject(const std::unordered_map<std::wstring, int>& value);
template <typename Tuple, size_t N>
struct TupleTensorResult {
......
...@@ -394,8 +394,8 @@ uint32_t Tensor::current_inplace_version() {
static_cast<phi::DenseTensor *>(impl_.get())->InplaceVersionCounter();
return inplace_version_counter.CurrentVersion();
} else {
LOG_FIRST_N(WARNING, 1)
<< "current_inplace_version is only supported on DenseTensor now.";
}
return 0;
}
......
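The change above replaces a hard error with a rate-limited warning: LOG_FIRST_N is the glog macro that emits a message only for the first N occurrences. A minimal sketch:

#include <glog/logging.h>

void WarnOnce() {
  for (int i = 0; i < 100; ++i) {
    // Printed a single time, no matter how often this line is hit.
    LOG_FIRST_N(WARNING, 1)
        << "current_inplace_version is only supported on DenseTensor now.";
  }
}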
...@@ -22,8 +22,7 @@ import numpy as np
import paddle
import paddle.nn as nn
from paddle.dataset.common import DATA_HOME
from paddle.fluid.framework import core, _non_static_mode, _test_eager_guard
from paddle.fluid.layer_helper import LayerHelper
from paddle import _C_ops
...@@ -151,13 +150,12 @@ class Predictor(object):
class TestBertTokenizerOp(unittest.TestCase):
def setUp(self):
self.bert_tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
self.save_path = os.path.join(DATA_HOME, "fast_tokenizer")
self.param_path = os.path.join(self.save_path, "model.pdparams")
self.inference_path = os.path.join(self.save_path, "inference")
def init_data(self):
self.faster_tokenizer = FasterTokenizer(self.bert_tokenizer.vocab)
self.text = [
'选择珠江花园的原因就是方便,有电动扶梯直接到达海边,周围餐馆、食廊、商场、超市、摊位一应俱全。'
'酒店装修一般,但还算整洁。 泳池在大堂的屋顶,因此很小,不过女儿倒是喜欢。 包的早餐是西式的,'
...@@ -179,8 +177,8 @@ class TestBertTokenizerOp(unittest.TestCase):
self.texts_tensor = to_string_tensor(self.texts, "texts")
self.text_pairs_tensor = to_string_tensor(self.text_pairs, "text_pairs")
def run_padding(self):
self.init_data()
self.max_seq_len = 128
self.pad_to_max_seq_len = True
self.is_split_into_words = False
...@@ -283,7 +281,13 @@ class TestBertTokenizerOp(unittest.TestCase):
np.allclose(
token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
def test_padding(self):
with _test_eager_guard():
self.run_padding()
self.run_padding()
def run_no_padding(self):
self.init_data()
self.max_seq_len = 128
self.pad_to_max_seq_len = False
self.is_split_into_words = False
...@@ -336,7 +340,13 @@ class TestBertTokenizerOp(unittest.TestCase):
np.allclose(
token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
def test_no_padding(self):
with _test_eager_guard():
self.run_no_padding()
self.run_no_padding()
def run_is_split_into_words(self):
self.init_data()
self.is_split_into_words = True
input_ids, token_type_ids = self.faster_tokenizer(
...@@ -355,7 +365,13 @@ class TestBertTokenizerOp(unittest.TestCase):
np.allclose(
token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
def test_is_split_into_words(self):
with _test_eager_guard():
self.run_is_split_into_words()
self.run_is_split_into_words()
def test_inference(self):
self.init_data()
if not os.path.exists(self.save_path):
os.makedirs(self.save_path, exist_ok=True)
paddle.save(self.faster_tokenizer.state_dict(), self.param_path)
...@@ -383,6 +399,7 @@ class TestBertTokenizerOp(unittest.TestCase):
token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
def test_feed_string_var(self):
self.init_data()
paddle.enable_static()
x = paddle.static.data(
name="x", shape=[-1], dtype=core.VarDesc.VarType.STRINGS)
......