Unverified commit b189e83f, authored on May 17, 2022 by Chen Weihang and committed via GitHub on May 17, 2022.
[Eager] Adapt faster tokenizer op (#42718)

* adapt faster tokenizer op
* add eager test
* add unittest

Parent: 353ede5a
Showing 8 changed files with 441 additions and 28 deletions (+441, -28).
Changed files:

- paddle/fluid/eager/eager_tensor.h (+186, -10)
- paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc (+85, -0)
- paddle/fluid/pybind/eager_method.cc (+54, -1)
- paddle/fluid/pybind/eager_properties.cc (+41, -7)
- paddle/fluid/pybind/eager_utils.cc (+44, -0)
- paddle/fluid/pybind/eager_utils.h (+4, -0)
- paddle/phi/api/lib/tensor.cc (+2, -2)
- python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py (+25, -8)
paddle/fluid/eager/eager_tensor.h (+186, -10)

@@ -21,24 +21,176 @@
```cpp
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/utils/tensor_utils.h"
#include "paddle/phi/core/compat/convert_utils.h"

namespace egr {

/**
 * VariableCompatTensor is used by Eager mode for now. It's painful to do
 * this in Eager mode; the better choice is to design the special Tensor
 * directly in phi and use it in paddle::experimental::Tensor.
 * However, we have some special operators that use special input variable
 * types, such as vector<string> and unordered_map<wstring, int>, and these
 * types cannot be covered by DenseTensor or SparseTensor. So we have to
 * provide a compatible Tensor type, like Variable, to support these special
 * input types. We should remove this as soon as we finish the ResourceTensor
 * in phi.
 *
 * Note: Keep this class as clean as possible.
 * This class should only support methods declared in framework::Variable
 * and the necessary overridden methods.
 *
 * Note: This class is only used to support types that cannot be supported
 * by the phi Tensor system temporarily. You CANNOT use this class to handle
 * types such as DenseTensor, SelectedRows, etc.
 **/
class VariableCompatTensor
    : public phi::TensorBase,
      public phi::TypeInfoTraits<phi::TensorBase, VariableCompatTensor> {
 public:
  template <typename T>
  const T& Get() const {
    static_assert(
        paddle::framework::IsRegisteredVarType<T>(),
        "Not registered type. Please register T inside var_type_traits.h");
    PADDLE_ENFORCE_NOT_NULL(
        holder_,
        paddle::platform::errors::NotFound("Variable is not initialized."));
    PADDLE_ENFORCE_EQ(
        holder_->Type(), paddle::framework::VarTypeTrait<T>::kId,
        paddle::platform::errors::InvalidArgument(
            "The Variable type must be %s, but the type it holds is %s.",
            paddle::framework::ToTypeName(
                paddle::framework::VarTypeTrait<T>::kId),
            paddle::framework::ToTypeName(holder_->Type())));
    return *static_cast<const T*>(holder_->Ptr());
  }

  bool IsInitialized() const { return holder_ != nullptr; }

  template <typename T>
  T* GetMutable() {
    if (!holder_) {
      holder_.reset(new PlaceholderImpl<T>());
    } else {
      PADDLE_ENFORCE_EQ(
          holder_->Type(), paddle::framework::VarTypeTrait<T>::kId,
          paddle::platform::errors::InvalidArgument(
              "The Variable type must be %s, but the type it holds is %s.",
              paddle::framework::ToTypeName(
                  paddle::framework::VarTypeTrait<T>::kId),
              paddle::framework::ToTypeName(holder_->Type())));
    }
    return static_cast<T*>(holder_->Ptr());
  }

  template <typename T>
  bool IsType() const {
    return holder_ &&
           holder_->Type() == paddle::framework::VarTypeTrait<T>::kId;
  }

  void Clear() { holder_.reset(); }

  int Type() const {
    PADDLE_ENFORCE_NOT_NULL(
        holder_,
        paddle::platform::errors::NotFound("Variable is not initialized."));
    return holder_->Type();
  }

  // necessary overridden methods

  static const char* name() { return "VariableCompatTensor"; }

  ~VariableCompatTensor() override = default;

  int64_t numel() const override {
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "VariableCompatTensor does not support `numel` method."));
  }

  const phi::DDim& dims() const override {
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "VariableCompatTensor does not support `dims` method."));
  }

  phi::DataType dtype() const override {
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "VariableCompatTensor does not support `dtype` method."));
  }

  phi::DataLayout layout() const override {
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "VariableCompatTensor does not support `layout` method."));
  }

  const phi::Place& place() const override {
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "VariableCompatTensor does not support `place` method."));
  }

  bool valid() const override { return IsInitialized(); }

  bool initialized() const override { return IsInitialized(); }

  void* AllocateFrom(phi::Allocator* allocator, phi::DataType dtype,
                     size_t requested_size = 0) override {
    PADDLE_THROW(paddle::platform::errors::Unavailable(
        "VariableCompatTensor does not support `AllocateFrom` method."));
  }

 private:
  struct Placeholder {
    virtual ~Placeholder() PADDLE_MAY_THROW {}

    inline int Type() const { return type_; }
    inline const void* Ptr() const { return ptr_; }
    inline void* Ptr() { return ptr_; }

   protected:
    inline void Init(void* p, int type) {
      ptr_ = p;
      type_ = type;
    }

    void* ptr_;
    int type_;
  };

  // Placeholder hides type T, so it doesn't appear as a template
  // parameter of Variable.
  template <typename T>
  struct PlaceholderImpl : public Placeholder {
    static_assert(
        paddle::framework::IsRegisteredVarType<T>(),
        "Not registered type. Please register T inside var_type_traits.h");
    PlaceholderImpl() {
      this->Init(&obj_, paddle::framework::VarTypeTrait<T>::kId);
    }

   private:
    T obj_;
  };

  // pointers to a PlaceholderImpl object indeed.
  std::shared_ptr<Placeholder> holder_;
};

inline bool IsVariableCompatTensor(
    const paddle::experimental::Tensor& tensor) {
  return VariableCompatTensor::classof(tensor.impl().get());
}

/**
 * This class is used by Eager mode for now. It's painful to do this in Eager
 * mode; the better choice is to use paddle::experimental::Tensor directly.
 * However, we have a bunch of nested kernel code that uses
 * paddle::framework::Variable in its inner logic, so we have to provide a
 * Variable in paddle::framework::ExecutionContext to support it. We should
 * remove this as soon as we finish our latest Phi Lib, and use
 * paddle::experimental::Tensor instead.
 *
 * Note: Keep this class as clean as possible.
 * This class should only support methods declared in
 * paddle::experimental::Tensor, with access methods of
 * paddle::framework::Variable; no more members are acceptable.
 **/
class EagerVariable final {
 public:
  /* Default constructor and name constructor should only be used for construct
  ...
```
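Aside: the `holder_` machinery above is the classic type-erasure pattern that `framework::Variable` also uses, with a non-template `Placeholder` base hiding a typed `PlaceholderImpl<T>`. Below is a minimal, self-contained sketch of the same idea; it compiles on its own, substitutes `std::type_index` for Paddle's `VarTypeTrait<T>::kId`, and every name in it is illustrative rather than part of Paddle.

```cpp
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <typeindex>
#include <vector>

// Minimal stand-in for the Placeholder/PlaceholderImpl pattern: the holder
// erases T but remembers enough type information to check Get<T>() at runtime.
class HolderDemo {
 public:
  template <typename T>
  T* GetMutable() {
    if (!holder_) {
      holder_.reset(new PlaceholderImpl<T>());
    } else if (holder_->Type() != std::type_index(typeid(T))) {
      throw std::runtime_error("held type does not match requested type");
    }
    return static_cast<T*>(holder_->Ptr());
  }

  template <typename T>
  const T& Get() const {
    if (!holder_ || holder_->Type() != std::type_index(typeid(T))) {
      throw std::runtime_error("holder is empty or holds another type");
    }
    return *static_cast<const T*>(holder_->Ptr());
  }

  bool IsInitialized() const { return holder_ != nullptr; }
  void Clear() { holder_.reset(); }

 private:
  struct Placeholder {
    virtual ~Placeholder() = default;
    virtual std::type_index Type() const = 0;
    virtual void* Ptr() = 0;
    virtual const void* Ptr() const = 0;
  };

  // Hides T behind the non-template base, exactly like PlaceholderImpl above
  // (Paddle uses an integer kId from VarTypeTrait instead of type_index).
  template <typename T>
  struct PlaceholderImpl : Placeholder {
    std::type_index Type() const override {
      return std::type_index(typeid(T));
    }
    void* Ptr() override { return &obj_; }
    const void* Ptr() const override { return &obj_; }
    T obj_{};
  };

  std::shared_ptr<Placeholder> holder_;
};

int main() {
  HolderDemo var;
  var.GetMutable<std::vector<std::string>>()->push_back("hello");
  std::cout << var.Get<std::vector<std::string>>().size() << "\n";  // 1
  try {
    var.Get<int>();  // wrong type: throws, mirroring PADDLE_ENFORCE_EQ
  } catch (const std::exception& e) {
    std::cout << e.what() << "\n";
  }
  return 0;
}
```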
@@ -54,6 +206,14 @@ class EagerVariable final {
```cpp
      ConstructVariableFromTensor<phi::DenseTensor>(tensor);
    } else if (tensor.is_selected_rows()) {
      ConstructVariableFromTensor<phi::SelectedRows>(tensor);
    } else if (IsVariableCompatTensor(tensor) &&
               static_cast<const VariableCompatTensor*>(tensor.impl().get())
                   ->IsType<paddle::framework::Vocab>()) {
      ConstructVariableFromCompatTensor<paddle::framework::Vocab>(tensor);
    } else if (IsVariableCompatTensor(tensor) &&
               static_cast<const VariableCompatTensor*>(tensor.impl().get())
                   ->IsType<paddle::framework::Strings>()) {
      ConstructVariableFromCompatTensor<paddle::framework::Strings>(tensor);
    } else {
      PADDLE_THROW(paddle::platform::errors::Fatal(
          "Unrecognized egr::EagerVariable type, only "
  ...
```
@@ -119,6 +279,22 @@ class EagerVariable final {
```cpp
    *framework_tensor = *tensor_dense;
  }

  template <typename VarType>
  void ConstructVariableFromCompatTensor(
      const paddle::experimental::Tensor& tensor) {
    auto* framework_holder = var_.GetMutable<VarType>();
    // Construct framework::Tensor from egr::EagerVariable
    auto* compat_tensor =
        static_cast<VariableCompatTensor*>(tensor.impl().get());
    PADDLE_ENFORCE_NOT_NULL(compat_tensor,
                            paddle::platform::errors::Fatal(
                                "Tensor %s holds empty impl, this should not "
                                "happend since we should "
                                "treat all kinds of tensor as what they are.",
                                tensor.name()));
    *framework_holder = compat_tensor->Get<VarType>();
  }

 private:
  std::string name_{""};
  paddle::framework::Variable var_;
  ...
```
paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc (+85, -0)

@@ -233,3 +233,88 @@ TEST(EagerVariable, DataLayout) {
```cpp
  layout = paddle::imperative::GetDataLayout(eager_var);
  CHECK_EQ(layout, paddle::experimental::DataLayout::NCHW);
}

TEST(VariableCompatTensor, MemberFunction) {
  egr::VariableCompatTensor var_tensor;
  // test GetMutable and Get
  var_tensor.GetMutable<paddle::framework::Vocab>();
  auto& vocab = var_tensor.Get<paddle::framework::Vocab>();
  EXPECT_EQ(vocab.size(), 0UL);
  bool caught_exception = false;
  try {
    var_tensor.GetMutable<paddle::framework::Strings>();
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("The Variable type must be") !=
                std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  // test Type and IsType
  EXPECT_TRUE(var_tensor.IsType<paddle::framework::Vocab>());
  EXPECT_EQ(var_tensor.Type(),
            static_cast<int>(paddle::framework::proto::VarType::VOCAB));
  // test valid and initialized
  EXPECT_TRUE(var_tensor.IsInitialized());
  EXPECT_TRUE(var_tensor.valid());
  EXPECT_TRUE(var_tensor.initialized());
  // test name
  EXPECT_EQ(var_tensor.name(), "VariableCompatTensor");
  // test other throw error methods
  caught_exception = false;
  try {
    var_tensor.numel();
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("numel") != std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  caught_exception = false;
  try {
    var_tensor.dims();
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("dims") != std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  caught_exception = false;
  try {
    var_tensor.dtype();
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("dtype") != std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  caught_exception = false;
  try {
    var_tensor.layout();
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("layout") != std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  caught_exception = false;
  try {
    var_tensor.place();
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("place") != std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  caught_exception = false;
  try {
    var_tensor.AllocateFrom(nullptr, phi::DataType::UNDEFINED);
  } catch (paddle::platform::EnforceNotMet& error) {
    caught_exception = true;
    std::string ex_msg = error.what();
    EXPECT_TRUE(ex_msg.find("AllocateFrom") != std::string::npos);
  }
  EXPECT_TRUE(caught_exception);
  // test Clear
  var_tensor.Clear();
  EXPECT_FALSE(var_tensor.IsInitialized());
}
```
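The try/catch-plus-flag pattern above is repeated for every unsupported method. As an aside, when only the exception type matters (not the message substring, which here needs `error.what()`), GoogleTest's `EXPECT_THROW` expresses the same check more compactly. A standalone sketch under that assumption, with an illustrative stand-in function:

```cpp
#include <stdexcept>
#include "gtest/gtest.h"

// Illustrative stand-in for a method that always rejects the call.
int64_t NumelAlwaysThrows() {
  throw std::runtime_error("does not support `numel` method.");
}

TEST(ExpectThrowSketch, UnsupportedMethod) {
  // Equivalent to the caught_exception flag pattern when only the exception
  // type matters; the message-substring checks in the patch still need
  // try/catch to inspect error.what().
  EXPECT_THROW(NumelAlwaysThrows(), std::runtime_error);
}
```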
paddle/fluid/pybind/eager_method.cc (+54, -1)

@@ -18,6 +18,7 @@ typedef SSIZE_T ssize_t;
```cpp
#include <Python.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "pybind11/numpy.h"
  ...
```

@@ -675,7 +676,9 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self,
```cpp
                                                    PyObject* kwargs) {
  EAGER_TRY
  if (!self->tensor.defined()) {
    // The original `get_tensor` method of Variable will create an empty tensor
    phi::DenseTensor empty_tensor;
    return ToPyObject(&empty_tensor);
  }
  if (self->tensor.is_dense_tensor()) {
    auto* tensor =
  ...
```

@@ -1275,6 +1278,47 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, PyObject* args,
```cpp
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* tensor_method_set_vocab(TensorObject* self, PyObject* args,
                                         PyObject* kwargs) {
  EAGER_TRY
  using Vocab = std::unordered_map<std::wstring, int>;
  auto vocab = CastPyArg2Vocab(PyTuple_GET_ITEM(args, 0), 0);
  auto var_tensor = std::make_shared<egr::VariableCompatTensor>();
  *var_tensor->GetMutable<Vocab>() = vocab;
  self->tensor.set_impl(var_tensor);
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* tensor_method_set_string_list(TensorObject* self,
                                               PyObject* args,
                                               PyObject* kwargs) {
  EAGER_TRY
  using Strings = std::vector<std::string>;
  auto strings = CastPyArg2Strings(PyTuple_GET_ITEM(args, 0), 0);
  auto var_tensor = std::make_shared<egr::VariableCompatTensor>();
  *var_tensor->GetMutable<Strings>() = strings;
  self->tensor.set_impl(var_tensor);
  RETURN_PY_NONE
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* tensor_method_get_map_tensor(TensorObject* self,
                                              PyObject* args,
                                              PyObject* kwargs) {
  EAGER_TRY
  PADDLE_ENFORCE_EQ(
      egr::IsVariableCompatTensor(self->tensor), true,
      paddle::platform::errors::Fatal(
          "this method is only effective for VariableCompatTensor"));
  using Vocab = std::unordered_map<std::wstring, int>;
  auto* var_tensor = static_cast<const egr::VariableCompatTensor*>(
      self->tensor.impl().get());
  return ToPyObject(var_tensor->Get<Vocab>());
  EAGER_CATCH_AND_THROW_RETURN_NULL
}

static PyObject* tensor_method_get_non_zero_indices(TensorObject* self,
                                                    PyObject* args,
                                                    PyObject* kwargs) {
  ...
```

@@ -1655,6 +1699,15 @@ PyMethodDef variable_methods[] = {
```cpp
    {"_copy_gradient_from",
     (PyCFunction)(void (*)(void))tensor__copy_gradient_from,
     METH_VARARGS | METH_KEYWORDS, NULL},
    /** the methods to adapt old dygraph, will be removed in the future **/
    {"set_string_list",
     (PyCFunction)(void (*)(void))tensor_method_set_string_list,
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"set_vocab", (PyCFunction)(void (*)(void))tensor_method_set_vocab,
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"get_map_tensor",
     (PyCFunction)(void (*)(void))tensor_method_get_map_tensor,
     METH_VARARGS | METH_KEYWORDS, NULL},
    /***the method of sparse tensor****/
    {"indices",
     (PyCFunction)(void (*)(void))tensor_method_get_non_zero_indices,
     METH_VARARGS | METH_KEYWORDS, NULL},
  ...
```
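The bindings added above follow the standard CPython extension-method shape: a static C function with the `(self, args, kwargs)` signature, funneled through the `(PyCFunction)(void (*)(void))` cast into a `PyMethodDef` table with `METH_VARARGS | METH_KEYWORDS`. Here is a minimal self-contained module showing the same shape; the module and function names are illustrative, not Paddle's:

```cpp
#include <Python.h>

// A toy method mirroring the set_vocab/get_map_tensor registration pattern:
// read one positional argument, validate its Python type, return a value.
static PyObject* demo_echo_dict(PyObject* self, PyObject* args,
                                PyObject* kwargs) {
  if (PyTuple_GET_SIZE(args) != 1) {
    PyErr_SetString(PyExc_TypeError, "expected exactly one argument");
    return nullptr;
  }
  PyObject* obj = PyTuple_GET_ITEM(args, 0);
  if (!PyDict_Check(obj)) {
    PyErr_SetString(PyExc_TypeError, "argument must be a dict");
    return nullptr;
  }
  Py_INCREF(obj);  // return a new reference to the same dict
  return obj;
}

static PyMethodDef demo_methods[] = {
    // Same cast dance as the patch: PyCFunctionWithKeywords has a different
    // signature, so it is funneled through (PyCFunction)(void (*)(void)).
    {"echo_dict", (PyCFunction)(void (*)(void))demo_echo_dict,
     METH_VARARGS | METH_KEYWORDS, "Return the dict passed in."},
    {nullptr, nullptr, 0, nullptr}};

static struct PyModuleDef demo_module = {PyModuleDef_HEAD_INIT, "demo",
                                         nullptr, -1, demo_methods};

PyMODINIT_FUNC PyInit_demo(void) { return PyModule_Create(&demo_module); }
```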
paddle/fluid/pybind/eager_properties.cc (+41, -7)

@@ -58,6 +58,10 @@ PyObject* tensor_properties_get_type(TensorObject* self, void* closure) {
```cpp
    return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR);
  } else if (self->tensor.is_selected_rows()) {
    return ToPyObject(paddle::framework::proto::VarType::SELECTED_ROWS);
  } else if (egr::IsVariableCompatTensor(self->tensor)) {
    return ToPyObject(static_cast<paddle::framework::proto::VarType::Type>(
        static_cast<const egr::VariableCompatTensor*>(
            self->tensor.impl().get())
            ->Type()));
  } else {
    RETURN_PY_NONE
  }
  ...
```

@@ -152,11 +156,27 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
```cpp
  if (!self->tensor.defined()) {
    return ToPyObject(value);
  }
  if (egr::IsVariableCompatTensor(self->tensor)) {
    auto* var_tensor = static_cast<const egr::VariableCompatTensor*>(
        self->tensor.impl().get());
    if (var_tensor->IsType<paddle::framework::Vocab>()) {
      value.emplace_back(static_cast<int64_t>(
          var_tensor->Get<paddle::framework::Vocab>().size()));
    } else if (var_tensor->IsType<paddle::framework::Strings>()) {
      value.emplace_back(static_cast<int64_t>(
          var_tensor->Get<paddle::framework::Strings>().size()));
    } else {
      PADDLE_THROW(paddle::platform::errors::Unavailable(
          "VariableCompatTensor only support get shape from Vocab or "
          "Strings."));
    }
  } else {
    auto ddim = self->tensor.shape();
    size_t rank = static_cast<size_t>(ddim.size());
    value.resize(rank);
    for (size_t i = 0; i < rank; i++) {
      value[i] = ddim[i];
    }
  }

  return ToPyObject(value);
  ...
```

@@ -183,8 +203,22 @@ PyObject* tensor_properties_get_dtype(TensorObject* self, void* closure) {
```cpp
    // be same to old dygraph
    return ToPyObject(framework::proto::VarType::FP32);
  }
  if (egr::IsVariableCompatTensor(self->tensor)) {
    auto* var_tensor = static_cast<const egr::VariableCompatTensor*>(
        self->tensor.impl().get());
    if (var_tensor->IsType<paddle::framework::Vocab>()) {
      return ToPyObject(framework::proto::VarType::RAW);
    } else if (var_tensor->IsType<paddle::framework::Strings>()) {
      return ToPyObject(framework::proto::VarType::STRING);
    } else {
      PADDLE_THROW(paddle::platform::errors::Unavailable(
          "VariableCompatTensor only support get shape from Vocab or "
          "Strings."));
    }
  } else {
    return ToPyObject(
        paddle::framework::TransToProtoVarType(self->tensor.type()));
  }
  EAGER_CATCH_AND_THROW_RETURN_NULL
}
  ...
```
paddle/fluid/pybind/eager_utils.cc (+44, -0)

@@ -472,6 +472,28 @@ paddle::framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
```cpp
  return dtype;
}

std::unordered_map<std::wstring, int> CastPyArg2Vocab(PyObject* obj,
                                                      ssize_t arg_pos) {
  if (PyDict_Check(obj)) {
    return ::pybind11::handle(obj)
        .cast<std::unordered_map<std::wstring, int>>();
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "argument (position %d) must be dict, but got %s", arg_pos + 1,
        reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
  }
}

std::vector<std::string> CastPyArg2Strings(PyObject* obj, ssize_t arg_pos) {
  if (PyList_Check(obj)) {
    return ::pybind11::handle(obj).cast<std::vector<std::string>>();
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "argument (position %d) must be list, but got %s", arg_pos + 1,
        reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
  }
}

paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj,
                                                              ssize_t arg_pos) {
  if (PyObject_IsInstance(
  ...
```
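Note that `CastPyArg2Vocab` does not walk the dict by hand; it delegates to pybind11's built-in casters for `std::unordered_map` and `std::wstring`. A small embedded-interpreter sketch of that cast, assuming pybind11 with `pybind11/embed.h` is available; nothing below is Paddle code:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

#include <pybind11/embed.h>
#include <pybind11/stl.h>  // enables casting a Python dict to std::unordered_map

namespace py = pybind11;

int main() {
  py::scoped_interpreter guard{};  // start an embedded Python interpreter

  // Build {'hi': 1} in Python, then cast it to a C++ map with wide-string
  // keys, just as CastPyArg2Vocab does for the vocab argument.
  py::dict d = py::eval("{'hi': 1}").cast<py::dict>();
  auto vocab = py::handle(d).cast<std::unordered_map<std::wstring, int>>();
  std::wcout << vocab.size() << L" entry, hi -> " << vocab[L"hi"] << L"\n";
  return 0;
}
```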
@@ -719,6 +741,28 @@ PyObject* ToPyObject(
```cpp
  return dict;
}

PyObject* ToPyObject(const std::unordered_map<std::wstring, int>& value) {
  PyObject* dict = PyDict_New();
  for (const auto map_iter : value) {
    // Convert Key
    PyObject* key_string = PyUnicode_FromWideChar(map_iter.first.c_str(),
                                                  map_iter.first.size());
    if (!key_string) {
      PADDLE_THROW(platform::errors::Fatal(
          "Unable to convert std::wstring to PyObject"));
    }

    // Convert Val
    PyObject* py_int = PyLong_FromLong(map_iter.second);
    if (PyDict_SetItem(dict, key_string, py_int) != 0) {
      PADDLE_THROW(
          platform::errors::Fatal("Unable to set key:value for py_dict"));
    }
  }
  return dict;
}

// For Final State Dygraph,
// We directly use paddle::optional(Tensor) as dispensable Tensor
paddle::optional<const paddle::experimental::Tensor&> GetOptionalTensorFromArgs(
  ...
```
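The `ToPyObject` overload above goes the other direction through the raw C API. One point worth keeping in mind when writing this pattern yourself: `PyDict_SetItem` does not steal references, so the caller still owns the key and value it created. A standalone sketch with the reference counts handled explicitly (the helper name is illustrative, not Paddle's):

```cpp
#include <Python.h>
#include <string>
#include <unordered_map>

// Build a Python dict from a wide-string map. PyDict_SetItem INCREFs the
// key and value internally, so we drop our own references afterwards.
static PyObject* WideMapToDict(
    const std::unordered_map<std::wstring, int>& m) {
  PyObject* dict = PyDict_New();
  if (!dict) return nullptr;
  for (const auto& kv : m) {
    PyObject* key = PyUnicode_FromWideChar(
        kv.first.c_str(), static_cast<Py_ssize_t>(kv.first.size()));
    PyObject* val = key ? PyLong_FromLong(kv.second) : nullptr;
    if (!key || !val || PyDict_SetItem(dict, key, val) != 0) {
      Py_XDECREF(key);
      Py_XDECREF(val);
      Py_DECREF(dict);
      return nullptr;  // propagate the pending Python error
    }
    Py_DECREF(key);
    Py_DECREF(val);
  }
  return dict;
}
```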
paddle/fluid/pybind/eager_utils.h (+4, -0)

@@ -65,6 +65,9 @@ std::vector<std::vector<size_t>> CastPyArg2VectorOfVectorOfSize_t(
```cpp
    PyObject* obj, size_t arg_pos);
framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
                                                    ssize_t arg_pos);
std::unordered_map<std::wstring, int> CastPyArg2Vocab(PyObject* obj,
                                                      ssize_t arg_pos);
std::vector<std::string> CastPyArg2Strings(PyObject* obj, ssize_t arg_pos);

PyObject* ToPyObject(int value);
PyObject* ToPyObject(uint32_t value);
  ...
```

@@ -96,6 +99,7 @@ PyObject* ToPyObject(const paddle::framework::proto::VarType& type);
```cpp
PyObject* ToPyObject(const void* value);
PyObject* ToPyObject(
    const std::unordered_map<std::string, std::vector<std::string>>& value);
PyObject* ToPyObject(const std::unordered_map<std::wstring, int>& value);

template <typename Tuple, size_t N>
struct TupleTensorResult {
  ...
```
paddle/phi/api/lib/tensor.cc (+2, -2)

@@ -394,8 +394,8 @@ uint32_t Tensor::current_inplace_version() {
```cpp
        static_cast<phi::DenseTensor*>(impl_.get())->InplaceVersionCounter();
    return inplace_version_counter.CurrentVersion();
  } else {
    LOG_FIRST_N(WARNING, 1)
        << "current_inplace_version is only supported on DenseTensor now.";
  }
  return 0;
}
  ...
```
python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py (+25, -8)

@@ -22,8 +22,7 @@ import numpy as np
```python
import paddle
import paddle.nn as nn
from paddle.dataset.common import DATA_HOME
from paddle.fluid.framework import core, _non_static_mode, _test_eager_guard
from paddle.fluid.layer_helper import LayerHelper
from paddle import _C_ops
  ...
```

@@ -151,13 +150,12 @@ class Predictor(object):
```python
class TestBertTokenizerOp(unittest.TestCase):
    def setUp(self):
        self.bert_tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
        self.save_path = os.path.join(DATA_HOME, "fast_tokenizer")
        self.param_path = os.path.join(self.save_path, "model.pdparams")
        self.inference_path = os.path.join(self.save_path, "inference")

    def init_data(self):
        self.faster_tokenizer = FasterTokenizer(self.bert_tokenizer.vocab)
        self.text = [
            '选择珠江花园的原因就是方便,有电动扶梯直接到达海边,周围餐馆、食廊、商场、超市、摊位一应俱全。'
            '酒店装修一般,但还算整洁。 泳池在大堂的屋顶,因此很小,不过女儿倒是喜欢。 包的早餐是西式的,'
  ...
```

@@ -179,8 +177,8 @@ class TestBertTokenizerOp(unittest.TestCase):
```python
        self.texts_tensor = to_string_tensor(self.texts, "texts")
        self.text_pairs_tensor = to_string_tensor(self.text_pairs, "text_pairs")

    def run_padding(self):
        self.init_data()
        self.max_seq_len = 128
        self.pad_to_max_seq_len = True
        self.is_split_into_words = False
  ...
```

@@ -283,7 +281,13 @@ class TestBertTokenizerOp(unittest.TestCase):
```python
            np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01))

    def test_padding(self):
        with _test_eager_guard():
            self.run_padding()
        self.run_padding()

    def run_no_padding(self):
        self.init_data()
        self.max_seq_len = 128
        self.pad_to_max_seq_len = False
        self.is_split_into_words = False
  ...
```

@@ -336,7 +340,13 @@ class TestBertTokenizerOp(unittest.TestCase):
```python
            np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01))

    def test_no_padding(self):
        with _test_eager_guard():
            self.run_no_padding()
        self.run_no_padding()

    def run_is_split_into_words(self):
        self.init_data()
        self.is_split_into_words = True
        input_ids, token_type_ids = self.faster_tokenizer(
  ...
```

@@ -355,7 +365,13 @@ class TestBertTokenizerOp(unittest.TestCase):
```python
            np.allclose(token_type_ids, py_token_type_ids, rtol=0, atol=0.01))

    def test_is_split_into_words(self):
        with _test_eager_guard():
            self.run_is_split_into_words()
        self.run_is_split_into_words()

    def test_inference(self):
        self.init_data()
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path, exist_ok=True)
        paddle.save(self.faster_tokenizer.state_dict(), self.param_path)
  ...
```

@@ -383,6 +399,7 @@ class TestBertTokenizerOp(unittest.TestCase):
```python
                token_type_ids, py_token_type_ids, rtol=0, atol=0.01))

    def test_feed_string_var(self):
        self.init_data()
        paddle.enable_static()
        x = paddle.static.data(
            name="x", shape=[-1], dtype=core.VarDesc.VarType.STRINGS)
  ...
```