PaddlePaddle / Paddle
Commit b189e83f (unverified)
Authored May 17, 2022 by Chen Weihang; committed via GitHub on May 17, 2022
[Eager] Adapt faster tokenizer op (#42718)
* adapt faster tokenizer op
* add eager test
* add unittest
Parent: 353ede5a
Showing 8 changed files with 441 additions and 28 deletions (+441, −28)
paddle/fluid/eager/eager_tensor.h  (+186 −10)
paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc  (+85 −0)
paddle/fluid/pybind/eager_method.cc  (+54 −1)
paddle/fluid/pybind/eager_properties.cc  (+41 −7)
paddle/fluid/pybind/eager_utils.cc  (+44 −0)
paddle/fluid/pybind/eager_utils.h  (+4 −0)
paddle/phi/api/lib/tensor.cc  (+2 −2)
python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py  (+25 −8)
paddle/fluid/eager/eager_tensor.h

@@ -21,24 +21,176 @@
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/api/lib/utils/tensor_utils.h"
 #include "paddle/phi/core/compat/convert_utils.h"
 
 namespace egr {
 
+/**
+ * VariableCompatTensor class is used by Eager mode for now. It's painful to
+ * do this in Eager Mode, the better choice is to design the special Tensor
+ * directly in phi and use it in paddle::experimental::Tensor.
+ * However, we have some special operators, and they use special input variable
+ * type, such as vector<string>, unordered_map<wstring, int>, these type cannot
+ * cover by DenseTensor or SparseTensor. So, we have to provide a compatible
+ * Tensor type like variable to support these special input type. We should
+ * remove this as soon as we finish the ResourceTensor in phi.
+ *
+ * Note: Keep this class as clean as possible.
+ * This class should only support method declared in framework::Variable and
+ * necessary overridden methods.
+ *
+ * Note: This class is only used to support types that cannot be supported by
+ * the phi Tensor system temporarily. You CANNOT use this class to handle types
+ * such as DenseTensor, SelectedRows, etc.
+ **/
+class VariableCompatTensor
+    : public phi::TensorBase,
+      public phi::TypeInfoTraits<phi::TensorBase, VariableCompatTensor> {
+ public:
+  template <typename T>
+  const T& Get() const {
+    static_assert(
+        paddle::framework::IsRegisteredVarType<T>(),
+        "Not registered type. Please register T inside var_type_traits.h");
+    PADDLE_ENFORCE_NOT_NULL(holder_, paddle::platform::errors::NotFound(
+                                         "Variable is not initialized."));
+    PADDLE_ENFORCE_EQ(
+        holder_->Type(), paddle::framework::VarTypeTrait<T>::kId,
+        paddle::platform::errors::InvalidArgument(
+            "The Variable type must be %s, but the type it holds is %s.",
+            paddle::framework::ToTypeName(
+                paddle::framework::VarTypeTrait<T>::kId),
+            paddle::framework::ToTypeName(holder_->Type())));
+    return *static_cast<const T*>(holder_->Ptr());
+  }
+
+  bool IsInitialized() const { return holder_ != nullptr; }
+
+  template <typename T>
+  T* GetMutable() {
+    if (!holder_) {
+      holder_.reset(new PlaceholderImpl<T>());
+    } else {
+      PADDLE_ENFORCE_EQ(
+          holder_->Type(), paddle::framework::VarTypeTrait<T>::kId,
+          paddle::platform::errors::InvalidArgument(
+              "The Variable type must be %s, but the type it holds is %s.",
+              paddle::framework::ToTypeName(
+                  paddle::framework::VarTypeTrait<T>::kId),
+              paddle::framework::ToTypeName(holder_->Type())));
+    }
+    return static_cast<T*>(holder_->Ptr());
+  }
+
+  template <typename T>
+  bool IsType() const {
+    return holder_ &&
+           holder_->Type() == paddle::framework::VarTypeTrait<T>::kId;
+  }
+
+  void Clear() { holder_.reset(); }
+
+  int Type() const {
+    PADDLE_ENFORCE_NOT_NULL(holder_, paddle::platform::errors::NotFound(
+                                         "Variable is not initialized."));
+    return holder_->Type();
+  }
+
+  // necessary overridden methods
+  static const char* name() { return "VariableCompatTensor"; }
+
+  ~VariableCompatTensor() override = default;
+
+  int64_t numel() const override {
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "VariableCompatTensor does not support `numel` method."));
+  }
+
+  const phi::DDim& dims() const override {
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "VariableCompatTensor does not support `dims` method."));
+  }
+
+  phi::DataType dtype() const override {
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "VariableCompatTensor does not support `dtype` method."));
+  }
+
+  phi::DataLayout layout() const override {
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "VariableCompatTensor does not support `layout` method."));
+  }
+
+  const phi::Place& place() const override {
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "VariableCompatTensor does not support `place` method."));
+  }
+
+  bool valid() const override { return IsInitialized(); }
+
+  bool initialized() const override { return IsInitialized(); }
+
+  void* AllocateFrom(phi::Allocator* allocator, phi::DataType dtype,
+                     size_t requested_size = 0) override {
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "VariableCompatTensor does not support `AllocateFrom` method."));
+  }
+
+ private:
+  struct Placeholder {
+    virtual ~Placeholder() PADDLE_MAY_THROW {}
+
+    inline int Type() const { return type_; }
+    inline const void* Ptr() const { return ptr_; }
+    inline void* Ptr() { return ptr_; }
+
+   protected:
+    inline void Init(void* p, int type) {
+      ptr_ = p;
+      type_ = type;
+    }
+
+    void* ptr_;
+    int type_;
+  };
+
+  // Placeholder hides type T, so it doesn't appear as a template
+  // parameter of Variable.
+  template <typename T>
+  struct PlaceholderImpl : public Placeholder {
+    static_assert(
+        paddle::framework::IsRegisteredVarType<T>(),
+        "Not registered type. Please register T inside var_type_traits.h");
+
+    PlaceholderImpl() {
+      this->Init(&obj_, paddle::framework::VarTypeTrait<T>::kId);
+    }
+
+   private:
+    T obj_;
+  };
+
+  // pointers to a PlaceholderImpl object indeed.
+  std::shared_ptr<Placeholder> holder_;
+};
+
+inline bool IsVariableCompatTensor(
+    const paddle::experimental::Tensor& tensor) {
+  return VariableCompatTensor::classof(tensor.impl().get());
+}
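Editor's aside, not part of the commit: the container above behaves like framework::Variable rather than a numeric tensor. A minimal C++ sketch of its intended use, mirroring the unit test added in this commit and assuming paddle::framework::Vocab is the std::unordered_map<std::wstring, int> alias registered in var_type_traits.h:

// Sketch only: exercises VariableCompatTensor the way the new unit test does.
void VariableCompatTensorSketch() {
  egr::VariableCompatTensor var_tensor;
  // GetMutable<T>() default-constructs the held object on first use.
  auto* vocab = var_tensor.GetMutable<paddle::framework::Vocab>();
  (*vocab)[L"hello"] = 0;
  // Get<T>() returns a typed const reference after an integer id check.
  EXPECT_EQ(var_tensor.Get<paddle::framework::Vocab>().size(), 1UL);
  EXPECT_TRUE(var_tensor.IsType<paddle::framework::Vocab>());
  // Asking for a different T would throw: the holder's type id no longer
  // matches, e.g. var_tensor.GetMutable<paddle::framework::Strings>().
  // Clear() drops the holder, so the tensor reports uninitialized again.
  var_tensor.Clear();
  EXPECT_FALSE(var_tensor.IsInitialized());
}

Note that numel, dims, dtype, layout, place, and AllocateFrom all deliberately throw, so a VariableCompatTensor can flow through Tensor plumbing but never into a kernel.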
 /**
  * This class is used by Eager mode for now. It's painful to do this in Eager
- * Mode, the better
- * choice is to use paddle::experimental::Tensor directly. However, we have a
- * punch of nested kernel code, and
- * they use paddle::framework::Variable in inner logic code. So, we have to
- * provide variable in
- * paddle::framework::ExecutionContext to support it. We should remove this as
- * soon as we finish our latest
- * Phi Lib, and use paddle::experimental::Tensor instead.
+ * Mode, the better choice is to use paddle::experimental::Tensor directly.
+ * However, we have a punch of nested kernel code, and they use
+ * paddle::framework::Variable in inner logic code. So, we have to provide
+ * variable in paddle::framework::ExecutionContext to support it. We should
+ * remove this as soon as we finish our latest Phi Lib, and use
+ * paddle::experimental::Tensor instead.
  *
  * Note: Keep this class as clean as possible.
  * This class should only support method declared in
  * paddle::experimental::Tensor with access method of
  * paddle::framework::Variable no more members are acceptable.
  * **/
 class EagerVariable final {
  public:
   /* Default constructor and name constructor should only be used for contruct
 ...

@@ -54,6 +206,14 @@ class EagerVariable final {
       ConstructVariableFromTensor<phi::DenseTensor>(tensor);
     } else if (tensor.is_selected_rows()) {
       ConstructVariableFromTensor<phi::SelectedRows>(tensor);
+    } else if (IsVariableCompatTensor(tensor) &&
+               static_cast<const VariableCompatTensor*>(tensor.impl().get())
+                   ->IsType<paddle::framework::Vocab>()) {
+      ConstructVariableFromCompatTensor<paddle::framework::Vocab>(tensor);
+    } else if (IsVariableCompatTensor(tensor) &&
+               static_cast<const VariableCompatTensor*>(tensor.impl().get())
+                   ->IsType<paddle::framework::Strings>()) {
+      ConstructVariableFromCompatTensor<paddle::framework::Strings>(tensor);
     } else {
       PADDLE_THROW(paddle::platform::errors::Fatal(
           "Unrecognized egr::EagerVariable type, only "
 ...

@@ -119,6 +279,22 @@ class EagerVariable final {
     *framework_tensor = *tensor_dense;
   }
 
+  template <typename VarType>
+  void ConstructVariableFromCompatTensor(
+      const paddle::experimental::Tensor& tensor) {
+    auto* framework_holder = var_.GetMutable<VarType>();
+    // Contruct framework::Tensor from egr::EagerVariable
+    auto* compat_tensor =
+        static_cast<VariableCompatTensor*>(tensor.impl().get());
+    PADDLE_ENFORCE_NOT_NULL(compat_tensor,
+                            paddle::platform::errors::Fatal(
+                                "Tensor %s holds empty impl, this should not "
+                                "happend since we should "
+                                "treat all kinds of tensor as what they are.",
+                                tensor.name()));
+    *framework_holder = compat_tensor->Get<VarType>();
+  }
+
  private:
   std::string name_{""};
   paddle::framework::Variable var_;
 ...
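One design note worth spelling out: holder_ implements type erasure through the Placeholder / PlaceholderImpl pair, pairing a raw pointer with an integer type id instead of RTTI. A self-contained sketch of the same idiom (plain C++ with illustrative names; Paddle derives the ids from VarTypeTrait<T>::kId, while here a local counter stands in):

#include <cassert>
#include <memory>
#include <string>
#include <vector>

// Each erased type gets a distinct integer id; a counter replaces kId here.
inline int NextTypeId() { static int id = 0; return id++; }
template <typename T>
int TypeId() { static const int id = NextTypeId(); return id; }

struct Placeholder {  // erased interface: a type id plus a raw pointer
  virtual ~Placeholder() = default;
  int Type() const { return type_; }
  void* Ptr() { return ptr_; }
 protected:
  void Init(void* p, int type) { ptr_ = p; type_ = type; }
  void* ptr_;
  int type_;
};

template <typename T>
struct PlaceholderImpl : Placeholder {  // owns a T and records its id
  PlaceholderImpl() { this->Init(&obj_, TypeId<T>()); }
 private:
  T obj_;
};

int main() {
  using Strings = std::vector<std::string>;
  std::shared_ptr<Placeholder> holder =
      std::make_shared<PlaceholderImpl<Strings>>();
  assert(holder->Type() == TypeId<Strings>());          // IsType<T> analogue
  auto* strings = static_cast<Strings*>(holder->Ptr()); // GetMutable<T> analogue
  strings->push_back("ok");
  assert(strings->size() == 1);
  return 0;
}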
paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc

@@ -233,3 +233,88 @@ TEST(EagerVariable, DataLayout) {
   layout = paddle::imperative::GetDataLayout(eager_var);
   CHECK_EQ(layout, paddle::experimental::DataLayout::NCHW);
 }
+
+TEST(VariableCompatTensor, MemberFunction) {
+  egr::VariableCompatTensor var_tensor;
+  // test GetMutable and Get
+  var_tensor.GetMutable<paddle::framework::Vocab>();
+  auto& vocab = var_tensor.Get<paddle::framework::Vocab>();
+  EXPECT_EQ(vocab.size(), 0UL);
+  bool caught_exception = false;
+  try {
+    var_tensor.GetMutable<paddle::framework::Strings>();
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("The Variable type must be") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  // test Type and IsType
+  EXPECT_TRUE(var_tensor.IsType<paddle::framework::Vocab>());
+  EXPECT_EQ(var_tensor.Type(),
+            static_cast<int>(paddle::framework::proto::VarType::VOCAB));
+  // test valid and initialized
+  EXPECT_TRUE(var_tensor.IsInitialized());
+  EXPECT_TRUE(var_tensor.valid());
+  EXPECT_TRUE(var_tensor.initialized());
+  // test name
+  EXPECT_EQ(var_tensor.name(), "VariableCompatTensor");
+  // test other throw error methods
+  caught_exception = false;
+  try {
+    var_tensor.numel();
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("numel") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  caught_exception = false;
+  try {
+    var_tensor.dims();
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("dims") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  caught_exception = false;
+  try {
+    var_tensor.dtype();
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("dtype") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  caught_exception = false;
+  try {
+    var_tensor.layout();
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("layout") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  caught_exception = false;
+  try {
+    var_tensor.place();
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("place") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  caught_exception = false;
+  try {
+    var_tensor.AllocateFrom(nullptr, phi::DataType::UNDEFINED);
+  } catch (paddle::platform::EnforceNotMet& error) {
+    caught_exception = true;
+    std::string ex_msg = error.what();
+    EXPECT_TRUE(ex_msg.find("AllocateFrom") != std::string::npos);
+  }
+  EXPECT_TRUE(caught_exception);
+  // test Clear
+  var_tensor.Clear();
+  EXPECT_FALSE(var_tensor.IsInitialized());
+}
paddle/fluid/pybind/eager_method.cc

@@ -18,6 +18,7 @@ typedef SSIZE_T ssize_t;
 #include <Python.h>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "pybind11/numpy.h"

@@ -675,7 +676,9 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self,
                                                     PyObject* kwargs) {
   EAGER_TRY
   if (!self->tensor.defined()) {
-    RETURN_PY_NONE
+    // The original `get_tensor` method of Variable will create a empty tensor
+    phi::DenseTensor empty_tensor;
+    return ToPyObject(&empty_tensor);
   }
   if (self->tensor.is_dense_tensor()) {
     auto* tensor =
 ...

@@ -1275,6 +1278,47 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, PyObject* args,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* tensor_method_set_vocab(TensorObject* self, PyObject* args,
+                                         PyObject* kwargs) {
+  EAGER_TRY
+  using Vocab = std::unordered_map<std::wstring, int>;
+  auto vocab = CastPyArg2Vocab(PyTuple_GET_ITEM(args, 0), 0);
+  auto var_tensor = std::make_shared<egr::VariableCompatTensor>();
+  *var_tensor->GetMutable<Vocab>() = vocab;
+  self->tensor.set_impl(var_tensor);
+  RETURN_PY_NONE
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* tensor_method_set_string_list(TensorObject* self,
+                                               PyObject* args,
+                                               PyObject* kwargs) {
+  EAGER_TRY
+  using Strings = std::vector<std::string>;
+  auto strings = CastPyArg2Strings(PyTuple_GET_ITEM(args, 0), 0);
+  auto var_tensor = std::make_shared<egr::VariableCompatTensor>();
+  *var_tensor->GetMutable<Strings>() = strings;
+  self->tensor.set_impl(var_tensor);
+  RETURN_PY_NONE
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* tensor_method_get_map_tensor(TensorObject* self,
+                                              PyObject* args,
+                                              PyObject* kwargs) {
+  EAGER_TRY
+  PADDLE_ENFORCE_EQ(
+      egr::IsVariableCompatTensor(self->tensor), true,
+      paddle::platform::errors::Fatal(
+          "this method is only effective for VariableCompatTensor"));
+  using Vocab = std::unordered_map<std::wstring, int>;
+  auto* var_tensor =
+      static_cast<const egr::VariableCompatTensor*>(self->tensor.impl().get());
+  return ToPyObject(var_tensor->Get<Vocab>());
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* tensor_method_get_non_zero_indices(TensorObject* self,
                                                     PyObject* args,
                                                     PyObject* kwargs) {
 ...

@@ -1655,6 +1699,15 @@ PyMethodDef variable_methods[] = {
     {"_copy_gradient_from",
      (PyCFunction)(void (*)(void))tensor__copy_gradient_from,
      METH_VARARGS | METH_KEYWORDS, NULL},
+    /** the methods to adapt old dygraph, will be removed in the future **/
+    {"set_string_list",
+     (PyCFunction)(void (*)(void))tensor_method_set_string_list,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"set_vocab", (PyCFunction)(void (*)(void))tensor_method_set_vocab,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"get_map_tensor",
+     (PyCFunction)(void (*)(void))tensor_method_get_map_tensor,
+     METH_VARARGS | METH_KEYWORDS, NULL},
     /***the method of sparse tensor****/
     {"indices",
      (PyCFunction)(void (*)(void))tensor_method_get_non_zero_indices,
     METH_VARARGS | METH_KEYWORDS, NULL},
 ...
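Editor's sketch of how the three bindings registered above fit together from Python. The method names come from the variable_methods table in this commit; constructing an empty eager Tensor via core.eager.Tensor() is an assumption made for illustration:

# Hypothetical usage; only set_vocab/set_string_list/get_map_tensor are from
# this commit, the empty-tensor construction is assumed.
from paddle.fluid import core

vocab = {u"hello": 0, u"world": 1}      # cast to unordered_map<wstring, int>
t = core.eager.Tensor()                 # assumed empty eager tensor
t.set_vocab(vocab)                      # impl becomes a VariableCompatTensor (Vocab)
assert t.get_map_tensor() == vocab      # converted back via ToPyObject

s = core.eager.Tensor()
s.set_string_list(["faster", "tokenizer"])  # VariableCompatTensor holding Strings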
paddle/fluid/pybind/eager_properties.cc

@@ -58,6 +58,10 @@ PyObject* tensor_properties_get_type(TensorObject* self, void* closure) {
     return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR);
   } else if (self->tensor.is_selected_rows()) {
     return ToPyObject(paddle::framework::proto::VarType::SELECTED_ROWS);
+  } else if (egr::IsVariableCompatTensor(self->tensor)) {
+    return ToPyObject(static_cast<paddle::framework::proto::VarType::Type>(
+        static_cast<const egr::VariableCompatTensor*>(self->tensor.impl().get())
+            ->Type()));
   } else {
     RETURN_PY_NONE
   }
 ...

@@ -152,12 +156,28 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
   if (!self->tensor.defined()) {
     return ToPyObject(value);
   }
-  auto ddim = self->tensor.shape();
-  size_t rank = static_cast<size_t>(ddim.size());
-  value.resize(rank);
-  for (size_t i = 0; i < rank; i++) {
-    value[i] = ddim[i];
+  if (egr::IsVariableCompatTensor(self->tensor)) {
+    auto* var_tensor =
+        static_cast<const egr::VariableCompatTensor*>(self->tensor.impl().get());
+    if (var_tensor->IsType<paddle::framework::Vocab>()) {
+      value.emplace_back(static_cast<int64_t>(
+          var_tensor->Get<paddle::framework::Vocab>().size()));
+    } else if (var_tensor->IsType<paddle::framework::Strings>()) {
+      value.emplace_back(static_cast<int64_t>(
+          var_tensor->Get<paddle::framework::Strings>().size()));
+    } else {
+      PADDLE_THROW(paddle::platform::errors::Unavailable(
+          "VariableCompatTensor only support get shape from Vocab or "
+          "Strings."));
+    }
+  } else {
+    auto ddim = self->tensor.shape();
+    size_t rank = static_cast<size_t>(ddim.size());
+    value.resize(rank);
+    for (size_t i = 0; i < rank; i++) {
+      value[i] = ddim[i];
+    }
   }
 
   return ToPyObject(value);
   EAGER_CATCH_AND_THROW_RETURN_NULL
 ...

@@ -183,8 +203,22 @@ PyObject* tensor_properties_get_dtype(TensorObject* self, void* closure) {
     // be same to old dygraph
     return ToPyObject(framework::proto::VarType::FP32);
   }
-  return ToPyObject(
-      paddle::framework::TransToProtoVarType(self->tensor.type()));
+  if (egr::IsVariableCompatTensor(self->tensor)) {
+    auto* var_tensor =
+        static_cast<const egr::VariableCompatTensor*>(self->tensor.impl().get());
+    if (var_tensor->IsType<paddle::framework::Vocab>()) {
+      return ToPyObject(framework::proto::VarType::RAW);
+    } else if (var_tensor->IsType<paddle::framework::Strings>()) {
+      return ToPyObject(framework::proto::VarType::STRING);
+    } else {
+      PADDLE_THROW(paddle::platform::errors::Unavailable(
+          "VariableCompatTensor only support get shape from Vocab or "
+          "Strings."));
+    }
+  } else {
+    return ToPyObject(
+        paddle::framework::TransToProtoVarType(self->tensor.type()));
+  }
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 ...
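Continuing the sketch from the previous section: with these property changes, a compat tensor reports the container length as a one-element shape and a proxy dtype rather than a real element type. The expected values below are inferred from the branches above and from the new unit test:

# Expected property behavior for the hypothetical vocab tensor t from before.
print(t.shape)   # [2]: len(vocab), per the Vocab branch in get_shape
print(t.dtype)   # VarType.RAW for a Vocab, VarType.STRING for Strings
print(t.type)    # VarType.VOCAB, forwarded from VariableCompatTensor::Type()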
paddle/fluid/pybind/eager_utils.cc

@@ -472,6 +472,28 @@ paddle::framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
   return dtype;
 }
 
+std::unordered_map<std::wstring, int> CastPyArg2Vocab(PyObject* obj,
+                                                      ssize_t arg_pos) {
+  if (PyDict_Check(obj)) {
+    return ::pybind11::handle(obj)
+        .cast<std::unordered_map<std::wstring, int>>();
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "argument (position %d) must be dict, but got %s", arg_pos + 1,
+        reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
+  }
+}
+
+std::vector<std::string> CastPyArg2Strings(PyObject* obj, ssize_t arg_pos) {
+  if (PyList_Check(obj)) {
+    return ::pybind11::handle(obj).cast<std::vector<std::string>>();
+  } else {
+    PADDLE_THROW(platform::errors::InvalidArgument(
+        "argument (position %d) must be list, but got %s", arg_pos + 1,
+        reinterpret_cast<PyTypeObject*>(obj->ob_type)->tp_name));
+  }
+}
+
 paddle::CustomOpKernelContext CastPyArg2CustomOpKernelContext(PyObject* obj,
                                                               ssize_t arg_pos) {
   if (PyObject_IsInstance(
 ...

@@ -719,6 +741,28 @@ PyObject* ToPyObject(
   return dict;
 }
 
+PyObject* ToPyObject(const std::unordered_map<std::wstring, int>& value) {
+  PyObject* dict = PyDict_New();
+  for (const auto map_iter : value) {
+    // Convert Key
+    PyObject* key_string =
+        PyUnicode_FromWideChar(map_iter.first.c_str(), map_iter.first.size());
+    if (!key_string) {
+      PADDLE_THROW(platform::errors::Fatal(
+          "Unable to convert std::wstring to PyObject"));
+    }
+
+    // Convert Val
+    PyObject* py_int = PyLong_FromLong(map_iter.second);
+
+    if (PyDict_SetItem(dict, key_string, py_int) != 0) {
+      PADDLE_THROW(
+          platform::errors::Fatal("Unable to set key:value for py_dict"));
+    }
+  }
+  return dict;
+}
+
 // For Final State Dygraph,
 // We directly use paddle::optional(Tensor) as dispensable Tensor
 paddle::optional<const paddle::experimental::Tensor&> GetOptionalTensorFromArgs(
 ...
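The two casters lean entirely on pybind11's STL casters: handle::cast<T>() converts a Python dict or list into the requested C++ container, including wide-string keys. A standalone sketch of that idiom (assumes pybind11 built with embedding support; nothing here is Paddle code):

#include <pybind11/embed.h>
#include <pybind11/stl.h>
#include <cassert>
#include <string>
#include <unordered_map>

namespace py = pybind11;

int main() {
  py::scoped_interpreter guard;  // start an embedded Python interpreter
  py::dict d;
  d[py::cast(std::wstring(L"hello"))] = 1;
  // The same cast CastPyArg2Vocab performs on its borrowed PyObject*:
  auto vocab = d.cast<std::unordered_map<std::wstring, int>>();
  assert(vocab[L"hello"] == 1);
  return 0;
}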
paddle/fluid/pybind/eager_utils.h

@@ -65,6 +65,9 @@ std::vector<std::vector<size_t>> CastPyArg2VectorOfVectorOfSize_t(
     PyObject* obj, size_t arg_pos);
 framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
                                                     ssize_t arg_pos);
+std::unordered_map<std::wstring, int> CastPyArg2Vocab(PyObject* obj,
+                                                      ssize_t arg_pos);
+std::vector<std::string> CastPyArg2Strings(PyObject* obj, ssize_t arg_pos);
 
 PyObject* ToPyObject(int value);
 PyObject* ToPyObject(uint32_t value);
 ...

@@ -96,6 +99,7 @@ PyObject* ToPyObject(const paddle::framework::proto::VarType& type);
 PyObject* ToPyObject(const void* value);
 PyObject* ToPyObject(
     const std::unordered_map<std::string, std::vector<std::string>>& value);
+PyObject* ToPyObject(const std::unordered_map<std::wstring, int>& value);
 
 template <typename Tuple, size_t N>
 struct TupleTensorResult {
 ...
paddle/phi/api/lib/tensor.cc

@@ -394,8 +394,8 @@ uint32_t Tensor::current_inplace_version() {
         static_cast<phi::DenseTensor*>(impl_.get())->InplaceVersionCounter();
     return inplace_version_counter.CurrentVersion();
   } else {
-    PADDLE_THROW(phi::errors::Unimplemented(
-        "current_inplace_version is only supported on DenseTensor now."));
+    LOG_FIRST_N(WARNING, 1)
+        << "current_inplace_version is only supported on DenseTensor now.";
   }
   return 0;
 }
 ...
python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py

@@ -22,8 +22,7 @@ import numpy as np
 import paddle
 import paddle.nn as nn
 from paddle.dataset.common import DATA_HOME
-from paddle.fluid.framework import core, _non_static_mode, _enable_legacy_dygraph
-_enable_legacy_dygraph()
+from paddle.fluid.framework import core, _non_static_mode, _test_eager_guard
 from paddle.fluid.layer_helper import LayerHelper
 from paddle import _C_ops
 ...

@@ -151,13 +150,12 @@ class Predictor(object):
 class TestBertTokenizerOp(unittest.TestCase):
     def setUp(self):
         self.bert_tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
-        self.faster_tokenizer = FasterTokenizer(self.bert_tokenizer.vocab)
-        self.init_data()
         self.save_path = os.path.join(DATA_HOME, "fast_tokenizer")
         self.param_path = os.path.join(self.save_path, "model.pdparams")
         self.inference_path = os.path.join(self.save_path, "inference")
 
     def init_data(self):
+        self.faster_tokenizer = FasterTokenizer(self.bert_tokenizer.vocab)
         self.text = [
             '选择珠江花园的原因就是方便,有电动扶梯直接到达海边,周围餐馆、食廊、商场、超市、摊位一应俱全。'
             '酒店装修一般,但还算整洁。 泳池在大堂的屋顶,因此很小,不过女儿倒是喜欢。 包的早餐是西式的,'
 ...

@@ -179,8 +177,8 @@ class TestBertTokenizerOp(unittest.TestCase):
         self.texts_tensor = to_string_tensor(self.texts, "texts")
         self.text_pairs_tensor = to_string_tensor(self.text_pairs, "text_pairs")
 
-    def test_padding(self):
+    def run_padding(self):
+        self.init_data()
         self.max_seq_len = 128
         self.pad_to_max_seq_len = True
         self.is_split_into_words = False
 ...

@@ -283,7 +281,13 @@ class TestBertTokenizerOp(unittest.TestCase):
             np.allclose(
                 token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
 
-    def test_no_padding(self):
+    def test_padding(self):
+        with _test_eager_guard():
+            self.run_padding()
+        self.run_padding()
+
+    def run_no_padding(self):
+        self.init_data()
         self.max_seq_len = 128
         self.pad_to_max_seq_len = False
         self.is_split_into_words = False
 ...

@@ -336,7 +340,13 @@ class TestBertTokenizerOp(unittest.TestCase):
             np.allclose(
                 token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
 
-    def test_is_split_into_words(self):
+    def test_no_padding(self):
+        with _test_eager_guard():
+            self.run_no_padding()
+        self.run_no_padding()
+
+    def run_is_split_into_words(self):
+        self.init_data()
         self.is_split_into_words = True
         input_ids, token_type_ids = self.faster_tokenizer(
 ...

@@ -355,7 +365,13 @@ class TestBertTokenizerOp(unittest.TestCase):
             np.allclose(
                 token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
 
+    def test_is_split_into_words(self):
+        with _test_eager_guard():
+            self.run_is_split_into_words()
+        self.run_is_split_into_words()
+
     def test_inference(self):
+        self.init_data()
         if not os.path.exists(self.save_path):
             os.makedirs(self.save_path, exist_ok=True)
         paddle.save(self.faster_tokenizer.state_dict(), self.param_path)
 ...

@@ -383,6 +399,7 @@ class TestBertTokenizerOp(unittest.TestCase):
                 token_type_ids, py_token_type_ids, rtol=0, atol=0.01))
 
     def test_feed_string_var(self):
+        self.init_data()
         paddle.enable_static()
         x = paddle.static.data(
             name="x", shape=[-1], dtype=core.VarDesc.VarType.STRINGS)
 ...
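The renames above follow the pattern Paddle's unit tests used during the eager-mode migration: the module-level _enable_legacy_dygraph() call is dropped, each former test_* body becomes run_*, and a new test_* drives it twice, once under _test_eager_guard() and once in the mode the process defaults to. Condensed:

# The migration pattern in miniature (illustrative).
import unittest
from paddle.fluid.framework import _test_eager_guard

class TestBertTokenizerOp(unittest.TestCase):
    def run_padding(self):
        ...  # original assertions, unchanged

    def test_padding(self):
        with _test_eager_guard():  # run once in eager (final-state) mode
            self.run_padding()
        self.run_padding()         # and once in legacy dygraph mode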