BaiXuePrincess / Paddle
Forked from PaddlePaddle / Paddle (in sync with the fork source)
Commit 487ee36a
Authored Nov 27, 2018 by phlrain

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_cudnn_lstm

Parents: 084ff657, 56a4912b
Showing 7 changed files with 398 additions and 143 deletions (+398 −143)
paddle/fluid/API.spec                             +1    −1
paddle/fluid/operators/math/sampler.cc            +9    −54
paddle/fluid/operators/math/sampler.h             +9    −4
paddle/fluid/operators/nce_op.cc                  +58   −10
paddle/fluid/operators/nce_op.h                   +139  −43
python/paddle/fluid/layers/nn.py                  +70   −25
python/paddle/fluid/tests/unittests/test_nce.py   +112  −6
paddle/fluid/API.spec

```diff
@@ -97,7 +97,7 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0))
+paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
 paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
```
...
paddle/fluid/operators/math/sampler.cc

```diff
@@ -60,75 +60,30 @@ float LogUniformSampler::Probability(int64_t value) const {
   return (log((value + 2.0) / (value + 1.0))) / log_range_;
 }

-CustomSampler::CustomSampler(int64_t range, const float* probabilities,
-                             unsigned int seed)
+CustomSampler::CustomSampler(int64_t range, const float* probabilities,
+                             const int* alias, const float* alias_probabilities,
+                             unsigned int seed)
     : Sampler(range, seed) {
-  random_engine_ = std::make_shared<std::mt19937_64>(seed_);
+  random_engine_ = std::make_shared<std::mt19937>(seed_);
   real_dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
   int_dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
-  alias_probs_ = std::make_shared<std::vector<float>>(range + 1);
-  alias_ = std::make_shared<std::vector<int64_t>>(range + 1);
-  probs_ = std::make_shared<std::vector<float>>(range + 1);
-
-  std::queue<std::pair<int64_t, float>> bigs;
-  std::queue<std::pair<int64_t, float>> littles;
-  for (int64_t i = 0; i <= range; ++i) {
-    (*probs_)[i] = probabilities[i];
-    float normal_prob = probabilities[i] * (range + 1);
-    if (normal_prob - 1.0 > 1e-4) {
-      bigs.emplace(i, normal_prob);
-    } else if (1.0 - normal_prob > 1e-4) {
-      littles.emplace(i, normal_prob);
-    } else {
-      (*alias_probs_)[i] = normal_prob;
-      (*alias_)[i] = -1;
-    }
-  }
-
-  while ((!littles.empty()) && (!bigs.empty())) {
-    auto big = bigs.front();
-    auto little = littles.front();
-    bigs.pop();
-    littles.pop();
-    (*alias_probs_)[little.first] = little.second;
-    (*alias_)[little.first] = big.first;
-    auto big_left = big.second - (1 - little.second);
-    if (big_left - 1.0 > 1e-4) {
-      bigs.emplace(big.first, big_left);
-    } else if (1.0 - big_left > 1e-4) {
-      littles.emplace(big.first, big_left);
-    } else {
-      (*alias_probs_)[big.first] = big_left;
-      (*alias_)[big.first] = -1;
-    }
-  }
-
-  if (!littles.empty()) {  // littles.second is close to 1.0
-    auto little = littles.front();
-    (*alias_probs_)[little.first] = 1.0;
-    (*alias_)[little.first] = -1;
-  }
-
-  if (!bigs.empty()) {  // bigs.second is close to 1.0
-    auto big = bigs.front();
-    (*alias_probs_)[big.first] = 1.0;
-    (*alias_)[big.first] = -1;
-  }
+  alias_probs_ = alias_probabilities;
+  probs_ = probabilities;
+  alias_ = alias;
 }

 int64_t CustomSampler::Sample() const {
   auto index = (*int_dist_)(*random_engine_);
   auto p = (*real_dist_)(*random_engine_);
-  if (p > (*alias_probs_)[index]) {
-    return (*alias_)[index];
+  if (p > alias_probs_[index]) {
+    return alias_[index];
   } else {
     return index;
   }
 }

 float CustomSampler::Probability(int64_t value) const {
-  return (*probs_)[value];
+  return probs_[value];
 }

 }  // namespace math
 }  // namespace operators
```
...
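For readers unfamiliar with the alias method that `CustomSampler` relies on, here is a minimal NumPy sketch (an illustration, not Paddle code) of the draw that `CustomSampler::Sample()` performs above: pick a table slot uniformly, then either accept the slot or jump to its alias according to the slot's acceptance probability. The table names mirror the members above; `rng` is an assumed NumPy-style generator.

```python
import numpy as np

def alias_draw(alias, alias_probs, rng=np.random):
    """One draw from precomputed alias tables, mirroring CustomSampler::Sample()."""
    index = rng.randint(0, len(alias))   # uniform slot, like (*int_dist_)(*random_engine_)
    p = rng.random_sample()              # uniform [0, 1), like (*real_dist_)(*random_engine_)
    if p > alias_probs[index]:           # rejected: take the slot's alias class instead
        return alias[index]
    return index                         # accepted: the slot itself is the sampled class
```

Each draw is O(1) regardless of the number of classes. Note that this commit does not change the draw itself; it only moves the table construction out of the C++ constructor and into the Python layer (see nn.py below), so the constructor now just stores the precomputed pointers.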
paddle/fluid/operators/math/sampler.h

```diff
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
 #include <cstdint>
 #include <memory>
 #include <random>
...
@@ -38,9 +39,12 @@ class Sampler {
       seed_ = seed;
     }
   }

   virtual ~Sampler();

   // Sample a single value
   virtual int64_t Sample() const = 0;

   // The probability that a single call to Sample() returns the given value.
   virtual float Probability(int64_t value) const = 0;
...
@@ -99,6 +103,7 @@ class LogUniformSampler : public Sampler {
 class CustomSampler : public Sampler {
  public:
   explicit CustomSampler(int64_t range, const float* probabilities,
+                         const int* alias, const float* alias_probabilities,
                          unsigned int seed = 0UL);

   ~CustomSampler() override {}
...
@@ -108,10 +113,10 @@ class CustomSampler : public Sampler {
   float Probability(int64_t value) const override;

  private:
-  std::shared_ptr<std::vector<float>> alias_probs_;
-  std::shared_ptr<std::vector<int64_t>> alias_;
-  std::shared_ptr<std::vector<float>> probs_;
-  std::shared_ptr<std::mt19937_64> random_engine_;
+  const float* alias_probs_;
+  const int* alias_;
+  const float* probs_;
+  std::shared_ptr<std::mt19937> random_engine_;
   std::shared_ptr<std::uniform_real_distribution<>> real_dist_;
   std::shared_ptr<std::uniform_int_distribution<>> int_dist_;
 };
```
...
paddle/fluid/operators/nce_op.cc

```diff
@@ -14,6 +14,7 @@ limitations under the License. */

 #include "paddle/fluid/operators/nce_op.h"

+#include <string>
 #include <vector>

 namespace paddle {
...
@@ -25,7 +26,7 @@ class NCEOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"));
     PADDLE_ENFORCE(ctx->HasInput("Label"));
     PADDLE_ENFORCE(ctx->HasInput("Weight"));
...
@@ -67,7 +68,7 @@ class NCEOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
         platform::CPUPlace());
...
@@ -101,11 +102,24 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDispensable();
-    AddInput("CustomDistribution",
+    AddInput("CustomDistProbs",
             "(Tensor) It is used in 'CostumDist' sampler. "
             "It is a tensor with shape [num_total_classes]."
             "The i-th element is the probsbility of the i-th class being sampled.")
         .AsDispensable();
+    AddInput("CustomDistAlias",
+            "(Tensor) It is used in 'CostumDist' sampler. "
+            "It is a tensor with shape [num_total_classes]."
+            "The i-th element is the probsbility of the i-th class being sampled.")
+        .AsDispensable();
+    AddInput("CustomDistAliasProbs",
+            "(Tensor) It is used in 'CostumDist' sampler. "
+            "It is a tensor with shape [num_total_classes]."
+            "The i-th element is the probsbility of the i-th class being sampled.")
+        .AsDispensable();
     AddOutput("Cost",
               "(Tensor) A tensor of shape [batch_size, 1]. Cost of samples.");
     AddOutput("SampleLogits",
...
@@ -124,21 +138,22 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
              "kernel to compute grads."
              "")
         .AsIntermediate();
     AddAttr<int>("num_total_classes",
                  "Total number of classes in all samples.");
     AddAttr<int>("num_neg_samples",
                  "The number of negative classes. The default value is 10.")
         .SetDefault(10);
     AddAttr<int>("sampler",
                  "(int) Which sampler to be used to sample negative class."
                  "0: Uniform; 1: LogUniform; 2: CostumDist.")
         .SetDefault(0);
     AddAttr<int>("seed",
                  "(int) The seed used in sampler. If it is 0, "
                  "the sampler will generate a seed randomly.")
         .SetDefault(0);
+    AddAttr<bool>("is_sparse", "(boolean, default false) Sparse update.")
+        .SetDefault(false);
     AddAttr<std::vector<int>>("custom_neg_classes",
                               "This attribute only be used in unitest. Classes "
...
@@ -156,11 +171,19 @@ By default this operator uses a uniform distribution for sampling.
   }
 };

+class NCEOpGradDescMaker : public framework::DefaultGradOpDescMaker<true> {
+  using ::paddle::framework::DefaultGradOpDescMaker<
+      true>::DefaultGradOpDescMaker;
+
+ protected:
+  virtual std::string GradOpType() const { return "nce_grad"; }
+};
+
 class NCEOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"));
     PADDLE_ENFORCE(ctx->HasInput("Weight"));
     PADDLE_ENFORCE(ctx->HasInput("Cost"));
...
@@ -190,20 +213,45 @@ class NCEOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
         platform::CPUPlace());
   }
 };

+class NCEOpGradVarTypeInference : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc& op_desc,
+                  framework::BlockDesc* block) const override {
+    auto weight_grad =
+        op_desc.Output(framework::GradVarName("Weight")).front();
+    auto bias_grad = op_desc.Output(framework::GradVarName("Bias")).front();
+
+    auto attr = op_desc.GetAttr("is_sparse");
+    bool is_sparse = boost::get<bool>(attr);
+    if (is_sparse) {
+      VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
+               << " is set to SelectedRows";
+      block->Var(weight_grad)
+          ->SetType(framework::proto::VarType::SELECTED_ROWS);
+      block->Var(bias_grad)->SetType(framework::proto::VarType::SELECTED_ROWS);
+    } else {
+      VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
+               << " is set to LoDTensor";
+      block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
+      block->Var(bias_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
+    }
+    block->Var(weight_grad)->SetDataType(block->Var("Input")->GetDataType());
+    block->Var(bias_grad)->SetDataType(block->Var("Input")->GetDataType());
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad);
+REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpGradDescMaker, ops::NCEOpMaker);
+REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad, ops::NCEOpGradVarTypeInference);
 REGISTER_OP_CPU_KERNEL(nce,
                        ops::NCEKernel<paddle::platform::CPUPlace, float>,
                        ops::NCEKernel<paddle::platform::CPUPlace, double>);
 REGISTER_OP_CPU_KERNEL(nce_grad,
```
...
paddle/fluid/operators/nce_op.h

```diff
@@ -16,26 +16,32 @@ limitations under the License. */

 #include <math.h>
 #include <random>
+#include <set>
 #include <vector>

 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/math/sampler.h"
 #include "unsupported/Eigen/CXX11/Tensor"

 namespace paddle {
 namespace operators {

 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using SelectedRows = framework::SelectedRows;
 using Sampler = math::Sampler;
+using DDim = framework::DDim;

 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

 template <typename DeviceContext, typename T>
 void PrepareSamples(const framework::ExecutionContext& context,
                     Sampler* sampler) {
   auto label = context.Input<Tensor>("Label");
   const int64_t* label_data = label->data<int64_t>();
   auto label_dims = label->dims();
   // int num_total_classes = context.Attr<int>("num_total_classes");
   // for unitest
...
@@ -44,7 +50,7 @@ void PrepareSamples(const framework::ExecutionContext& context,
   auto sample_labels = context.Output<Tensor>("SampleLabels");
   auto sample_labels_dims = sample_labels->dims();
   int64_t* sample_labels_data =
       sample_labels->mutable_data<int64_t>(context.GetPlace());

   int num_label = label_dims.size() == 2 ? label_dims[1] : 1;
...
@@ -70,13 +76,13 @@ void PrepareSamples(const framework::ExecutionContext& context,
 template <typename DeviceContext, typename T>
 class NCEKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     int sampler_type = context.Attr<int>("sampler");
     int seed = context.Attr<int>("seed");
     int num_total_classes = context.Attr<int>("num_total_classes");
     int num_neg_samples = context.Attr<int>("num_neg_samples");

     Sampler* sampler;
     switch (sampler_type) {
       case 0: {
         sampler = new math::UniformSampler(num_total_classes - 1, seed);
...
@@ -87,11 +93,19 @@ class NCEKernel : public framework::OpKernel<T> {
         break;
       }
       case 2: {
-        auto custom_dist = context.Input<Tensor>("CustomDistribution");
-        const float* custom_dist_data = custom_dist->data<float>();
-        PADDLE_ENFORCE_EQ(custom_dist->numel(), num_total_classes);
-        sampler = new math::CustomSampler(num_total_classes - 1,
-                                          custom_dist_data, seed);
+        auto dist_probs = context.Input<Tensor>("CustomDistProbs");
+        auto dist_alias = context.Input<Tensor>("CustomDistAlias");
+        auto dist_alias_probs = context.Input<Tensor>("CustomDistAliasProbs");
+
+        PADDLE_ENFORCE_EQ(dist_probs->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias_probs->numel(), num_total_classes);
+
+        const float* probs_data = dist_probs->data<float>();
+        const int* alias_data = dist_alias->data<int>();
+        const float* alias_probs_data = dist_alias_probs->data<float>();
+        sampler = new math::CustomSampler(num_total_classes - 1, probs_data,
+                                          alias_data, alias_probs_data, seed);
         break;
       }
       default: { PADDLE_THROW("Unsupported SamplerType."); }
...
@@ -99,17 +113,17 @@ class NCEKernel : public framework::OpKernel<T> {
     PrepareSamples<DeviceContext, T>(context, sampler);
     auto sample_labels = context.Output<Tensor>("SampleLabels");
     const int64_t* sample_labels_data = sample_labels->data<int64_t>();
     auto sample_out = context.Output<Tensor>("SampleLogits");
     T* sample_out_data = sample_out->mutable_data<T>(context.GetPlace());
     auto label = context.Input<Tensor>("Label");
     auto sample_weight = context.Input<Tensor>("SampleWeight");
     const T* sample_weight_data = nullptr;
     if (sample_weight != nullptr) {
       sample_weight_data = sample_weight->data<T>();
     }
     auto out = context.Output<Tensor>("Cost");
     T* out_data = out->mutable_data<T>(context.GetPlace());
     int64_t num_true_class = 1;
     if (label != nullptr) {
       num_true_class = label->dims()[1];
...
@@ -119,7 +133,7 @@ class NCEKernel : public framework::OpKernel<T> {
     // forward bias
     auto bias = context.Input<Tensor>("Bias");
     if (bias != nullptr) {
       const T* bias_data = bias->data<T>();
       for (int64_t i = 0; i < sample_labels->numel(); ++i) {
         sample_out_data[i] = bias_data[sample_labels_data[i]];
       }
...
@@ -158,16 +172,16 @@ class NCEKernel : public framework::OpKernel<T> {
 template <typename DeviceContext, typename T>
 class NCEGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto d_out = context.Input<Tensor>(framework::GradVarName("Cost"));
     const T* d_out_data = d_out->data<T>();
     auto label = context.Input<Tensor>("Label");
     auto sample_out = context.Input<Tensor>("SampleLogits");
     const T* sample_out_data = sample_out->data<T>();
     auto sample_labels = context.Input<Tensor>("SampleLabels");
     const int64_t* sample_labels_data = sample_labels->data<int64_t>();
     auto sample_weight = context.Input<Tensor>("SampleWeight");
     const T* sample_weight_data = nullptr;
     if (sample_weight != nullptr) {
       sample_weight_data = sample_weight->data<T>();
     }
...
@@ -180,7 +194,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
     int sampler_type = context.Attr<int>("sampler");
     int seed = context.Attr<int>("seed");

     Sampler* sampler;
     switch (sampler_type) {
       case 0: {
         sampler = new math::UniformSampler(num_total_classes - 1, seed);
...
@@ -191,11 +205,19 @@ class NCEGradKernel : public framework::OpKernel<T> {
         break;
       }
       case 2: {
-        auto custom_dist = context.Input<Tensor>("CustomDistribution");
-        const float* custom_dist_data = custom_dist->data<float>();
-        PADDLE_ENFORCE_EQ(custom_dist->numel(), num_total_classes);
-        sampler = new math::CustomSampler(num_total_classes - 1,
-                                          custom_dist_data, seed);
+        auto dist_probs = context.Input<Tensor>("CustomDistProbs");
+        auto dist_alias = context.Input<Tensor>("CustomDistAlias");
+        auto dist_alias_probs = context.Input<Tensor>("CustomDistAliasProbs");
+
+        PADDLE_ENFORCE_EQ(dist_probs->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias_probs->numel(), num_total_classes);
+
+        const float* probs_data = dist_probs->data<float>();
+        const int* alias_data = dist_alias->data<int>();
+        const float* alias_probs_data = dist_alias_probs->data<float>();
+        sampler = new math::CustomSampler(num_total_classes - 1, probs_data,
+                                          alias_data, alias_probs_data, seed);
         break;
       }
       default: { PADDLE_THROW("Unsupported SamplerType."); }
...
@@ -203,7 +225,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
     // T b = 1. / num_total_classes * num_neg_samples;
     Tensor sample_grad;  // tmp tensor
     T* sample_grad_data =
         sample_grad.mutable_data<T>(sample_labels->dims(), context.GetPlace());
     // backward cost
     for (int64_t i = 0; i < sample_labels->numel(); ++i) {
...
@@ -217,32 +239,105 @@ class NCEGradKernel : public framework::OpKernel<T> {
                    : w * (o * (1 - o) / (o + b));
       sample_grad_data[i] *= d_out_data[sample_idx];
     }
-    // get d_bias
-    auto d_bias = context.Output<Tensor>(framework::GradVarName("Bias"));
-    if (d_bias != nullptr) {
-      T* d_bias_data = d_bias->mutable_data<T>(context.GetPlace());
-      std::fill(d_bias_data, d_bias_data + d_bias->numel(), 0.0);
-      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
-        d_bias_data[sample_labels_data[i]] += sample_grad_data[i];
-      }
-    }
-    // get d_w
-    auto d_w = context.Output<Tensor>(framework::GradVarName("Weight"));
-    if (d_w != nullptr) {
-      auto d_w_data = d_w->mutable_data<T>(context.GetPlace());
-      std::fill(d_w_data, d_w_data + d_w->numel(), 0.0);
-      auto d_w_matrix = EigenMatrix<T>::From(*d_w);
-      auto x_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Input")));
-      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
-        d_w_matrix.chip(sample_labels_data[i], 0) +=
-            x_matrix.chip(static_cast<int>(i / sample_labels->dims()[1]), 0) *
-            sample_grad_data[i];
-      }
-    }
+
+    bool is_sparse = context.Attr<bool>("is_sparse");
+
+    if (!is_sparse) {
+      // get d_bias
+      auto d_bias = context.Output<Tensor>(framework::GradVarName("Bias"));
+      if (d_bias != nullptr) {
+        T* d_bias_data = d_bias->mutable_data<T>(context.GetPlace());
+        std::fill(d_bias_data, d_bias_data + d_bias->numel(), 0.0);
+        for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+          d_bias_data[sample_labels_data[i]] += sample_grad_data[i];
+        }
+      }
+      // get d_w
+      auto d_w = context.Output<Tensor>(framework::GradVarName("Weight"));
+      if (d_w != nullptr) {
+        auto d_w_data = d_w->mutable_data<T>(context.GetPlace());
+        std::fill(d_w_data, d_w_data + d_w->numel(), 0.0);
+        auto d_w_matrix = EigenMatrix<T>::From(*d_w);
+        auto x_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Input")));
+        for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+          d_w_matrix.chip(sample_labels_data[i], 0) +=
+              x_matrix.chip(static_cast<int>(i / sample_labels->dims()[1]), 0) *
+              sample_grad_data[i];
+        }
+      }
+    } else {
+      std::vector<int64_t> labels;
+      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+        labels.push_back(sample_labels_data[i]);
+      }
+      std::set<T> st(labels.begin(), labels.end());
+      labels.assign(st.begin(), st.end());
+
+      auto* bias_var = context.InputVar("Bias");
+      DDim bias_dim;
+      if (bias_var->IsType<LoDTensor>()) {
+        bias_dim = context.Input<LoDTensor>("Bias")->dims();
+      } else if (bias_var->IsType<SelectedRows>()) {
+        auto* table_t = context.Input<SelectedRows>("Bias");
+        bias_dim = table_t->value().dims();
+      } else {
+        PADDLE_THROW(
+            "The parameter Bias of a NCE_OP "
+            "must be either LoDTensor or SelectedRows");
+      }
+
+      auto d_bias =
+          context.Output<SelectedRows>(framework::GradVarName("Bias"));
+      d_bias->set_rows(labels);
+      d_bias->set_height(bias_dim[0]);
+
+      d_bias->mutable_value()->Resize(
+          {static_cast<int64_t>(labels.size()), bias_dim[1]});
+      T* d_bias_data =
+          d_bias->mutable_value()->mutable_data<T>(context.GetPlace());
+      std::fill(d_bias_data, d_bias_data + labels.size(), 0.0);
+      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+        d_bias_data[d_bias->Index(sample_labels_data[i])] +=
+            sample_grad_data[i];
+      }
+
+      auto* table_var = context.InputVar("Weight");
+      DDim table_dim;
+      if (table_var->IsType<LoDTensor>()) {
+        table_dim = context.Input<LoDTensor>("Weight")->dims();
+      } else if (table_var->IsType<SelectedRows>()) {
+        auto* table_t = context.Input<SelectedRows>("Weight");
+        table_dim = table_t->value().dims();
+      } else {
+        PADDLE_THROW(
+            "The parameter Weight of a NCE_OP "
+            "must be either LoDTensor or SelectedRows");
+      }
+
+      auto d_w = context.Output<SelectedRows>(framework::GradVarName("Weight"));
+
+      d_w->set_rows(labels);
+      d_w->set_height(table_dim[0]);
+
+      auto* d_table_value = d_w->mutable_value();
+      d_table_value->Resize(
+          {static_cast<int64_t>(labels.size()), table_dim[1]});
+      auto d_w_data = d_table_value->mutable_data<T>(context.GetPlace());
+      std::fill(d_w_data, d_w_data + d_table_value->numel(), 0.0);
+
+      auto d_w_matrix = EigenMatrix<T>::From(*d_table_value);
+      auto x_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Input")));
+      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+        d_w_matrix.chip(d_w->Index(sample_labels_data[i]), 0) +=
+            x_matrix.chip(static_cast<int>(i / sample_labels->dims()[1]), 0) *
+            sample_grad_data[i];
+      }
+    }
     // get d_x
     auto d_x = context.Output<Tensor>(framework::GradVarName("Input"));
     if (d_x != nullptr) {
       auto* d_x_data = d_x->mutable_data<T>(context.GetPlace());
       std::fill(d_x_data, d_x_data + d_x->numel(), 0.0);
       auto d_x_matrix = EigenMatrix<T>::From(*d_x);
       auto w_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Weight")));
...
@@ -251,6 +346,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
                 w_matrix.chip(sample_labels_data[i], 0) * sample_grad_data[i];
       }
     }
     delete sampler;
   }
 };
```
...
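To make the new `is_sparse` branch of `NCEGradKernel` easier to follow, here is a small NumPy sketch (an illustration, not Paddle code) of the SelectedRows-style weight gradient it produces: only the rows touched by sampled labels are stored, as a (row ids, values) pair, mirroring the `d_w->set_rows(labels)` / `d_w->Index(...)` calls above. The function and argument names are hypothetical.

```python
import numpy as np

def sparse_weight_grad(sample_labels, sample_grad, x, samples_per_row):
    """SelectedRows-style grad: (row ids, per-row values) for touched rows only."""
    rows = sorted(set(sample_labels))           # deduplicated labels, like std::set + assign
    index = {r: i for i, r in enumerate(rows)}  # like SelectedRows::Index
    values = np.zeros((len(rows), x.shape[1]), dtype=x.dtype)
    for i, lbl in enumerate(sample_labels):
        # i // samples_per_row maps the flat sample index back to its batch row,
        # like static_cast<int>(i / sample_labels->dims()[1]) in the kernel
        values[index[lbl]] += x[i // samples_per_row] * sample_grad[i]
    return rows, values
```

The dense path materializes a [num_total_classes, dim] gradient even though NCE only touches a handful of rows per batch; the sparse path stores just those rows, which is the point of the `is_sparse` flag.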
python/paddle/fluid/layers/nn.py

```diff
@@ -4549,7 +4549,8 @@ def nce(input,
         name=None,
         sampler="uniform",
         custom_dist=None,
-        seed=0):
+        seed=0,
+        is_sparse=False):
     """
     ${comment}
...
@@ -4575,11 +4576,12 @@ def nce(input,
         sampler (str): The sampler used to sample class from negtive classes.
                        It can be 'uniform', 'log_uniform' or 'custom_dist'.
                        default: 'uniform'.
-        custom_dist (Variable): A tensor with shape [num_total_classes].
+        custom_dist (float[]): A float[] with size=num_total_classes.
                        It is used when sampler is set to 'custom_dist'.
                        custom_dist[i] is the probsbility of i-th class to be sampled.
                        default: None.
         seed (int): The seed used in sampler. default: 0.
+        is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows.

     Returns:
         Variable: The output nce loss.
...
@@ -4631,12 +4633,7 @@ def nce(input,
         shape=[num_total_classes, dim],
         is_bias=False,
         dtype=input.dtype)
-    inputs = {
-        'Input': input,
-        'Label': label,
-        'Weight': w,
-        'SampleWeight': sample_weight if sample_weight is not None else []
-    }
+    inputs = {}
     if helper.bias_attr:
         b = helper.create_parameter(
             attr=helper.bias_attr,
...
@@ -4648,18 +4645,10 @@ def nce(input,
     sample_logits = helper.create_variable_for_type_inference(dtype=input.dtype)
     sample_labels = helper.create_variable_for_type_inference(dtype=label.dtype)

-    if num_neg_samples is None:
-        num_neg_samples = 10
-    else:
-        num_neg_samples = int(num_neg_samples)
-    inputs = {
-        'Input': input,
-        'Label': label,
-        'Weight': w,
-        'Bias': b,
-        'SampleWeight': sample_weight if sample_weight is not None else []
-    }
+    inputs['Input'] = input
+    inputs['Label'] = label
+    inputs['Weight'] = w
+    inputs['SampleWeight'] = sample_weight if sample_weight is not None else []

     if sampler == "uniform":
         sampler = 0
...
@@ -4667,17 +4656,73 @@ def nce(input,
         sampler = 1
     elif sampler == "custom_dist":
         assert custom_dist is not None
-        assert isinstance(custom_dist, Variable)
-        inputs['CustomDistribution'] = custom_dist
+        # assert isinstance(custom_dist, Variable)
+
+        custom_dist_len = len(custom_dist)
+        alias_probs_ = [0] * custom_dist_len
+        alias_ = [0] * custom_dist_len
+        bigs = []
+        littles = []
+        for i in range(custom_dist_len):
+            normal_prob = custom_dist[i] * custom_dist_len
+            if normal_prob - 1.0 > 1e-4:
+                bigs.append((i, normal_prob))
+            elif 1.0 - normal_prob > 1e-4:
+                littles.append((i, normal_prob))
+            else:
+                alias_probs_[i] = normal_prob
+                alias_[i] = -1
+
+        while len(bigs) and len(littles):
+            big = bigs.pop(0)
+            little = littles.pop(0)
+
+            big_idx = big[0]
+            big_prob = big[1]
+
+            alias_probs_[little[0]] = little[1]
+            alias_[little[0]] = big_idx
+            big_left = big[1] + little[1] - 1
+            if big_left - 1.0 > 1e-4:
+                bigs.append((big_idx, big_left))
+            elif 1.0 - big_left > 1e-4:
+                littles.append((big_idx, big_left))
+            else:
+                alias_probs_[big_idx] = big_left
+                alias_[big_idx] = -1
+
+        if len(bigs):
+            big = bigs.pop(0)
+            alias_probs_[big[0]] = 1.0
+            alias_[big[0]] = -1
+        if len(littles):
+            little = littles.pop(0)
+            alias_probs_[little[0]] = 1.0
+            alias_[little[0]] = -1
+
+        probs = assign(input=np.array(custom_dist).astype('float32'))
+        custom_alias = assign(input=np.array(alias_).astype('int32'))
+        custom_alias_probs = assign(
+            input=np.array(alias_probs_).astype('float32'))
+
+        inputs['CustomDistProbs'] = probs
+        inputs['CustomDistAlias'] = custom_alias
+        inputs['CustomDistAliasProbs'] = custom_alias_probs
         sampler = 2
     else:
         raise Exception("Unsupported sampler type.")

+    if num_neg_samples is None:
+        num_neg_samples = 10
+    else:
+        num_neg_samples = int(num_neg_samples)
+
     attrs = {
         'num_total_classes': int(num_total_classes),
         'num_neg_samples': num_neg_samples,
         'seed': seed,
-        'sampler': sampler
+        'sampler': sampler,
+        'is_sparse': is_sparse
     }

     helper.append_op(
...
@@ -6629,7 +6674,7 @@ def crop(x, shape=None, offsets=None, name=None):
     helper = LayerHelper('crop', **locals())

     if not (isinstance(shape, list) or isinstance(shape, tuple) or \
             isinstance(shape, Variable)):
         raise ValueError("The shape should be a list, tuple or Variable.")

     if offsets is None:
...
@@ -6751,7 +6796,7 @@ def affine_grid(theta, out_shape, name=None):
     helper = LayerHelper('affine_grid')

     if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \
             isinstance(out_shape, Variable)):
         raise ValueError("The out_shape should be a list, tuple or Variable.")

     if not isinstance(theta, Variable):
```
...
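A usage sketch of the extended Python API, modeled directly on the unit test this commit adds (test_nce.py below); the Dirichlet-generated class frequencies and the argument values come from that test, while the variable names here are illustrative:

```python
import numpy as np
import paddle.fluid as fluid

num_total_classes = 20
input = fluid.layers.data(name="input", shape=[10], dtype="float32")
label = fluid.layers.data(name="label", shape=[1], dtype="int64")

# custom_dist is now a plain float list (one probability per class), not a
# Variable; the layer builds the alias tables itself and feeds them to the op.
freqs = np.random.dirichlet(np.ones(num_total_classes) * 1000).astype('float32')

cost = fluid.layers.nce(input=input,
                        label=label,
                        num_total_classes=num_total_classes,
                        sampler="custom_dist",
                        custom_dist=freqs.tolist(),
                        num_neg_samples=5,
                        seed=1,
                        is_sparse=True)  # weight@GRAD / bias@GRAD become SelectedRows
```

The test below runs this layer once with `is_sparse=False` and once with `is_sparse=True` and asserts that the two losses match, i.e. the sparse update changes only the gradient representation, not the math.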
python/paddle/fluid/tests/unittests/test_nce.py

```diff
@@ -14,8 +14,12 @@
 from __future__ import print_function

-import unittest
 import numpy as np
+import unittest
+import paddle.fluid as fluid
+import paddle.fluid.initializer as initializer
 from op_test import OpTest
...
@@ -59,7 +63,7 @@ def nce(input, weight, bias, sample_weight, labels, num_classes,
 class TestNCE(OpTest):
     def generate_data(self, dim, batch_size, num_classes, num_true_class,
-                      num_neg_samples):
+                      num_neg_samples, is_sparse):
         input = np.random.randn(batch_size, dim).astype(np.float32)
         weight = np.random.randn(num_classes, dim).astype(np.float32)
         bias = np.random.randn(num_classes).astype(np.float32)
...
@@ -70,7 +74,8 @@ class TestNCE(OpTest):
             'num_neg_samples': num_neg_samples,
             'custom_neg_classes': list(range(num_neg_samples)),
             'seed': 0,
-            'sampler': 0
+            'sampler': 0,
+            'is_sparse': is_sparse
         }
         self.inputs = {
             'Input': input,
...
@@ -81,7 +86,7 @@ class TestNCE(OpTest):
         }

     def set_data(self):
-        self.generate_data(5, 5, 4, 1, 2)
+        self.generate_data(5, 5, 4, 1, 2, False)

     def compute(self):
         out = nce(self.inputs['Input'], self.inputs['Weight'],
...
@@ -107,9 +112,110 @@ class TestNCE(OpTest):
             ["Input", "Weight", "Bias"], "Cost", max_relative_error=0.02)


-class TestNCECase1(TestNCE):
+class TestNCECase1Tensor(TestNCE):
     def set_data(self):
-        self.generate_data(10, 20, 10, 2, 5)
+        self.generate_data(10, 20, 10, 2, 5, False)
+
+
+class TestNCECase1SelectedRows(unittest.TestCase):
+    def setUp(self):
+        self.base_lr = 0.0001
+        self.batch_size = 8
+
+    @staticmethod
+    def get_place():
+        place = fluid.core.CPUPlace()
+        return place
+
+    @staticmethod
+    def get_train_data(batch_size):
+        batchs = []
+        for i in range(batch_size):
+            input = np.random.randn(batch_size, 10).astype(np.float32)
+            labels = np.random.randint(0, 20, (batch_size, 1))
+            batchs.append([input, labels])
+        return batchs
+
+    def get_optimizer(self):
+        # SGD optimizer
+        optimizer = fluid.optimizer.SGD(learning_rate=self.base_lr)
+        return optimizer
+
+    def train_network(self, num_total_classes, num_neg_samples, sampler,
+                      custom_dist, is_sparse):
+        input = fluid.layers.data(name="input", shape=[10], dtype="float32")
+        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+
+        w_param = fluid.default_main_program().global_block().create_parameter(
+            shape=[num_total_classes, 10],
+            dtype='float32',
+            name='nce_w',
+            initializer=initializer.ConstantInitializer())
+        b_param = fluid.default_main_program().global_block().create_parameter(
+            shape=[num_total_classes, 1],
+            dtype='float32',
+            name='nce_b',
+            initializer=initializer.ConstantInitializer())
+
+        cost = fluid.layers.nce(input=input,
+                                label=label,
+                                num_total_classes=num_total_classes,
+                                sampler=sampler,
+                                custom_dist=custom_dist,
+                                sample_weight=None,
+                                param_attr='nce_w',
+                                bias_attr='nce_b',
+                                seed=1,
+                                num_neg_samples=num_neg_samples,
+                                is_sparse=is_sparse)
+        avg_cost = fluid.layers.mean(cost)
+        # optimizer
+        optimizer = self.get_optimizer()
+        optimizer.minimize(avg_cost)
+
+        return [avg_cost, [input, label]]
+
+    def test_input_is_selected_rows(self):
+        place = self.get_place()
+        exe = fluid.Executor(place)
+
+        data = self.get_train_data(self.batch_size)
+        nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
+
+        rets = []
+        # for dense
+        dense_scope = fluid.core.Scope()
+        dense_startup_program = fluid.framework.Program()
+        dense_train_program = fluid.framework.Program()
+        with fluid.scope_guard(dense_scope):
+            with fluid.program_guard(dense_train_program,
+                                     dense_startup_program):
+                cost, feeds = self.train_network(20, 5, "custom_dist",
+                                                 nid_freq_arr.tolist(), False)
+                feeder = fluid.DataFeeder(feed_list=feeds, place=place)
+                exe.run(dense_startup_program)
+                loss_val = exe.run(dense_train_program,
+                                   feed=feeder.feed(data),
+                                   fetch_list=[cost.name])
+                rets.append(np.mean(loss_val))
+
+        # for sparse
+        sparse_scope = fluid.core.Scope()
+        sparse_startup_program = fluid.framework.Program()
+        sparse_train_program = fluid.framework.Program()
+        with fluid.scope_guard(sparse_scope):
+            with fluid.program_guard(sparse_train_program,
+                                     sparse_startup_program):
+                cost, feeds = self.train_network(20, 5, "custom_dist",
+                                                 nid_freq_arr.tolist(), True)
+                feeder = fluid.DataFeeder(feed_list=feeds, place=place)
+                exe.run(sparse_startup_program)
+                loss_val = exe.run(sparse_train_program,
+                                   feed=feeder.feed(data),
+                                   fetch_list=[cost.name])
+                rets.append(np.mean(loss_val))
+
+        self.assertEqual(rets[0], rets[1])


 if __name__ == '__main__':
```
...