PaddlePaddle / Paddle
Commit 487ee36a, authored on Nov 27, 2018 by phlrain

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_cudnn_lstm

Parents: 084ff657, 56a4912b
Showing 7 changed files with 398 additions and 143 deletions (+398, -143)
paddle/fluid/API.spec                                +1    -1
paddle/fluid/operators/math/sampler.cc               +9    -54
paddle/fluid/operators/math/sampler.h                +9    -4
paddle/fluid/operators/nce_op.cc                     +58   -10
paddle/fluid/operators/nce_op.h                      +139  -43
python/paddle/fluid/layers/nn.py                     +70   -25
python/paddle/fluid/tests/unittests/test_nce.py     +112  -6
paddle/fluid/API.spec (+1, -1)

@@ -97,7 +97,7 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0))
+paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
 paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
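Decoded, the changed spec line corresponds to the Python signature below. This is a reading of the ArgSpec (its nine defaults align with the last nine arguments), not code taken from the commit:

```python
# Signature implied by the new ArgSpec: is_sparse is appended with a
# default of False, so existing call sites keep working unchanged.
def nce(input, label, num_total_classes, sample_weight=None, param_attr=None,
        bias_attr=None, num_neg_samples=None, name=None, sampler='uniform',
        custom_dist=None, seed=0, is_sparse=False):
    ...
```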
paddle/fluid/operators/math/sampler.cc (+9, -54)

@@ -60,75 +60,30 @@ float LogUniformSampler::Probability(int64_t value) const {
   return (log((value + 2.0) / (value + 1.0))) / log_range_;
 }
 
-CustomSampler::CustomSampler(int64_t range, const float* probabilities,
-                             unsigned int seed)
+CustomSampler::CustomSampler(int64_t range, const float* probabilities,
+                             const int* alias, const float* alias_probabilities,
+                             unsigned int seed)
     : Sampler(range, seed) {
-  random_engine_ = std::make_shared<std::mt19937_64>(seed_);
+  random_engine_ = std::make_shared<std::mt19937>(seed_);
   real_dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
   int_dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
-  alias_probs_ = std::make_shared<std::vector<float>>(range + 1);
-  alias_ = std::make_shared<std::vector<int64_t>>(range + 1);
-  probs_ = std::make_shared<std::vector<float>>(range + 1);
-
-  std::queue<std::pair<int64_t, float>> bigs;
-  std::queue<std::pair<int64_t, float>> littles;
-  for (int64_t i = 0; i <= range; ++i) {
-    (*probs_)[i] = probabilities[i];
-    float normal_prob = probabilities[i] * (range + 1);
-    if (normal_prob - 1.0 > 1e-4) {
-      bigs.emplace(i, normal_prob);
-    } else if (1.0 - normal_prob > 1e-4) {
-      littles.emplace(i, normal_prob);
-    } else {
-      (*alias_probs_)[i] = normal_prob;
-      (*alias_)[i] = -1;
-    }
-  }
-
-  while ((!littles.empty()) && (!bigs.empty())) {
-    auto big = bigs.front();
-    auto little = littles.front();
-    bigs.pop();
-    littles.pop();
-    (*alias_probs_)[little.first] = little.second;
-    (*alias_)[little.first] = big.first;
-    auto big_left = big.second - (1 - little.second);
-    if (big_left - 1.0 > 1e-4) {
-      bigs.emplace(big.first, big_left);
-    } else if (1.0 - big_left > 1e-4) {
-      littles.emplace(big.first, big_left);
-    } else {
-      (*alias_probs_)[big.first] = big_left;
-      (*alias_)[big.first] = -1;
-    }
-  }
-
-  if (!littles.empty()) {  // littles.second is close to 1.0
-    auto little = littles.front();
-    (*alias_probs_)[little.first] = 1.0;
-    (*alias_)[little.first] = -1;
-  }
-
-  if (!bigs.empty()) {  // bigs.second is close to 1.0
-    auto big = bigs.front();
-    (*alias_probs_)[big.first] = 1.0;
-    (*alias_)[big.first] = -1;
-  }
+  alias_probs_ = alias_probabilities;
+  probs_ = probabilities;
+  alias_ = alias;
 }
 
 int64_t CustomSampler::Sample() const {
   auto index = (*int_dist_)(*random_engine_);
   auto p = (*real_dist_)(*random_engine_);
-  if (p > (*alias_probs_)[index]) {
-    return (*alias_)[index];
+  if (p > alias_probs_[index]) {
+    return alias_[index];
   } else {
     return index;
   }
 }
 
-float CustomSampler::Probability(int64_t value) const {
-  return (*probs_)[value];
-}
+float CustomSampler::Probability(int64_t value) const { return probs_[value]; }
 
 }  // namespace math
 }  // namespace operators
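The removed constructor body is the table-building half of the alias method (Vose's algorithm); this commit moves that construction out of the C++ sampler and into the Python layer (see nn.py below), so CustomSampler now just borrows prebuilt tables. A minimal standalone sketch of the same construction, with hypothetical names and the same 1e-4 tolerance:

```python
def build_alias_table(probs, tol=1e-4):
    """Build alias-method tables for a probability vector.

    Mirrors the construction removed from CustomSampler: classes whose
    scaled probability exceeds 1 ("bigs") donate their excess to classes
    below 1 ("littles"), so every slot is covered by at most two classes.
    """
    n = len(probs)
    alias_probs = [0.0] * n
    alias = [-1] * n  # -1 means "no alias, keep the slot's own class"
    bigs, littles = [], []
    for i, p in enumerate(probs):
        scaled = p * n
        if scaled - 1.0 > tol:
            bigs.append((i, scaled))
        elif 1.0 - scaled > tol:
            littles.append((i, scaled))
        else:
            alias_probs[i] = scaled
    while bigs and littles:
        big_idx, big_prob = bigs.pop(0)
        little_idx, little_prob = littles.pop(0)
        alias_probs[little_idx] = little_prob  # keep little's own class...
        alias[little_idx] = big_idx            # ...otherwise redirect to big
        big_left = big_prob - (1.0 - little_prob)
        if big_left - 1.0 > tol:
            bigs.append((big_idx, big_left))
        elif 1.0 - big_left > tol:
            littles.append((big_idx, big_left))
        else:
            alias_probs[big_idx] = big_left
    for idx, _ in bigs + littles:  # any leftovers are numerically ~1.0
        alias_probs[idx] = 1.0
    return alias_probs, alias

# For probs = [0.5, 0.3, 0.2]: slot 0 always yields class 0, slot 1 yields
# class 1 with prob 0.9 (else class 0), slot 2 yields class 2 with prob 0.6.
```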
paddle/fluid/operators/math/sampler.h (+9, -4)

@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
 
 #include <cstdint>
+#include <memory>
 #include <random>

@@ -38,9 +39,12 @@ class Sampler {
       seed_ = seed;
     }
   }
 
   virtual ~Sampler();
 
+  // Sample a single value
   virtual int64_t Sample() const = 0;
 
+  // The probability that a single call to Sample() returns the given value.
   virtual float Probability(int64_t value) const = 0;

@@ -99,6 +103,7 @@ class LogUniformSampler : public Sampler {
 class CustomSampler : public Sampler {
  public:
-  explicit CustomSampler(int64_t range, const float* probabilities,
-                         unsigned int seed = 0UL);
+  explicit CustomSampler(int64_t range, const float* probabilities,
+                         const int* alias, const float* alias_probabilities,
+                         unsigned int seed = 0UL);
 
   ~CustomSampler() override {}

@@ -108,10 +113,10 @@ class CustomSampler : public Sampler {
   float Probability(int64_t value) const override;
 
  private:
-  std::shared_ptr<std::vector<float>> alias_probs_;
-  std::shared_ptr<std::vector<int64_t>> alias_;
-  std::shared_ptr<std::vector<float>> probs_;
-  std::shared_ptr<std::mt19937_64> random_engine_;
+  const float* alias_probs_;
+  const int* alias_;
+  const float* probs_;
+  std::shared_ptr<std::mt19937> random_engine_;
   std::shared_ptr<std::uniform_real_distribution<>> real_dist_;
   std::shared_ptr<std::uniform_int_distribution<>> int_dist_;
 };
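Given the tables, CustomSampler::Sample above costs O(1) per draw regardless of vocabulary size: one uniform integer picks a slot, one uniform real decides between the slot's own class and its alias. The same logic in Python, assuming tables shaped like those from build_alias_table earlier:

```python
import random

def alias_sample(alias_probs, alias):
    """O(1) draw from the distribution encoded by the alias tables."""
    index = random.randrange(len(alias_probs))  # uniform slot, like int_dist_
    p = random.random()                         # uniform real, like real_dist_
    if p > alias_probs[index]:
        return alias[index]  # take the alias class parked in this slot
    return index             # keep the slot's own class
```

Over many draws the empirical frequencies converge to the input probabilities, which is what keeps sampler='custom_dist' cheap even for large num_total_classes.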
paddle/fluid/operators/nce_op.cc (+58, -10)

@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/nce_op.h"
 
+#include <string>
 #include <vector>
 
 namespace paddle {

@@ -25,7 +26,7 @@ class NCEOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"));
     PADDLE_ENFORCE(ctx->HasInput("Label"));
     PADDLE_ENFORCE(ctx->HasInput("Weight"));

@@ -67,7 +68,7 @@ class NCEOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
         platform::CPUPlace());

@@ -101,11 +102,24 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDispensable();
-    AddInput("CustomDistribution",
+    AddInput("CustomDistProbs",
              "(Tensor) It is used in 'CostumDist' sampler. "
              "It is a tensor with shape [num_total_classes]."
              "The i-th element is the probsbility of the i-th class being sampled.")
         .AsDispensable();
+    AddInput("CustomDistAlias",
+             "(Tensor) It is used in 'CostumDist' sampler. "
+             "It is a tensor with shape [num_total_classes]."
+             "The i-th element is the probsbility of the i-th class being sampled.")
+        .AsDispensable();
+    AddInput("CustomDistAliasProbs",
+             "(Tensor) It is used in 'CostumDist' sampler. "
+             "It is a tensor with shape [num_total_classes]."
+             "The i-th element is the probsbility of the i-th class being sampled.")
+        .AsDispensable();
     AddOutput("Cost",
               "(Tensor) A tensor of shape [batch_size, 1]. Cost of samples.");
     AddOutput("SampleLogits",

@@ -124,21 +138,22 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
               "kernel to compute grads."
               "")
         .AsIntermediate();
     AddAttr<int>("num_total_classes",
                  "Total number of classes in all samples.");
     AddAttr<int>("num_neg_samples",
                  "The number of negative classes. The default value is 10.")
         .SetDefault(10);
     AddAttr<int>("sampler",
                  "(int) Which sampler to be used to sample negative class."
                  "0: Uniform; 1: LogUniform; 2: CostumDist.")
         .SetDefault(0);
     AddAttr<int>("seed",
                  "(int) The seed used in sampler. If it is 0, "
                  "the sampler will generate a seed randomly.")
         .SetDefault(0);
+    AddAttr<bool>("is_sparse", "(boolean, default false) Sparse update.")
+        .SetDefault(false);
     AddAttr<std::vector<int>>("custom_neg_classes",
                               "This attribute only be used in unitest. Classes "

@@ -156,11 +171,19 @@ By default this operator uses a uniform distribution for sampling.
   }
 };
 
+class NCEOpGradDescMaker : public framework::DefaultGradOpDescMaker<true> {
+  using ::paddle::framework::DefaultGradOpDescMaker<true>::DefaultGradOpDescMaker;
+
+ protected:
+  virtual std::string GradOpType() const { return "nce_grad"; }
+};
+
 class NCEOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("Input"));
     PADDLE_ENFORCE(ctx->HasInput("Weight"));
     PADDLE_ENFORCE(ctx->HasInput("Cost"));

@@ -190,20 +213,45 @@ class NCEOpGrad : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
         framework::ToDataType(ctx.Input<Tensor>("Input")->type()),
         platform::CPUPlace());
   }
 };
 
+class NCEOpGradVarTypeInference : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc& op_desc,
+                  framework::BlockDesc* block) const override {
+    auto weight_grad = op_desc.Output(framework::GradVarName("Weight")).front();
+    auto bias_grad = op_desc.Output(framework::GradVarName("Bias")).front();
+    auto attr = op_desc.GetAttr("is_sparse");
+    bool is_sparse = boost::get<bool>(attr);
+    if (is_sparse) {
+      VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
+               << " is set to SelectedRows";
+      block->Var(weight_grad)
+          ->SetType(framework::proto::VarType::SELECTED_ROWS);
+      block->Var(bias_grad)->SetType(framework::proto::VarType::SELECTED_ROWS);
+    } else {
+      VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
+               << " is set to LoDTensor";
+      block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
+      block->Var(bias_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
+    }
+    block->Var(weight_grad)->SetDataType(block->Var("Input")->GetDataType());
+    block->Var(bias_grad)->SetDataType(block->Var("Input")->GetDataType());
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
-REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad);
+REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpGradDescMaker, ops::NCEOpMaker);
+REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad, ops::NCEOpGradVarTypeInference);
 REGISTER_OP_CPU_KERNEL(
     nce, ops::NCEKernel<paddle::platform::CPUPlace, float>,
     ops::NCEKernel<paddle::platform::CPUPlace, double>);
 REGISTER_OP_CPU_KERNEL(nce_grad,
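NCEOpGradVarTypeInference is what makes is_sparse visible to the rest of the framework: with the attribute set, weight@GRAD and bias@GRAD are declared as SelectedRows rather than dense LoDTensors. A SelectedRows value stores only the touched rows plus their values and a height. Roughly, in NumPy terms (an illustration of the data layout, not Paddle API):

```python
import numpy as np

# NCE touches only the true classes and the sampled negatives, so a dense
# [num_classes, dim] gradient would be almost entirely zeros.
num_classes, dim = 10000, 8
rows = [3, 42, 9981]  # hypothetical class ids hit by one batch
value = np.random.randn(len(rows), dim).astype(np.float32)

# SelectedRows-style gradient: row index list + compact value block.
sparse_grad = {"height": num_classes, "rows": rows, "value": value}

# An SGD-like update then writes 3 rows instead of 10000.
weight = np.zeros((num_classes, dim), dtype=np.float32)
weight[sparse_grad["rows"]] -= 0.01 * sparse_grad["value"]
```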
paddle/fluid/operators/nce_op.h (+139, -43)

@@ -16,26 +16,32 @@ limitations under the License. */
 
 #include <math.h>
 #include <random>
+#include <set>
 #include <vector>
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/math/sampler.h"
 #include "unsupported/Eigen/CXX11/Tensor"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using SelectedRows = framework::SelectedRows;
 using Sampler = math::Sampler;
+using DDim = framework::DDim;
 
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
 template <typename DeviceContext, typename T>
 void PrepareSamples(const framework::ExecutionContext& context,
                     Sampler* sampler) {
   auto label = context.Input<Tensor>("Label");
   const int64_t* label_data = label->data<int64_t>();
   auto label_dims = label->dims();
   // int num_total_classes = context.Attr<int>("num_total_classes");
   // for unitest

@@ -44,7 +50,7 @@ void PrepareSamples(const framework::ExecutionContext& context,
   auto sample_labels = context.Output<Tensor>("SampleLabels");
   auto sample_labels_dims = sample_labels->dims();
   int64_t* sample_labels_data =
       sample_labels->mutable_data<int64_t>(context.GetPlace());
 
   int num_label = label_dims.size() == 2 ? label_dims[1] : 1;

@@ -70,13 +76,13 @@ void PrepareSamples(const framework::ExecutionContext& context,
 template <typename DeviceContext, typename T>
 class NCEKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     int sampler_type = context.Attr<int>("sampler");
     int seed = context.Attr<int>("seed");
     int num_total_classes = context.Attr<int>("num_total_classes");
     int num_neg_samples = context.Attr<int>("num_neg_samples");
 
     Sampler* sampler;
     switch (sampler_type) {
       case 0: {
         sampler = new math::UniformSampler(num_total_classes - 1, seed);

@@ -87,11 +93,19 @@ class NCEKernel : public framework::OpKernel<T> {
         break;
       }
       case 2: {
-        auto custom_dist = context.Input<Tensor>("CustomDistribution");
-        const float* custom_dist_data = custom_dist->data<float>();
-        PADDLE_ENFORCE_EQ(custom_dist->numel(), num_total_classes);
-        sampler = new math::CustomSampler(num_total_classes - 1,
-                                          custom_dist_data, seed);
+        auto dist_probs = context.Input<Tensor>("CustomDistProbs");
+        auto dist_alias = context.Input<Tensor>("CustomDistAlias");
+        auto dist_alias_probs = context.Input<Tensor>("CustomDistAliasProbs");
+
+        PADDLE_ENFORCE_EQ(dist_probs->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias_probs->numel(), num_total_classes);
+
+        const float* probs_data = dist_probs->data<float>();
+        const int* alias_data = dist_alias->data<int>();
+        const float* alias_probs_data = dist_alias_probs->data<float>();
+        sampler = new math::CustomSampler(num_total_classes - 1, probs_data,
+                                          alias_data, alias_probs_data, seed);
         break;
       }
       default: { PADDLE_THROW("Unsupported SamplerType."); }

@@ -99,17 +113,17 @@ class NCEKernel : public framework::OpKernel<T> {
     PrepareSamples<DeviceContext, T>(context, sampler);
     auto sample_labels = context.Output<Tensor>("SampleLabels");
     const int64_t* sample_labels_data = sample_labels->data<int64_t>();
     auto sample_out = context.Output<Tensor>("SampleLogits");
     T* sample_out_data = sample_out->mutable_data<T>(context.GetPlace());
     auto label = context.Input<Tensor>("Label");
     auto sample_weight = context.Input<Tensor>("SampleWeight");
     const T* sample_weight_data = nullptr;
     if (sample_weight != nullptr) {
       sample_weight_data = sample_weight->data<T>();
     }
     auto out = context.Output<Tensor>("Cost");
     T* out_data = out->mutable_data<T>(context.GetPlace());
     int64_t num_true_class = 1;
     if (label != nullptr) {
       num_true_class = label->dims()[1];

@@ -119,7 +133,7 @@ class NCEKernel : public framework::OpKernel<T> {
     // forward bias
     auto bias = context.Input<Tensor>("Bias");
     if (bias != nullptr) {
       const T* bias_data = bias->data<T>();
       for (int64_t i = 0; i < sample_labels->numel(); ++i) {
         sample_out_data[i] = bias_data[sample_labels_data[i]];
       }

@@ -158,16 +172,16 @@ class NCEKernel : public framework::OpKernel<T> {
 template <typename DeviceContext, typename T>
 class NCEGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto d_out = context.Input<Tensor>(framework::GradVarName("Cost"));
     const T* d_out_data = d_out->data<T>();
     auto label = context.Input<Tensor>("Label");
     auto sample_out = context.Input<Tensor>("SampleLogits");
     const T* sample_out_data = sample_out->data<T>();
     auto sample_labels = context.Input<Tensor>("SampleLabels");
     const int64_t* sample_labels_data = sample_labels->data<int64_t>();
     auto sample_weight = context.Input<Tensor>("SampleWeight");
     const T* sample_weight_data = nullptr;
     if (sample_weight != nullptr) {
       sample_weight_data = sample_weight->data<T>();
     }

@@ -180,7 +194,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
     int sampler_type = context.Attr<int>("sampler");
     int seed = context.Attr<int>("seed");
 
     Sampler* sampler;
     switch (sampler_type) {
       case 0: {
         sampler = new math::UniformSampler(num_total_classes - 1, seed);

@@ -191,11 +205,19 @@ class NCEGradKernel : public framework::OpKernel<T> {
         break;
       }
       case 2: {
-        auto custom_dist = context.Input<Tensor>("CustomDistribution");
-        const float* custom_dist_data = custom_dist->data<float>();
-        PADDLE_ENFORCE_EQ(custom_dist->numel(), num_total_classes);
-        sampler = new math::CustomSampler(num_total_classes - 1,
-                                          custom_dist_data, seed);
+        auto dist_probs = context.Input<Tensor>("CustomDistProbs");
+        auto dist_alias = context.Input<Tensor>("CustomDistAlias");
+        auto dist_alias_probs = context.Input<Tensor>("CustomDistAliasProbs");
+
+        PADDLE_ENFORCE_EQ(dist_probs->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias->numel(), num_total_classes);
+        PADDLE_ENFORCE_EQ(dist_alias_probs->numel(), num_total_classes);
+
+        const float* probs_data = dist_probs->data<float>();
+        const int* alias_data = dist_alias->data<int>();
+        const float* alias_probs_data = dist_alias_probs->data<float>();
+        sampler = new math::CustomSampler(num_total_classes - 1, probs_data,
+                                          alias_data, alias_probs_data, seed);
         break;
       }
       default: { PADDLE_THROW("Unsupported SamplerType."); }

@@ -203,7 +225,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
     // T b = 1. / num_total_classes * num_neg_samples;
 
     Tensor sample_grad;  // tmp tensor
     T* sample_grad_data =
         sample_grad.mutable_data<T>(sample_labels->dims(), context.GetPlace());
 
     // backward cost
     for (int64_t i = 0; i < sample_labels->numel(); ++i) {

@@ -217,32 +239,105 @@ class NCEGradKernel : public framework::OpKernel<T> {
             : w * (o * (1 - o) / (o + b));
       sample_grad_data[i] *= d_out_data[sample_idx];
     }
-    // get d_bias
-    auto d_bias = context.Output<Tensor>(framework::GradVarName("Bias"));
-    if (d_bias != nullptr) {
-      T* d_bias_data = d_bias->mutable_data<T>(context.GetPlace());
-      std::fill(d_bias_data, d_bias_data + d_bias->numel(), 0.0);
-      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
-        d_bias_data[sample_labels_data[i]] += sample_grad_data[i];
-      }
-    }
-    // get d_w
-    auto d_w = context.Output<Tensor>(framework::GradVarName("Weight"));
-    if (d_w != nullptr) {
-      auto d_w_data = d_w->mutable_data<T>(context.GetPlace());
-      std::fill(d_w_data, d_w_data + d_w->numel(), 0.0);
-      auto d_w_matrix = EigenMatrix<T>::From(*d_w);
-      auto x_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Input")));
-      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
-        d_w_matrix.chip(sample_labels_data[i], 0) +=
-            x_matrix.chip(static_cast<int>(i / sample_labels->dims()[1]), 0) *
-            sample_grad_data[i];
-      }
-    }
+    bool is_sparse = context.Attr<bool>("is_sparse");
+    if (!is_sparse) {
+      // get d_bias
+      auto d_bias = context.Output<Tensor>(framework::GradVarName("Bias"));
+      if (d_bias != nullptr) {
+        T* d_bias_data = d_bias->mutable_data<T>(context.GetPlace());
+        std::fill(d_bias_data, d_bias_data + d_bias->numel(), 0.0);
+        for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+          d_bias_data[sample_labels_data[i]] += sample_grad_data[i];
+        }
+      }
+      // get d_w
+      auto d_w = context.Output<Tensor>(framework::GradVarName("Weight"));
+      if (d_w != nullptr) {
+        auto d_w_data = d_w->mutable_data<T>(context.GetPlace());
+        std::fill(d_w_data, d_w_data + d_w->numel(), 0.0);
+        auto d_w_matrix = EigenMatrix<T>::From(*d_w);
+        auto x_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Input")));
+        for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+          d_w_matrix.chip(sample_labels_data[i], 0) +=
+              x_matrix.chip(static_cast<int>(i / sample_labels->dims()[1]), 0) *
+              sample_grad_data[i];
+        }
+      }
+    } else {
+      std::vector<int64_t> labels;
+      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+        labels.push_back(sample_labels_data[i]);
+      }
+      std::set<T> st(labels.begin(), labels.end());
+      labels.assign(st.begin(), st.end());
+
+      auto* bias_var = context.InputVar("Bias");
+      DDim bias_dim;
+      if (bias_var->IsType<LoDTensor>()) {
+        bias_dim = context.Input<LoDTensor>("Bias")->dims();
+      } else if (bias_var->IsType<SelectedRows>()) {
+        auto* table_t = context.Input<SelectedRows>("Bias");
+        bias_dim = table_t->value().dims();
+      } else {
+        PADDLE_THROW(
+            "The parameter Bias of a NCE_OP "
+            "must be either LoDTensor or SelectedRows");
+      }
+
+      auto d_bias =
+          context.Output<SelectedRows>(framework::GradVarName("Bias"));
+      d_bias->set_rows(labels);
+      d_bias->set_height(bias_dim[0]);
+
+      d_bias->mutable_value()->Resize(
+          {static_cast<int64_t>(labels.size()), bias_dim[1]});
+      T* d_bias_data =
+          d_bias->mutable_value()->mutable_data<T>(context.GetPlace());
+      std::fill(d_bias_data, d_bias_data + labels.size(), 0.0);
+      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+        d_bias_data[d_bias->Index(sample_labels_data[i])] +=
+            sample_grad_data[i];
+      }
+
+      auto* table_var = context.InputVar("Weight");
+      DDim table_dim;
+      if (table_var->IsType<LoDTensor>()) {
+        table_dim = context.Input<LoDTensor>("Weight")->dims();
+      } else if (table_var->IsType<SelectedRows>()) {
+        auto* table_t = context.Input<SelectedRows>("Weight");
+        table_dim = table_t->value().dims();
+      } else {
+        PADDLE_THROW(
+            "The parameter Weight of a NCE_OP "
+            "must be either LoDTensor or SelectedRows");
+      }
+
+      auto d_w = context.Output<SelectedRows>(framework::GradVarName("Weight"));
+      d_w->set_rows(labels);
+      d_w->set_height(table_dim[0]);
+
+      auto* d_table_value = d_w->mutable_value();
+      d_table_value->Resize(
+          {static_cast<int64_t>(labels.size()), table_dim[1]});
+      auto d_w_data = d_table_value->mutable_data<T>(context.GetPlace());
+      std::fill(d_w_data, d_w_data + d_table_value->numel(), 0.0);
+
+      auto d_w_matrix = EigenMatrix<T>::From(*d_table_value);
+      auto x_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Input")));
+      for (int64_t i = 0; i < sample_labels->numel(); ++i) {
+        d_w_matrix.chip(d_w->Index(sample_labels_data[i]), 0) +=
+            x_matrix.chip(static_cast<int>(i / sample_labels->dims()[1]), 0) *
+            sample_grad_data[i];
+      }
+    }
+
     // get d_x
     auto d_x = context.Output<Tensor>(framework::GradVarName("Input"));
     if (d_x != nullptr) {
       auto* d_x_data = d_x->mutable_data<T>(context.GetPlace());
       std::fill(d_x_data, d_x_data + d_x->numel(), 0.0);
       auto d_x_matrix = EigenMatrix<T>::From(*d_x);
       auto w_matrix = EigenMatrix<T>::From(*(context.Input<Tensor>("Weight")));

@@ -251,6 +346,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
             w_matrix.chip(sample_labels_data[i], 0) * sample_grad_data[i];
       }
     }
+    delete sampler;
   }
 };
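In the sparse branch above, the kernel deduplicates the sampled labels, sizes the SelectedRows value to [len(labels), dim], and scatters each per-sample gradient through d_w->Index(label). The same bookkeeping in NumPy terms, a sketch under the assumption that sample_grad already holds dCost/dLogit for every sampled class:

```python
import numpy as np

def scatter_weight_grad(sample_labels, sample_grad, x):
    """Sketch of the is_sparse weight-gradient path in NCEGradKernel.

    sample_labels: [batch, num_sampled] int array of true + negative ids.
    sample_grad:   [batch, num_sampled] float array of per-sample grads.
    x:             [batch, dim] float array, the layer input.
    """
    rows = sorted(set(sample_labels.ravel().tolist()))  # dedup, like std::set
    index = {r: k for k, r in enumerate(rows)}          # like d_w->Index(...)
    batch, num_sampled = sample_labels.shape
    value = np.zeros((len(rows), x.shape[1]), dtype=x.dtype)
    for i in range(batch * num_sampled):
        label = sample_labels.ravel()[i]
        # row i // num_sampled of x produced sample i, as in the Eigen chip()
        value[index[label]] += x[i // num_sampled] * sample_grad.ravel()[i]
    return rows, value  # the (rows, value) pair a SelectedRows grad carries
```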
python/paddle/fluid/layers/nn.py (+70, -25)

@@ -4549,7 +4549,8 @@ def nce(input,
         name=None,
         sampler="uniform",
         custom_dist=None,
-        seed=0):
+        seed=0,
+        is_sparse=False):
     """
     ${comment}

@@ -4575,11 +4576,12 @@ def nce(input,
         sampler (str): The sampler used to sample class from negtive classes.
                        It can be 'uniform', 'log_uniform' or 'custom_dist'.
                        default: 'uniform'.
-        custom_dist (Variable): A tensor with shape [num_total_classes].
+        custom_dist (float[]): A float[] with size=num_total_classes.
                        It is used when sampler is set to 'custom_dist'.
                        custom_dist[i] is the probsbility of i-th class to be sampled.
                        default: None.
         seed (int): The seed used in sampler. default: 0.
+        is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows.
 
     Returns:
         Variable: The output nce loss.

@@ -4631,12 +4633,7 @@ def nce(input,
         shape=[num_total_classes, dim],
         is_bias=False,
         dtype=input.dtype)
-    inputs = {
-        'Input': input,
-        'Label': label,
-        'Weight': w,
-        'SampleWeight': sample_weight if sample_weight is not None else []
-    }
+    inputs = {}
     if helper.bias_attr:
         b = helper.create_parameter(
             attr=helper.bias_attr,

@@ -4648,18 +4645,10 @@ def nce(input,
     sample_logits = helper.create_variable_for_type_inference(dtype=input.dtype)
     sample_labels = helper.create_variable_for_type_inference(dtype=label.dtype)
 
-    if num_neg_samples is None:
-        num_neg_samples = 10
-    else:
-        num_neg_samples = int(num_neg_samples)
-
-    inputs = {
-        'Input': input,
-        'Label': label,
-        'Weight': w,
-        'Bias': b,
-        'SampleWeight': sample_weight if sample_weight is not None else []
-    }
+    inputs['Input'] = input
+    inputs['Label'] = label
+    inputs['Weight'] = w
+    inputs['SampleWeight'] = sample_weight if sample_weight is not None else []
 
     if sampler == "uniform":
         sampler = 0

@@ -4667,17 +4656,73 @@ def nce(input,
         sampler = 1
     elif sampler == "custom_dist":
         assert custom_dist is not None
-        assert isinstance(custom_dist, Variable)
-        inputs['CustomDistribution'] = custom_dist
+        # assert isinstance(custom_dist, Variable)
+
+        custom_dist_len = len(custom_dist)
+        alias_probs_ = [0] * custom_dist_len
+        alias_ = [0] * custom_dist_len
+        bigs = []
+        littles = []
+        for i in range(custom_dist_len):
+            normal_prob = custom_dist[i] * custom_dist_len
+            if normal_prob - 1.0 > 1e-4:
+                bigs.append((i, normal_prob))
+            elif 1.0 - normal_prob > 1e-4:
+                littles.append((i, normal_prob))
+            else:
+                alias_probs_[i] = normal_prob
+                alias_[i] = -1
+
+        while len(bigs) and len(littles):
+            big = bigs.pop(0)
+            little = littles.pop(0)
+
+            big_idx = big[0]
+            big_prob = big[1]
+
+            alias_probs_[little[0]] = little[1]
+            alias_[little[0]] = big_idx
+            big_left = big[1] + little[1] - 1
+            if big_left - 1.0 > 1e-4:
+                bigs.append((big_idx, big_left))
+            elif 1.0 - big_left > 1e-4:
+                littles.append((big_idx, big_left))
+            else:
+                alias_probs_[big_idx] = big_left
+                alias_[big_idx] = -1
+
+        if len(bigs):
+            big = bigs.pop(0)
+            alias_probs_[big[0]] = 1.0
+            alias_[big[0]] = -1
+        if len(littles):
+            little = littles.pop(0)
+            alias_probs_[little[0]] = 1.0
+            alias_[little[0]] = -1
+
+        probs = assign(input=np.array(custom_dist).astype('float32'))
+        custom_alias = assign(input=np.array(alias_).astype('int32'))
+        custom_alias_probs = assign(
+            input=np.array(alias_probs_).astype('float32'))
+
+        inputs['CustomDistProbs'] = probs
+        inputs['CustomDistAlias'] = custom_alias
+        inputs['CustomDistAliasProbs'] = custom_alias_probs
         sampler = 2
     else:
         raise Exception("Unsupported sampler type.")
 
+    if num_neg_samples is None:
+        num_neg_samples = 10
+    else:
+        num_neg_samples = int(num_neg_samples)
+
     attrs = {
         'num_total_classes': int(num_total_classes),
         'num_neg_samples': num_neg_samples,
         'seed': seed,
-        'sampler': sampler
+        'sampler': sampler,
+        'is_sparse': is_sparse
     }
 
     helper.append_op(

@@ -6629,7 +6674,7 @@ def crop(x, shape=None, offsets=None, name=None):
     helper = LayerHelper('crop', **locals())
 
     if not (isinstance(shape, list) or isinstance(shape, tuple) or \
             isinstance(shape, Variable)):
         raise ValueError("The shape should be a list, tuple or Variable.")
 
     if offsets is None:

@@ -6751,7 +6796,7 @@ def affine_grid(theta, out_shape, name=None):
     helper = LayerHelper('affine_grid')
 
     if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \
             isinstance(out_shape, Variable)):
         raise ValueError("The out_shape should be a list, tuple or Variable.")
 
     if not isinstance(theta, Variable):
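Putting the new Python interface together: custom_dist is now a plain float list rather than a Variable, and is_sparse switches the gradient layout. The snippet below is adapted from the test added in this commit (test_nce.py, next section); shapes, class counts, and the Dirichlet-sampled frequency list are taken from there:

```python
import numpy as np
import paddle.fluid as fluid

num_total_classes = 20
# Any float list of length num_total_classes summing to ~1 works here.
word_freqs = np.random.dirichlet(np.ones(num_total_classes) * 1000).tolist()

input = fluid.layers.data(name="input", shape=[10], dtype="float32")
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = fluid.layers.nce(input=input,
                        label=label,
                        num_total_classes=num_total_classes,
                        num_neg_samples=5,
                        sampler="custom_dist",
                        custom_dist=word_freqs,  # plain list, not a Variable
                        is_sparse=True)  # weight@GRAD, bias@GRAD -> SelectedRows
loss = fluid.layers.mean(cost)
```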
python/paddle/fluid/tests/unittests/test_nce.py (+112, -6)

@@ -14,8 +14,12 @@
 from __future__ import print_function
 
-import unittest
 import numpy as np
+import unittest
+
+import paddle.fluid as fluid
+import paddle.fluid.initializer as initializer
 
 from op_test import OpTest

@@ -59,7 +63,7 @@ def nce(input, weight, bias, sample_weight, labels, num_classes,
 class TestNCE(OpTest):
     def generate_data(self, dim, batch_size, num_classes, num_true_class,
-                      num_neg_samples):
+                      num_neg_samples, is_sparse):
         input = np.random.randn(batch_size, dim).astype(np.float32)
         weight = np.random.randn(num_classes, dim).astype(np.float32)
         bias = np.random.randn(num_classes).astype(np.float32)

@@ -70,7 +74,8 @@ class TestNCE(OpTest):
             'num_neg_samples': num_neg_samples,
             'custom_neg_classes': list(range(num_neg_samples)),
             'seed': 0,
-            'sampler': 0
+            'sampler': 0,
+            'is_sparse': is_sparse
         }
         self.inputs = {
             'Input': input,

@@ -81,7 +86,7 @@ class TestNCE(OpTest):
         }
 
     def set_data(self):
-        self.generate_data(5, 5, 4, 1, 2)
+        self.generate_data(5, 5, 4, 1, 2, False)
 
     def compute(self):
         out = nce(self.inputs['Input'], self.inputs['Weight'],

@@ -107,9 +112,110 @@ class TestNCE(OpTest):
             ["Input", "Weight", "Bias"], "Cost", max_relative_error=0.02)
 
 
-class TestNCECase1(TestNCE):
+class TestNCECase1Tensor(TestNCE):
     def set_data(self):
-        self.generate_data(10, 20, 10, 2, 5)
+        self.generate_data(10, 20, 10, 2, 5, False)
+
+
+class TestNCECase1SelectedRows(unittest.TestCase):
+    def setUp(self):
+        self.base_lr = 0.0001
+        self.batch_size = 8
+
+    @staticmethod
+    def get_place():
+        place = fluid.core.CPUPlace()
+        return place
+
+    @staticmethod
+    def get_train_data(batch_size):
+        batchs = []
+        for i in range(batch_size):
+            input = np.random.randn(batch_size, 10).astype(np.float32)
+            labels = np.random.randint(0, 20, (batch_size, 1))
+            batchs.append([input, labels])
+        return batchs
+
+    def get_optimizer(self):
+        # SGD optimizer
+        optimizer = fluid.optimizer.SGD(learning_rate=self.base_lr)
+        return optimizer
+
+    def train_network(self, num_total_classes, num_neg_samples, sampler,
+                      custom_dist, is_sparse):
+        input = fluid.layers.data(name="input", shape=[10], dtype="float32")
+        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+
+        w_param = fluid.default_main_program().global_block().create_parameter(
+            shape=[num_total_classes, 10],
+            dtype='float32',
+            name='nce_w',
+            initializer=initializer.ConstantInitializer())
+        b_param = fluid.default_main_program().global_block().create_parameter(
+            shape=[num_total_classes, 1],
+            dtype='float32',
+            name='nce_b',
+            initializer=initializer.ConstantInitializer())
+
+        cost = fluid.layers.nce(input=input,
+                                label=label,
+                                num_total_classes=num_total_classes,
+                                sampler=sampler,
+                                custom_dist=custom_dist,
+                                sample_weight=None,
+                                param_attr='nce_w',
+                                bias_attr='nce_b',
+                                seed=1,
+                                num_neg_samples=num_neg_samples,
+                                is_sparse=is_sparse)
+        avg_cost = fluid.layers.mean(cost)
+        # optimizer
+        optimizer = self.get_optimizer()
+        optimizer.minimize(avg_cost)
+
+        return [avg_cost, [input, label]]
+
+    def test_input_is_selected_rows(self):
+        place = self.get_place()
+        exe = fluid.Executor(place)
+
+        data = self.get_train_data(self.batch_size)
+        nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
+
+        rets = []
+        # for dense
+        dense_scope = fluid.core.Scope()
+        dense_startup_program = fluid.framework.Program()
+        dense_train_program = fluid.framework.Program()
+        with fluid.scope_guard(dense_scope):
+            with fluid.program_guard(dense_train_program,
+                                     dense_startup_program):
+                cost, feeds = self.train_network(20, 5, "custom_dist",
+                                                 nid_freq_arr.tolist(), False)
+                feeder = fluid.DataFeeder(feed_list=feeds, place=place)
+                exe.run(dense_startup_program)
+                loss_val = exe.run(dense_train_program,
+                                   feed=feeder.feed(data),
+                                   fetch_list=[cost.name])
+                rets.append(np.mean(loss_val))
+
+        # for sparse
+        sparse_scope = fluid.core.Scope()
+        sparse_startup_program = fluid.framework.Program()
+        sparse_train_program = fluid.framework.Program()
+        with fluid.scope_guard(sparse_scope):
+            with fluid.program_guard(sparse_train_program,
+                                     sparse_startup_program):
+                cost, feeds = self.train_network(20, 5, "custom_dist",
+                                                 nid_freq_arr.tolist(), True)
+                feeder = fluid.DataFeeder(feed_list=feeds, place=place)
+                exe.run(sparse_startup_program)
+                loss_val = exe.run(sparse_train_program,
+                                   feed=feeder.feed(data),
+                                   fetch_list=[cost.name])
+                rets.append(np.mean(loss_val))
+
+        self.assertEqual(rets[0], rets[1])
 
 
 if __name__ == '__main__':