Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
87648f8e
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
87648f8e
编写于
11月 27, 2018
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
差异文件
merge develop, test=develop
上级
c3c3c0b3
db9284ec
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
665 addition
and
173 deletion
+665
-173
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+1
-2
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-1
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+2
-1
paddle/fluid/framework/details/all_reduce_deps_pass.cc
paddle/fluid/framework/details/all_reduce_deps_pass.cc
+125
-0
paddle/fluid/framework/details/all_reduce_deps_pass.h
paddle/fluid/framework/details/all_reduce_deps_pass.h
+33
-0
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+21
-0
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+1
-0
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+5
-5
paddle/fluid/inference/analysis/analyzer_tester.cc
paddle/fluid/inference/analysis/analyzer_tester.cc
+2
-1
paddle/fluid/memory/allocation/best_fit_allocator_test.cc
paddle/fluid/memory/allocation/best_fit_allocator_test.cc
+4
-4
paddle/fluid/memory/allocation/best_fit_allocator_test.cu
paddle/fluid/memory/allocation/best_fit_allocator_test.cu
+4
-4
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+4
-0
paddle/fluid/operators/dropout_op_test.cc
paddle/fluid/operators/dropout_op_test.cc
+2
-0
paddle/fluid/operators/math/sampler.cc
paddle/fluid/operators/math/sampler.cc
+9
-54
paddle/fluid/operators/math/sampler.h
paddle/fluid/operators/math/sampler.h
+9
-4
paddle/fluid/operators/math/sequence_pooling.cu
paddle/fluid/operators/math/sequence_pooling.cu
+1
-2
paddle/fluid/operators/nce_op.cc
paddle/fluid/operators/nce_op.cc
+58
-10
paddle/fluid/operators/nce_op.h
paddle/fluid/operators/nce_op.h
+139
-43
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+2
-2
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+9
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+6
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+112
-25
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-7
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+1
-1
python/paddle/fluid/tests/unittests/test_nce.py
python/paddle/fluid/tests/unittests/test_nce.py
+112
-6
未找到文件。
cmake/inference_lib.cmake
浏览文件 @
87648f8e
...
...
@@ -186,8 +186,7 @@ set(module "inference")
copy
(
inference_lib DEPS
${
inference_deps
}
SRCS
${
src_dir
}
/
${
module
}
/*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/libpaddle_fluid.*
${
src_dir
}
/
${
module
}
/api/paddle_*.h
${
PADDLE_BINARY_DIR
}
/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
DSTS
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
${
dst_dir
}
/
${
module
}
)
set
(
module
"platform"
)
...
...
paddle/fluid/API.spec
浏览文件 @
87648f8e
...
...
@@ -97,8 +97,8 @@ paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_ti
paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0))
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'non_leaf_num', 'ptable', 'pcode', 'is_costum', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, False, False))
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
87648f8e
...
...
@@ -39,11 +39,12 @@ if (WITH_GPU)
endif
()
cc_library
(
sequential_execution_pass SRCS sequential_execution_pass.cc DEPS graph graph_helper pass
)
cc_library
(
all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_helper pass
)
cc_library
(
multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle fused_broadcast_op_handle
)
set
(
SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass
)
set
(
SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass
all_reduce_deps_pass
)
if
(
WITH_GPU
)
list
(
APPEND SSA_GRAPH_EXECUTOR_DEPS reference_count_pass
)
endif
()
...
...
paddle/fluid/framework/details/all_reduce_deps_pass.cc
0 → 100644
浏览文件 @
87648f8e
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/op_graph_view.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_proto_maker.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
static
constexpr
char
kAllOpDescs
[]
=
"all_op_descs"
;
VarHandle
*
GetValidInput
(
const
OpHandleBase
*
a
)
{
for
(
auto
p
:
a
->
Inputs
())
{
VarHandle
*
b
=
dynamic_cast
<
VarHandle
*>
(
p
);
if
(
b
)
{
return
b
;
}
}
return
nullptr
;
}
std
::
unique_ptr
<
ir
::
Graph
>
AllReduceDepsPass
::
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
auto
graph_ops
=
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
*
graph
);
// get vars order
int
order
=
0
;
std
::
unordered_map
<
std
::
string
,
int
>
vars
;
// TODO(gongwb): use graph topology sort to find the order of operators.
// Note that must assert topology sort is stable
auto
&
ops
=
Get
<
const
std
::
vector
<
OpDesc
*>>
(
kAllOpDescs
);
for
(
auto
*
op_desc
:
ops
)
{
auto
outputs
=
op_desc
->
Outputs
();
for
(
auto
&
o_it
:
outputs
)
{
for
(
auto
&
v
:
o_it
.
second
)
{
// values
vars
[
v
]
=
order
;
}
}
order
++
;
}
std
::
vector
<
OpHandleBase
*>
dist_ops
;
// get allreduce ops.
for
(
auto
&
op
:
graph_ops
)
{
// FIXME(gongwb):add broad cast.
if
(
op
->
Name
()
==
"all_reduce"
||
op
->
Name
()
==
"reduce"
)
{
dist_ops
.
push_back
(
op
);
}
}
VLOG
(
10
)
<<
"dist_ops size:"
<<
dist_ops
.
size
()
<<
std
::
endl
;
std
::
sort
(
dist_ops
.
begin
(),
dist_ops
.
end
(),
[
&
](
OpHandleBase
*
op1
,
OpHandleBase
*
op2
)
{
VarHandle
*
i0
=
dynamic_cast
<
VarHandle
*>
(
GetValidInput
(
op1
));
VarHandle
*
i1
=
dynamic_cast
<
VarHandle
*>
(
GetValidInput
(
op2
));
PADDLE_ENFORCE
(
i0
!=
nullptr
&&
i1
!=
nullptr
,
"%s convert to %s error"
,
op1
->
DebugString
(),
op2
->
DebugString
());
auto
l_it
=
vars
.
find
(
i0
->
name_
);
auto
r_it
=
vars
.
find
(
i1
->
name_
);
if
(
l_it
->
second
<
r_it
->
second
)
return
true
;
if
(
l_it
->
second
==
r_it
->
second
)
{
return
i0
->
name_
<
i1
->
name_
;
}
return
false
;
});
// add dependency.
auto
&
sorted_ops
=
dist_ops
;
for
(
size_t
i
=
1
;
i
<
sorted_ops
.
size
();
++
i
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
graph
->
CreateControlDepVar
());
auto
*
pre_op
=
sorted_ops
[
i
-
1
];
auto
*
op
=
sorted_ops
[
i
];
pre_op
->
AddOutput
(
dep_var
);
op
->
AddInput
(
dep_var
);
graph
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
VLOG
(
10
)
<<
"add all_reduce sequential dependencies between "
<<
pre_op
<<
" and "
<<
op
;
VLOG
(
10
)
<<
"pre_op:"
<<
pre_op
->
DebugString
()
<<
", op:"
<<
op
->
DebugString
();
}
return
graph
;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
all_reduce_deps_pass
,
paddle
::
framework
::
details
::
AllReduceDepsPass
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kAllOpDescs
);
paddle/fluid/framework/details/all_reduce_deps_pass.h
0 → 100644
浏览文件 @
87648f8e
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// TODO(gongwb): overlap allreduce with backward computation.
class
AllReduceDepsPass
:
public
ir
::
Pass
{
protected:
std
::
unique_ptr
<
ir
::
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
override
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
87648f8e
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/sequential_execution_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
...
...
@@ -24,6 +25,10 @@ namespace paddle {
namespace
framework
{
namespace
details
{
static
inline
bool
SeqOnlyAllReduceOps
(
const
BuildStrategy
&
strategy
)
{
return
(
!
strategy
.
enable_sequential_execution_
&&
strategy
.
num_trainers_
>
1
);
}
class
ParallelExecutorPassBuilder
:
public
ir
::
PassBuilder
{
public:
explicit
ParallelExecutorPassBuilder
(
const
BuildStrategy
&
strategy
)
...
...
@@ -70,6 +75,10 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
if
(
SeqOnlyAllReduceOps
(
strategy
))
{
AppendPass
(
"all_reduce_deps_pass"
);
}
if
(
strategy_
.
remove_unnecessary_lock_
)
{
AppendPass
(
"modify_op_lock_and_record_event_pass"
);
}
...
...
@@ -124,6 +133,17 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass
->
SetNotOwned
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
,
nctx
);
#endif
}
else
if
(
pass
->
Type
()
==
"sequential_execution_pass"
)
{
VLOG
(
1
)
<<
"set enable_sequential_execution:"
<<
enable_sequential_execution_
;
pass
->
Erase
(
kAllOpDescs
);
pass
->
Set
<
const
std
::
vector
<
OpDesc
*>>
(
kAllOpDescs
,
new
std
::
vector
<
OpDesc
*>
(
main_program
.
Block
(
0
).
AllOps
()));
}
else
if
(
pass
->
Type
()
==
"all_reduce_deps_pass"
)
{
VLOG
(
1
)
<<
"SeqOnlyAllReduceOps:"
<<
SeqOnlyAllReduceOps
(
*
this
)
<<
", num_trainers:"
<<
num_trainers_
;
pass
->
Erase
(
kAllOpDescs
);
pass
->
Set
<
const
std
::
vector
<
OpDesc
*>>
(
kAllOpDescs
,
...
...
@@ -144,4 +164,5 @@ USE_PASS(multi_devices_pass);
USE_PASS
(
multi_devices_check_pass
);
USE_PASS
(
multi_devices_print_pass
);
USE_PASS
(
sequential_execution_pass
);
USE_PASS
(
all_reduce_deps_pass
);
USE_PASS
(
modify_op_lock_and_record_event_pass
);
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
87648f8e
...
...
@@ -73,6 +73,7 @@ struct BuildStrategy {
bool
fuse_broadcast_op_
{
false
};
int
num_trainers_
{
1
};
bool
remove_unnecessary_lock_
{
false
};
// NOTE:
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
87648f8e
...
...
@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h"
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif
...
...
@@ -54,7 +54,7 @@ class ParallelExecutorPrivate {
Scope
*
global_scope_
;
// not owned
std
::
unique_ptr
<
details
::
SSAGraphExecutor
>
executor_
;
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
platform
::
NCCLContextMap
>
nccl_ctxs_
;
#endif
bool
own_local_scope_
;
...
...
@@ -104,7 +104,7 @@ ParallelExecutor::ParallelExecutor(
if
(
member_
->
use_cuda_
)
{
// Bcast Parameters to all GPUs
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto
*
nccl_id_var
=
scope
->
FindVar
(
NCCL_ID_VARNAME
);
ncclUniqueId
*
nccl_id
=
nullptr
;
if
(
nccl_id_var
!=
nullptr
)
{
...
...
@@ -124,7 +124,7 @@ ParallelExecutor::ParallelExecutor(
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
...
...
@@ -213,7 +213,7 @@ void ParallelExecutor::BCastParamsToDevices(
}
auto
&
dims
=
main_tensor
.
dims
();
if
(
paddle
::
platform
::
is_gpu_place
(
main_tensor
.
place
()))
{
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
vector
<
void
*>
buffers
;
size_t
numel
=
main_tensor
.
numel
();
ncclDataType_t
data_type
=
platform
::
ToNCCLDataType
(
main_tensor
.
type
());
...
...
paddle/fluid/inference/analysis/analyzer_tester.cc
浏览文件 @
87648f8e
...
...
@@ -76,7 +76,8 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
((
size_t
)
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
static_cast
<
size_t
>
(
5UL
),
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
"data: "
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
...
...
paddle/fluid/memory/allocation/best_fit_allocator_test.cc
浏览文件 @
87648f8e
...
...
@@ -99,9 +99,8 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
LockedAllocator
locked_allocator
(
std
::
move
(
best_fit_allocator
));
auto
th_main
=
[
&
]
{
std
::
random_device
dev
;
std
::
default_random_engine
engine
(
dev
());
auto
th_main
=
[
&
](
std
::
random_device
::
result_type
seed
)
{
std
::
default_random_engine
engine
(
seed
);
std
::
uniform_int_distribution
<
size_t
>
dist
(
1U
,
1024U
);
for
(
size_t
i
=
0
;
i
<
128
;
++
i
)
{
...
...
@@ -125,7 +124,8 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
{
std
::
vector
<
std
::
thread
>
threads
;
for
(
size_t
i
=
0
;
i
<
1024
;
++
i
)
{
threads
.
emplace_back
(
th_main
);
std
::
random_device
dev
;
threads
.
emplace_back
(
th_main
,
dev
());
}
for
(
auto
&
th
:
threads
)
{
th
.
join
();
...
...
paddle/fluid/memory/allocation/best_fit_allocator_test.cu
浏览文件 @
87648f8e
...
...
@@ -41,9 +41,8 @@ TEST(BestFitAllocator, concurrent_cuda) {
LockedAllocator
concurrent_allocator
(
std
::
unique_ptr
<
Allocator
>
(
new
BestFitAllocator
(
cuda_allocation
.
get
())));
auto
th_main
=
[
&
]
{
std
::
random_device
dev
;
std
::
default_random_engine
engine
(
dev
());
auto
th_main
=
[
&
](
std
::
random_device
::
result_type
seed
)
{
std
::
default_random_engine
engine
(
seed
);
std
::
uniform_int_distribution
<
size_t
>
dist
(
1U
,
1024U
);
platform
::
CUDAPlace
gpu
(
0
);
platform
::
CUDADeviceContext
dev_ctx
(
gpu
);
...
...
@@ -75,7 +74,8 @@ TEST(BestFitAllocator, concurrent_cuda) {
{
std
::
vector
<
std
::
thread
>
threads
;
for
(
size_t
i
=
0
;
i
<
1024
;
++
i
)
{
threads
.
emplace_back
(
th_main
);
std
::
random_device
dev
;
threads
.
emplace_back
(
th_main
,
dev
());
}
for
(
auto
&
th
:
threads
)
{
th
.
join
();
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
87648f8e
...
...
@@ -86,7 +86,11 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
munlock
(
p
,
size
);
#endif
}
#ifdef _WIN32
_aligned_free
(
p
);
#else
free
(
p
);
#endif
}
bool
CPUAllocator
::
UseGpu
()
const
{
return
false
;
}
...
...
paddle/fluid/operators/dropout_op_test.cc
浏览文件 @
87648f8e
...
...
@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _WIN32
#include <unistd.h>
#endif
#include <string>
#include <thread> // NOLINT
...
...
paddle/fluid/operators/math/sampler.cc
浏览文件 @
87648f8e
...
...
@@ -60,75 +60,30 @@ float LogUniformSampler::Probability(int64_t value) const {
return
(
log
((
value
+
2.0
)
/
(
value
+
1.0
)))
/
log_range_
;
}
CustomSampler
::
CustomSampler
(
int64_t
range
,
const
float
*
probabilities
,
CustomSampler
::
CustomSampler
(
int64_t
range
,
const
float
*
probabilities
,
const
int
*
alias
,
const
float
*
alias_probabilities
,
unsigned
int
seed
)
:
Sampler
(
range
,
seed
)
{
random_engine_
=
std
::
make_shared
<
std
::
mt19937
_64
>
(
seed_
);
random_engine_
=
std
::
make_shared
<
std
::
mt19937
>
(
seed_
);
real_dist_
=
std
::
make_shared
<
std
::
uniform_real_distribution
<>>
(
0
,
1
);
int_dist_
=
std
::
make_shared
<
std
::
uniform_int_distribution
<>>
(
0
,
range
);
alias_probs_
=
std
::
make_shared
<
std
::
vector
<
float
>>
(
range
+
1
);
alias_
=
std
::
make_shared
<
std
::
vector
<
int64_t
>>
(
range
+
1
);
probs_
=
std
::
make_shared
<
std
::
vector
<
float
>>
(
range
+
1
);
std
::
queue
<
std
::
pair
<
int64_t
,
float
>>
bigs
;
std
::
queue
<
std
::
pair
<
int64_t
,
float
>>
littles
;
for
(
int64_t
i
=
0
;
i
<=
range
;
++
i
)
{
(
*
probs_
)[
i
]
=
probabilities
[
i
];
float
normal_prob
=
probabilities
[
i
]
*
(
range
+
1
);
if
(
normal_prob
-
1.0
>
1e-4
)
{
bigs
.
emplace
(
i
,
normal_prob
);
}
else
if
(
1.0
-
normal_prob
>
1e-4
)
{
littles
.
emplace
(
i
,
normal_prob
);
}
else
{
(
*
alias_probs_
)[
i
]
=
normal_prob
;
(
*
alias_
)[
i
]
=
-
1
;
}
}
while
((
!
littles
.
empty
())
&&
(
!
bigs
.
empty
()))
{
auto
big
=
bigs
.
front
();
auto
little
=
littles
.
front
();
bigs
.
pop
();
littles
.
pop
();
(
*
alias_probs_
)[
little
.
first
]
=
little
.
second
;
(
*
alias_
)[
little
.
first
]
=
big
.
first
;
auto
big_left
=
big
.
second
-
(
1
-
little
.
second
);
if
(
big_left
-
1.0
>
1e-4
)
{
bigs
.
emplace
(
big
.
first
,
big_left
);
}
else
if
(
1.0
-
big_left
>
1e-4
)
{
littles
.
emplace
(
big
.
first
,
big_left
);
}
else
{
(
*
alias_probs_
)[
big
.
first
]
=
big_left
;
(
*
alias_
)[
big
.
first
]
=
-
1
;
}
}
if
(
!
littles
.
empty
())
{
// littles.second is close to 1.0
auto
little
=
littles
.
front
();
(
*
alias_probs_
)[
little
.
first
]
=
1.0
;
(
*
alias_
)[
little
.
first
]
=
-
1
;
}
if
(
!
bigs
.
empty
())
{
// bigs.second is close to 1.0
auto
big
=
bigs
.
front
();
(
*
alias_probs_
)[
big
.
first
]
=
1.0
;
(
*
alias_
)[
big
.
first
]
=
-
1
;
}
alias_probs_
=
alias_probabilities
;
probs_
=
probabilities
;
alias_
=
alias
;
}
int64_t
CustomSampler
::
Sample
()
const
{
auto
index
=
(
*
int_dist_
)(
*
random_engine_
);
auto
p
=
(
*
real_dist_
)(
*
random_engine_
);
if
(
p
>
(
*
alias_probs_
)
[
index
])
{
return
(
*
alias_
)
[
index
];
if
(
p
>
alias_probs_
[
index
])
{
return
alias_
[
index
];
}
else
{
return
index
;
}
}
float
CustomSampler
::
Probability
(
int64_t
value
)
const
{
return
(
*
probs_
)[
value
];
}
float
CustomSampler
::
Probability
(
int64_t
value
)
const
{
return
probs_
[
value
];
}
}
// namespace math
}
// namespace operators
...
...
paddle/fluid/operators/math/sampler.h
浏览文件 @
87648f8e
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <memory>
#include <random>
...
...
@@ -38,9 +39,12 @@ class Sampler {
seed_
=
seed
;
}
}
virtual
~
Sampler
();
// Sample a single value
virtual
int64_t
Sample
()
const
=
0
;
// The probability that a single call to Sample() returns the given value.
virtual
float
Probability
(
int64_t
value
)
const
=
0
;
...
...
@@ -99,6 +103,7 @@ class LogUniformSampler : public Sampler {
class
CustomSampler
:
public
Sampler
{
public:
explicit
CustomSampler
(
int64_t
range
,
const
float
*
probabilities
,
const
int
*
alias
,
const
float
*
alias_probabilities
,
unsigned
int
seed
=
0UL
);
~
CustomSampler
()
override
{}
...
...
@@ -108,10 +113,10 @@ class CustomSampler : public Sampler {
float
Probability
(
int64_t
value
)
const
override
;
private:
std
::
shared_ptr
<
std
::
vector
<
float
>>
alias_probs_
;
std
::
shared_ptr
<
std
::
vector
<
int64_t
>>
alias_
;
std
::
shared_ptr
<
std
::
vector
<
float
>>
probs_
;
std
::
shared_ptr
<
std
::
mt19937
_64
>
random_engine_
;
const
float
*
alias_probs_
;
const
int
*
alias_
;
const
float
*
probs_
;
std
::
shared_ptr
<
std
::
mt19937
>
random_engine_
;
std
::
shared_ptr
<
std
::
uniform_real_distribution
<>>
real_dist_
;
std
::
shared_ptr
<
std
::
uniform_int_distribution
<>>
int_dist_
;
};
...
...
paddle/fluid/operators/math/sequence_pooling.cu
浏览文件 @
87648f8e
...
...
@@ -16,13 +16,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_pooling.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/macros.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
#define FLT_MAX __FLT_MAX__
template
<
typename
T
>
struct
MaxPoolFunctor
{
HOSTDEVICE
void
operator
()(
const
T
*
input
,
const
size_t
start
,
...
...
paddle/fluid/operators/nce_op.cc
浏览文件 @
87648f8e
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/nce_op.h"
#include <string>
#include <vector>
namespace
paddle
{
...
...
@@ -25,7 +26,7 @@ class NCEOp : public framework::OperatorWithKernel {
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Label"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Weight"
));
...
...
@@ -67,7 +68,7 @@ class NCEOp : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
platform
::
CPUPlace
());
...
...
@@ -101,11 +102,24 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
.
AsDispensable
();
AddInput
(
"CustomDist
ribution
"
,
"CustomDist
Probs
"
,
"(Tensor) It is used in 'CostumDist' sampler. "
"It is a tensor with shape [num_total_classes]."
"The i-th element is the probsbility of the i-th class being sampled."
)
.
AsDispensable
();
AddInput
(
"CustomDistAlias"
,
"(Tensor) It is used in 'CostumDist' sampler. "
"It is a tensor with shape [num_total_classes]."
"The i-th element is the probsbility of the i-th class being sampled."
)
.
AsDispensable
();
AddInput
(
"CustomDistAliasProbs"
,
"(Tensor) It is used in 'CostumDist' sampler. "
"It is a tensor with shape [num_total_classes]."
"The i-th element is the probsbility of the i-th class being sampled."
)
.
AsDispensable
();
AddOutput
(
"Cost"
,
"(Tensor) A tensor of shape [batch_size, 1]. Cost of samples."
);
AddOutput
(
"SampleLogits"
,
...
...
@@ -124,21 +138,22 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"kernel to compute grads."
""
)
.
AsIntermediate
();
AddAttr
<
int
>
(
"num_total_classes"
,
"Total number of classes in all samples."
);
AddAttr
<
int
>
(
"num_neg_samples"
,
"The number of negative classes. The default value is 10."
)
.
SetDefault
(
10
);
AddAttr
<
int
>
(
"sampler"
,
"(int) Which sampler to be used to sample negative class."
"0: Uniform; 1: LogUniform; 2: CostumDist."
)
.
SetDefault
(
0
);
AddAttr
<
int
>
(
"seed"
,
"(int) The seed used in sampler. If it is 0, "
"the sampler will generate a seed randomly."
)
.
SetDefault
(
0
);
AddAttr
<
bool
>
(
"is_sparse"
,
"(boolean, default false) Sparse update."
)
.
SetDefault
(
false
);
AddAttr
<
std
::
vector
<
int
>>
(
"custom_neg_classes"
,
"This attribute only be used in unitest. Classes "
...
...
@@ -156,11 +171,19 @@ By default this operator uses a uniform distribution for sampling.
}
};
class
NCEOpGradDescMaker
:
public
framework
::
DefaultGradOpDescMaker
<
true
>
{
using
::
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>::
DefaultGradOpDescMaker
;
protected:
virtual
std
::
string
GradOpType
()
const
{
return
"nce_grad"
;
}
};
class
NCEOpGrad
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Weight"
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Cost"
));
...
...
@@ -190,20 +213,45 @@ class NCEOpGrad : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
platform
::
CPUPlace
());
}
};
class
NCEOpGradVarTypeInference
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
auto
weight_grad
=
op_desc
.
Output
(
framework
::
GradVarName
(
"Weight"
)).
front
();
auto
bias_grad
=
op_desc
.
Output
(
framework
::
GradVarName
(
"Bias"
)).
front
();
auto
attr
=
op_desc
.
GetAttr
(
"is_sparse"
);
bool
is_sparse
=
boost
::
get
<
bool
>
(
attr
);
if
(
is_sparse
)
{
VLOG
(
30
)
<<
"nce_op_grad op "
<<
weight_grad
<<
" and "
<<
bias_grad
<<
" is set to SelectedRows"
;
block
->
Var
(
weight_grad
)
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
block
->
Var
(
bias_grad
)
->
SetType
(
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
else
{
VLOG
(
30
)
<<
"nce_op_grad op "
<<
weight_grad
<<
" and "
<<
bias_grad
<<
" is set to LoDTensor"
;
block
->
Var
(
weight_grad
)
->
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
block
->
Var
(
bias_grad
)
->
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
}
block
->
Var
(
weight_grad
)
->
SetDataType
(
block
->
Var
(
"Input"
)
->
GetDataType
());
block
->
Var
(
bias_grad
)
->
SetDataType
(
block
->
Var
(
"Input"
)
->
GetDataType
());
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
nce
,
ops
::
NCEOp
,
ops
::
NCEOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
nce_grad
,
ops
::
NCEOpGrad
);
REGISTER_OPERATOR
(
nce
,
ops
::
NCEOp
,
ops
::
NCEOpGradDescMaker
,
ops
::
NCEOpMaker
);
REGISTER_OPERATOR
(
nce_grad
,
ops
::
NCEOpGrad
,
ops
::
NCEOpGradVarTypeInference
);
REGISTER_OP_CPU_KERNEL
(
nce
,
ops
::
NCEKernel
<
paddle
::
platform
::
CPUPlace
,
float
>
,
ops
::
NCEKernel
<
paddle
::
platform
::
CPUPlace
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
nce_grad
,
...
...
paddle/fluid/operators/nce_op.h
浏览文件 @
87648f8e
...
...
@@ -16,26 +16,32 @@ limitations under the License. */
#include <math.h>
#include <random>
#include <set>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/math/sampler.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
SelectedRows
=
framework
::
SelectedRows
;
using
Sampler
=
math
::
Sampler
;
using
DDim
=
framework
::
DDim
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
template
<
typename
DeviceContext
,
typename
T
>
void
PrepareSamples
(
const
framework
::
ExecutionContext
&
context
,
Sampler
*
sampler
)
{
void
PrepareSamples
(
const
framework
::
ExecutionContext
&
context
,
Sampler
*
sampler
)
{
auto
label
=
context
.
Input
<
Tensor
>
(
"Label"
);
const
int64_t
*
label_data
=
label
->
data
<
int64_t
>
();
const
int64_t
*
label_data
=
label
->
data
<
int64_t
>
();
auto
label_dims
=
label
->
dims
();
// int num_total_classes = context.Attr<int>("num_total_classes");
// for unitest
...
...
@@ -44,7 +50,7 @@ void PrepareSamples(const framework::ExecutionContext& context,
auto
sample_labels
=
context
.
Output
<
Tensor
>
(
"SampleLabels"
);
auto
sample_labels_dims
=
sample_labels
->
dims
();
int64_t
*
sample_labels_data
=
int64_t
*
sample_labels_data
=
sample_labels
->
mutable_data
<
int64_t
>
(
context
.
GetPlace
());
int
num_label
=
label_dims
.
size
()
==
2
?
label_dims
[
1
]
:
1
;
...
...
@@ -70,13 +76,13 @@ void PrepareSamples(const framework::ExecutionContext& context,
template
<
typename
DeviceContext
,
typename
T
>
class
NCEKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
int
sampler_type
=
context
.
Attr
<
int
>
(
"sampler"
);
int
seed
=
context
.
Attr
<
int
>
(
"seed"
);
int
num_total_classes
=
context
.
Attr
<
int
>
(
"num_total_classes"
);
int
num_neg_samples
=
context
.
Attr
<
int
>
(
"num_neg_samples"
);
Sampler
*
sampler
;
Sampler
*
sampler
;
switch
(
sampler_type
)
{
case
0
:
{
sampler
=
new
math
::
UniformSampler
(
num_total_classes
-
1
,
seed
);
...
...
@@ -87,11 +93,19 @@ class NCEKernel : public framework::OpKernel<T> {
break
;
}
case
2
:
{
auto
custom_dist
=
context
.
Input
<
Tensor
>
(
"CustomDistribution"
);
const
float
*
custom_dist_data
=
custom_dist
->
data
<
float
>
();
PADDLE_ENFORCE_EQ
(
custom_dist
->
numel
(),
num_total_classes
);
sampler
=
new
math
::
CustomSampler
(
num_total_classes
-
1
,
custom_dist_data
,
seed
);
auto
dist_probs
=
context
.
Input
<
Tensor
>
(
"CustomDistProbs"
);
auto
dist_alias
=
context
.
Input
<
Tensor
>
(
"CustomDistAlias"
);
auto
dist_alias_probs
=
context
.
Input
<
Tensor
>
(
"CustomDistAliasProbs"
);
PADDLE_ENFORCE_EQ
(
dist_probs
->
numel
(),
num_total_classes
);
PADDLE_ENFORCE_EQ
(
dist_alias
->
numel
(),
num_total_classes
);
PADDLE_ENFORCE_EQ
(
dist_alias_probs
->
numel
(),
num_total_classes
);
const
float
*
probs_data
=
dist_probs
->
data
<
float
>
();
const
int
*
alias_data
=
dist_alias
->
data
<
int
>
();
const
float
*
alias_probs_data
=
dist_alias_probs
->
data
<
float
>
();
sampler
=
new
math
::
CustomSampler
(
num_total_classes
-
1
,
probs_data
,
alias_data
,
alias_probs_data
,
seed
);
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported SamplerType."
);
}
...
...
@@ -99,17 +113,17 @@ class NCEKernel : public framework::OpKernel<T> {
PrepareSamples
<
DeviceContext
,
T
>
(
context
,
sampler
);
auto
sample_labels
=
context
.
Output
<
Tensor
>
(
"SampleLabels"
);
const
int64_t
*
sample_labels_data
=
sample_labels
->
data
<
int64_t
>
();
const
int64_t
*
sample_labels_data
=
sample_labels
->
data
<
int64_t
>
();
auto
sample_out
=
context
.
Output
<
Tensor
>
(
"SampleLogits"
);
T
*
sample_out_data
=
sample_out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
sample_out_data
=
sample_out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
label
=
context
.
Input
<
Tensor
>
(
"Label"
);
auto
sample_weight
=
context
.
Input
<
Tensor
>
(
"SampleWeight"
);
const
T
*
sample_weight_data
=
nullptr
;
const
T
*
sample_weight_data
=
nullptr
;
if
(
sample_weight
!=
nullptr
)
{
sample_weight_data
=
sample_weight
->
data
<
T
>
();
}
auto
out
=
context
.
Output
<
Tensor
>
(
"Cost"
);
T
*
out_data
=
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
T
*
out_data
=
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int64_t
num_true_class
=
1
;
if
(
label
!=
nullptr
)
{
num_true_class
=
label
->
dims
()[
1
];
...
...
@@ -119,7 +133,7 @@ class NCEKernel : public framework::OpKernel<T> {
// forward bias
auto
bias
=
context
.
Input
<
Tensor
>
(
"Bias"
);
if
(
bias
!=
nullptr
)
{
const
T
*
bias_data
=
bias
->
data
<
T
>
();
const
T
*
bias_data
=
bias
->
data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
sample_out_data
[
i
]
=
bias_data
[
sample_labels_data
[
i
]];
}
...
...
@@ -158,16 +172,16 @@ class NCEKernel : public framework::OpKernel<T> {
template
<
typename
DeviceContext
,
typename
T
>
class
NCEGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
d_out
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Cost"
));
const
T
*
d_out_data
=
d_out
->
data
<
T
>
();
const
T
*
d_out_data
=
d_out
->
data
<
T
>
();
auto
label
=
context
.
Input
<
Tensor
>
(
"Label"
);
auto
sample_out
=
context
.
Input
<
Tensor
>
(
"SampleLogits"
);
const
T
*
sample_out_data
=
sample_out
->
data
<
T
>
();
const
T
*
sample_out_data
=
sample_out
->
data
<
T
>
();
auto
sample_labels
=
context
.
Input
<
Tensor
>
(
"SampleLabels"
);
const
int64_t
*
sample_labels_data
=
sample_labels
->
data
<
int64_t
>
();
const
int64_t
*
sample_labels_data
=
sample_labels
->
data
<
int64_t
>
();
auto
sample_weight
=
context
.
Input
<
Tensor
>
(
"SampleWeight"
);
const
T
*
sample_weight_data
=
nullptr
;
const
T
*
sample_weight_data
=
nullptr
;
if
(
sample_weight
!=
nullptr
)
{
sample_weight_data
=
sample_weight
->
data
<
T
>
();
}
...
...
@@ -180,7 +194,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
int
sampler_type
=
context
.
Attr
<
int
>
(
"sampler"
);
int
seed
=
context
.
Attr
<
int
>
(
"seed"
);
Sampler
*
sampler
;
Sampler
*
sampler
;
switch
(
sampler_type
)
{
case
0
:
{
sampler
=
new
math
::
UniformSampler
(
num_total_classes
-
1
,
seed
);
...
...
@@ -191,11 +205,19 @@ class NCEGradKernel : public framework::OpKernel<T> {
break
;
}
case
2
:
{
auto
custom_dist
=
context
.
Input
<
Tensor
>
(
"CustomDistribution"
);
const
float
*
custom_dist_data
=
custom_dist
->
data
<
float
>
();
PADDLE_ENFORCE_EQ
(
custom_dist
->
numel
(),
num_total_classes
);
sampler
=
new
math
::
CustomSampler
(
num_total_classes
-
1
,
custom_dist_data
,
seed
);
auto
dist_probs
=
context
.
Input
<
Tensor
>
(
"CustomDistProbs"
);
auto
dist_alias
=
context
.
Input
<
Tensor
>
(
"CustomDistAlias"
);
auto
dist_alias_probs
=
context
.
Input
<
Tensor
>
(
"CustomDistAliasProbs"
);
PADDLE_ENFORCE_EQ
(
dist_probs
->
numel
(),
num_total_classes
);
PADDLE_ENFORCE_EQ
(
dist_alias
->
numel
(),
num_total_classes
);
PADDLE_ENFORCE_EQ
(
dist_alias_probs
->
numel
(),
num_total_classes
);
const
float
*
probs_data
=
dist_probs
->
data
<
float
>
();
const
int
*
alias_data
=
dist_alias
->
data
<
int
>
();
const
float
*
alias_probs_data
=
dist_alias_probs
->
data
<
float
>
();
sampler
=
new
math
::
CustomSampler
(
num_total_classes
-
1
,
probs_data
,
alias_data
,
alias_probs_data
,
seed
);
break
;
}
default:
{
PADDLE_THROW
(
"Unsupported SamplerType."
);
}
...
...
@@ -203,7 +225,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
// T b = 1. / num_total_classes * num_neg_samples;
Tensor
sample_grad
;
// tmp tensor
T
*
sample_grad_data
=
T
*
sample_grad_data
=
sample_grad
.
mutable_data
<
T
>
(
sample_labels
->
dims
(),
context
.
GetPlace
());
// backward cost
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
...
...
@@ -217,32 +239,105 @@ class NCEGradKernel : public framework::OpKernel<T> {
:
w
*
(
o
*
(
1
-
o
)
/
(
o
+
b
));
sample_grad_data
[
i
]
*=
d_out_data
[
sample_idx
];
}
// get d_bias
auto
d_bias
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
if
(
d_bias
!=
nullptr
)
{
T
*
d_bias_data
=
d_bias
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_bias_data
,
d_bias_data
+
d_bias
->
numel
(),
0.0
);
bool
is_sparse
=
context
.
Attr
<
bool
>
(
"is_sparse"
);
if
(
!
is_sparse
)
{
// get d_bias
auto
d_bias
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
if
(
d_bias
!=
nullptr
)
{
T
*
d_bias_data
=
d_bias
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_bias_data
,
d_bias_data
+
d_bias
->
numel
(),
0.0
);
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
d_bias_data
[
sample_labels_data
[
i
]]
+=
sample_grad_data
[
i
];
}
}
// get d_w
auto
d_w
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Weight"
));
if
(
d_w
!=
nullptr
)
{
auto
d_w_data
=
d_w
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_w_data
,
d_w_data
+
d_w
->
numel
(),
0.0
);
auto
d_w_matrix
=
EigenMatrix
<
T
>::
From
(
*
d_w
);
auto
x_matrix
=
EigenMatrix
<
T
>::
From
(
*
(
context
.
Input
<
Tensor
>
(
"Input"
)));
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
d_w_matrix
.
chip
(
sample_labels_data
[
i
],
0
)
+=
x_matrix
.
chip
(
static_cast
<
int
>
(
i
/
sample_labels
->
dims
()[
1
]),
0
)
*
sample_grad_data
[
i
];
}
}
}
else
{
std
::
vector
<
int64_t
>
labels
;
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
d_bias_data
[
sample_labels_data
[
i
]]
+=
sample_grad_data
[
i
]
;
labels
.
push_back
(
sample_labels_data
[
i
])
;
}
}
// get d_w
auto
d_w
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Weight"
));
if
(
d_w
!=
nullptr
)
{
auto
d_w_data
=
d_w
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_w_data
,
d_w_data
+
d_w
->
numel
(),
0.0
);
auto
d_w_matrix
=
EigenMatrix
<
T
>::
From
(
*
d_w
);
std
::
set
<
T
>
st
(
labels
.
begin
(),
labels
.
end
());
labels
.
assign
(
st
.
begin
(),
st
.
end
());
auto
*
bias_var
=
context
.
InputVar
(
"Bias"
);
DDim
bias_dim
;
if
(
bias_var
->
IsType
<
LoDTensor
>
())
{
bias_dim
=
context
.
Input
<
LoDTensor
>
(
"Bias"
)
->
dims
();
}
else
if
(
bias_var
->
IsType
<
SelectedRows
>
())
{
auto
*
table_t
=
context
.
Input
<
SelectedRows
>
(
"Bias"
);
bias_dim
=
table_t
->
value
().
dims
();
}
else
{
PADDLE_THROW
(
"The parameter Bias of a NCE_OP "
"must be either LoDTensor or SelectedRows"
);
}
auto
d_bias
=
context
.
Output
<
SelectedRows
>
(
framework
::
GradVarName
(
"Bias"
));
d_bias
->
set_rows
(
labels
);
d_bias
->
set_height
(
bias_dim
[
0
]);
d_bias
->
mutable_value
()
->
Resize
(
{
static_cast
<
int64_t
>
(
labels
.
size
()),
bias_dim
[
1
]});
T
*
d_bias_data
=
d_bias
->
mutable_value
()
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_bias_data
,
d_bias_data
+
labels
.
size
(),
0.0
);
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
d_bias_data
[
d_bias
->
Index
(
sample_labels_data
[
i
])]
+=
sample_grad_data
[
i
];
}
auto
*
table_var
=
context
.
InputVar
(
"Weight"
);
DDim
table_dim
;
if
(
table_var
->
IsType
<
LoDTensor
>
())
{
table_dim
=
context
.
Input
<
LoDTensor
>
(
"Weight"
)
->
dims
();
}
else
if
(
table_var
->
IsType
<
SelectedRows
>
())
{
auto
*
table_t
=
context
.
Input
<
SelectedRows
>
(
"Weight"
);
table_dim
=
table_t
->
value
().
dims
();
}
else
{
PADDLE_THROW
(
"The parameter Weight of a NCE_OP "
"must be either LoDTensor or SelectedRows"
);
}
auto
d_w
=
context
.
Output
<
SelectedRows
>
(
framework
::
GradVarName
(
"Weight"
));
d_w
->
set_rows
(
labels
);
d_w
->
set_height
(
table_dim
[
0
]);
auto
*
d_table_value
=
d_w
->
mutable_value
();
d_table_value
->
Resize
(
{
static_cast
<
int64_t
>
(
labels
.
size
()),
table_dim
[
1
]});
auto
d_w_data
=
d_table_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_w_data
,
d_w_data
+
d_table_value
->
numel
(),
0.0
);
auto
d_w_matrix
=
EigenMatrix
<
T
>::
From
(
*
d_table_value
);
auto
x_matrix
=
EigenMatrix
<
T
>::
From
(
*
(
context
.
Input
<
Tensor
>
(
"Input"
)));
for
(
int64_t
i
=
0
;
i
<
sample_labels
->
numel
();
++
i
)
{
d_w_matrix
.
chip
(
sample_labels_data
[
i
]
,
0
)
+=
d_w_matrix
.
chip
(
d_w
->
Index
(
sample_labels_data
[
i
])
,
0
)
+=
x_matrix
.
chip
(
static_cast
<
int
>
(
i
/
sample_labels
->
dims
()[
1
]),
0
)
*
sample_grad_data
[
i
];
}
}
// get d_x
auto
d_x
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Input"
));
if
(
d_x
!=
nullptr
)
{
auto
*
d_x_data
=
d_x
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
d_x_data
=
d_x
->
mutable_data
<
T
>
(
context
.
GetPlace
());
std
::
fill
(
d_x_data
,
d_x_data
+
d_x
->
numel
(),
0.0
);
auto
d_x_matrix
=
EigenMatrix
<
T
>::
From
(
*
d_x
);
auto
w_matrix
=
EigenMatrix
<
T
>::
From
(
*
(
context
.
Input
<
Tensor
>
(
"Weight"
)));
...
...
@@ -251,6 +346,7 @@ class NCEGradKernel : public framework::OpKernel<T> {
w_matrix
.
chip
(
sample_labels_data
[
i
],
0
)
*
sample_grad_data
[
i
];
}
}
delete
sampler
;
}
};
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
87648f8e
...
...
@@ -20,12 +20,12 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#ifndef _WIN32
const
float
fraction_of_gpu_memory_to_use
=
0.92
f
;
const
expr
static
float
fraction_of_gpu_memory_to_use
=
0.92
f
;
#else
// fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle
const
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
const
expr
static
float
fraction_of_gpu_memory_to_use
=
0.5
f
;
#endif
DEFINE_double
(
fraction_of_gpu_memory_to_use
,
fraction_of_gpu_memory_to_use
,
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
87648f8e
...
...
@@ -29,8 +29,16 @@ limitations under the License. */
namespace
pybind11
{
namespace
detail
{
#if !defined(PYBIND11_HIDDEN)
#ifdef _WIN32
#define PYBIND11_HIDDEN __declspec(dllexport)
#else
#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
#endif
#endif
// Can be replaced by a generic lambda in C++14
struct
__attribute__
((
visibility
(
"hidden"
)))
paddle_variant_caster_visitor
struct
PYBIND11_HIDDEN
paddle_variant_caster_visitor
:
public
boost
::
static_visitor
<
handle
>
{
return_value_policy
policy
;
handle
parent
;
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
87648f8e
...
...
@@ -860,6 +860,12 @@ All parameter, weight, gradient are variables in Paddle.
self
.
remove_unnecessary_lock_
=
b
;
},
R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default False.)DOC"
)
.
def_property
(
"num_trainers"
,
[](
const
BuildStrategy
&
self
)
{
return
self
.
num_trainers_
;
},
[](
BuildStrategy
&
self
,
int
num_trainers
)
{
self
.
num_trainers_
=
num_trainers
;
})
.
def_property
(
"fuse_elewise_add_act_ops"
,
[](
const
BuildStrategy
&
self
)
{
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
87648f8e
...
...
@@ -4394,7 +4394,8 @@ def nce(input,
name
=
None
,
sampler
=
"uniform"
,
custom_dist
=
None
,
seed
=
0
):
seed
=
0
,
is_sparse
=
False
):
"""
${comment}
...
...
@@ -4420,11 +4421,12 @@ def nce(input,
sampler (str): The sampler used to sample class from negtive classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'.
default: 'uniform'.
custom_dist (
Variable): A tensor with shape [num_total_classes]
.
custom_dist (
float[]): A float[] with size=num_total_classes
.
It is used when sampler is set to 'custom_dist'.
custom_dist[i] is the probsbility of i-th class to be sampled.
default: None.
seed (int): The seed used in sampler. default: 0.
is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows.
Returns:
Variable: The output nce loss.
...
...
@@ -4476,12 +4478,7 @@ def nce(input,
shape
=
[
num_total_classes
,
dim
],
is_bias
=
False
,
dtype
=
input
.
dtype
)
inputs
=
{
'Input'
:
input
,
'Label'
:
label
,
'Weight'
:
w
,
'SampleWeight'
:
sample_weight
if
sample_weight
is
not
None
else
[]
}
inputs
=
{}
if
helper
.
bias_attr
:
b
=
helper
.
create_parameter
(
attr
=
helper
.
bias_attr
,
...
...
@@ -4493,18 +4490,10 @@ def nce(input,
sample_logits
=
helper
.
create_variable_for_type_inference
(
dtype
=
input
.
dtype
)
sample_labels
=
helper
.
create_variable_for_type_inference
(
dtype
=
label
.
dtype
)
if
num_neg_samples
is
None
:
num_neg_samples
=
10
else
:
num_neg_samples
=
int
(
num_neg_samples
)
inputs
=
{
'Input'
:
input
,
'Label'
:
label
,
'Weight'
:
w
,
'Bias'
:
b
,
'SampleWeight'
:
sample_weight
if
sample_weight
is
not
None
else
[]
}
inputs
[
'Input'
]
=
input
inputs
[
'Label'
]
=
label
inputs
[
'Weight'
]
=
w
inputs
[
'SampleWeight'
]
=
sample_weight
if
sample_weight
is
not
None
else
[]
if
sampler
==
"uniform"
:
sampler
=
0
...
...
@@ -4512,17 +4501,73 @@ def nce(input,
sampler
=
1
elif
sampler
==
"custom_dist"
:
assert
custom_dist
is
not
None
assert
isinstance
(
custom_dist
,
Variable
)
inputs
[
'CustomDistribution'
]
=
custom_dist
# assert isinstance(custom_dist, Variable)
custom_dist_len
=
len
(
custom_dist
)
alias_probs_
=
[
0
]
*
custom_dist_len
alias_
=
[
0
]
*
custom_dist_len
bigs
=
[]
littles
=
[]
for
i
in
range
(
custom_dist_len
):
normal_prob
=
custom_dist
[
i
]
*
custom_dist_len
if
normal_prob
-
1.0
>
1e-4
:
bigs
.
append
((
i
,
normal_prob
))
elif
1.0
-
normal_prob
>
1e-4
:
littles
.
append
((
i
,
normal_prob
))
else
:
alias_probs_
[
i
]
=
normal_prob
alias_
[
i
]
=
-
1
while
len
(
bigs
)
and
len
(
littles
):
big
=
bigs
.
pop
(
0
)
little
=
littles
.
pop
(
0
)
big_idx
=
big
[
0
]
big_prob
=
big
[
1
]
alias_probs_
[
little
[
0
]]
=
little
[
1
]
alias_
[
little
[
0
]]
=
big_idx
big_left
=
big
[
1
]
+
little
[
1
]
-
1
if
big_left
-
1.0
>
1e-4
:
bigs
.
append
((
big_idx
,
big_left
))
elif
1.0
-
big_left
>
1e-4
:
littles
.
append
((
big_idx
,
big_left
))
else
:
alias_probs_
[
big_idx
]
=
big_left
alias_
[
big_idx
]
=
-
1
if
len
(
bigs
):
big
=
bigs
.
pop
(
0
)
alias_probs_
[
big
[
0
]]
=
1.0
alias_
[
big
[
0
]]
=
-
1
if
len
(
littles
):
little
=
littles
.
pop
(
0
)
alias_probs_
[
little
[
0
]]
=
1.0
alias_
[
little
[
0
]]
=
-
1
probs
=
assign
(
input
=
np
.
array
(
custom_dist
).
astype
(
'float32'
))
custom_alias
=
assign
(
input
=
np
.
array
(
alias_
).
astype
(
'int32'
))
custom_alias_probs
=
assign
(
input
=
np
.
array
(
alias_probs_
).
astype
(
'float32'
))
inputs
[
'CustomDistProbs'
]
=
probs
inputs
[
'CustomDistAlias'
]
=
custom_alias
inputs
[
'CustomDistAliasProbs'
]
=
custom_alias_probs
sampler
=
2
else
:
raise
Exception
(
"Unsupported sampler type."
)
if
num_neg_samples
is
None
:
num_neg_samples
=
10
else
:
num_neg_samples
=
int
(
num_neg_samples
)
attrs
=
{
'num_total_classes'
:
int
(
num_total_classes
),
'num_neg_samples'
:
num_neg_samples
,
'seed'
:
seed
,
'sampler'
:
sampler
'sampler'
:
sampler
,
'is_sparse'
:
is_sparse
}
helper
.
append_op
(
...
...
@@ -6525,7 +6570,7 @@ def crop(x, shape=None, offsets=None, name=None):
helper
=
LayerHelper
(
'crop'
,
**
locals
())
if
not
(
isinstance
(
shape
,
list
)
or
isinstance
(
shape
,
tuple
)
or
\
isinstance
(
shape
,
Variable
)):
isinstance
(
shape
,
Variable
)):
raise
ValueError
(
"The shape should be a list, tuple or Variable."
)
if
offsets
is
None
:
...
...
@@ -6647,7 +6692,7 @@ def affine_grid(theta, out_shape, name=None):
helper
=
LayerHelper
(
'affine_grid'
)
if
not
(
isinstance
(
out_shape
,
list
)
or
isinstance
(
out_shape
,
tuple
)
or
\
isinstance
(
out_shape
,
Variable
)):
isinstance
(
out_shape
,
Variable
)):
raise
ValueError
(
"The out_shape should be a list, tuple or Variable."
)
if
not
isinstance
(
theta
,
Variable
):
...
...
@@ -6888,6 +6933,13 @@ def elu(x, alpha=1.0, name=None):
Returns:
output(${out_type}): ${out_comment}
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[3,10,32,32], dtype="float32")
y = fluid.layers.elu(x, alpha=0.2)
"""
helper
=
LayerHelper
(
'elu'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
...
...
@@ -6911,6 +6963,13 @@ def relu6(x, threshold=6.0, name=None):
Returns:
output(${out_type}): ${out_comment}
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[3,10,32,32], dtype="float32")
y = fluid.layers.relu6(x, threshold=6.0)
"""
helper
=
LayerHelper
(
'relu6'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
...
...
@@ -6934,6 +6993,13 @@ def pow(x, factor=1.0, name=None):
Returns:
output(${out_type}): ${out_comment}
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[3,10,32,32], dtype="float32")
y = fluid.layers.pow(x, factor=2.0)
"""
helper
=
LayerHelper
(
'pow'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
...
...
@@ -6958,6 +7024,13 @@ def stanh(x, scale_a=2.0 / 3.0, scale_b=1.7159, name=None):
Returns:
output(${out_type}): ${out_comment}
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[3,10,32,32], dtype="float32")
y = fluid.layers.stanh(x, scale_a=0.67, scale_b=1.72)
"""
helper
=
LayerHelper
(
'stanh'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
...
...
@@ -6983,6 +7056,13 @@ def hard_sigmoid(x, slope=0.2, offset=0.5, name=None):
Returns:
output(${out_type}): ${out_comment}
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[3,10,32,32], dtype="float32")
y = fluid.layers.hard_sigmoid(x, slope=0.3, offset=0.8)
"""
helper
=
LayerHelper
(
'hard_sigmoid'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
...
...
@@ -7007,6 +7087,13 @@ def swish(x, beta=1.0, name=None):
Returns:
output(${out_type}): ${out_comment}
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[3,10,32,32], dtype="float32")
y = fluid.layers.swish(x, beta=2.0)
"""
helper
=
LayerHelper
(
'swish'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
87648f8e
...
...
@@ -124,16 +124,11 @@ class ParallelExecutor(object):
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
exec_strategy
.
num_threads
=
cpu_num
*
2
# Set 1 thread num under nccl2 distribute
# env to make sure all gpus run ops in same order.
if
num_trainers
>
1
:
assert
(
use_cuda
)
# FIXME(gongwb): avoid this set.
exec_strategy
.
num_threads
=
1
if
build_strategy
is
None
:
build_strategy
=
BuildStrategy
()
build_strategy
.
num_trainers
=
num_trainers
main
=
main_program
main
=
main
if
main
else
framework
.
default_main_program
()
if
scope
==
None
:
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
87648f8e
...
...
@@ -63,7 +63,7 @@ function(py_test_modules TARGET_NAME)
set
(
multiValueArgs MODULES DEPS ENVS
)
cmake_parse_arguments
(
py_test_modules
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_test
(
NAME
${
TARGET_NAME
}
COMMAND env PYTHONPATH=
${
PADDLE_BINARY_DIR
}
/python
${
py_test_modules_ENVS
}
COMMAND
${
CMAKE_COMMAND
}
-E
env PYTHONPATH=
${
PADDLE_BINARY_DIR
}
/python
${
py_test_modules_ENVS
}
${
PYTHON_EXECUTABLE
}
${
PADDLE_SOURCE_DIR
}
/tools/test_runner.py
${
py_test_modules_MODULES
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
if
(
py_test_modules_SERIAL
)
...
...
python/paddle/fluid/tests/unittests/test_nce.py
浏览文件 @
87648f8e
...
...
@@ -14,8 +14,12 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.initializer
as
initializer
from
op_test
import
OpTest
...
...
@@ -59,7 +63,7 @@ def nce(input, weight, bias, sample_weight, labels, num_classes,
class
TestNCE
(
OpTest
):
def
generate_data
(
self
,
dim
,
batch_size
,
num_classes
,
num_true_class
,
num_neg_samples
):
num_neg_samples
,
is_sparse
):
input
=
np
.
random
.
randn
(
batch_size
,
dim
).
astype
(
np
.
float32
)
weight
=
np
.
random
.
randn
(
num_classes
,
dim
).
astype
(
np
.
float32
)
bias
=
np
.
random
.
randn
(
num_classes
).
astype
(
np
.
float32
)
...
...
@@ -70,7 +74,8 @@ class TestNCE(OpTest):
'num_neg_samples'
:
num_neg_samples
,
'custom_neg_classes'
:
list
(
range
(
num_neg_samples
)),
'seed'
:
0
,
'sampler'
:
0
'sampler'
:
0
,
'is_sparse'
:
is_sparse
}
self
.
inputs
=
{
'Input'
:
input
,
...
...
@@ -81,7 +86,7 @@ class TestNCE(OpTest):
}
def
set_data
(
self
):
self
.
generate_data
(
5
,
5
,
4
,
1
,
2
)
self
.
generate_data
(
5
,
5
,
4
,
1
,
2
,
False
)
def
compute
(
self
):
out
=
nce
(
self
.
inputs
[
'Input'
],
self
.
inputs
[
'Weight'
],
...
...
@@ -107,9 +112,110 @@ class TestNCE(OpTest):
[
"Input"
,
"Weight"
,
"Bias"
],
"Cost"
,
max_relative_error
=
0.02
)
class
TestNCECase1
(
TestNCE
):
class
TestNCECase1
Tensor
(
TestNCE
):
def
set_data
(
self
):
self
.
generate_data
(
10
,
20
,
10
,
2
,
5
)
self
.
generate_data
(
10
,
20
,
10
,
2
,
5
,
False
)
class
TestNCECase1SelectedRows
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
base_lr
=
0.0001
self
.
batch_size
=
8
@
staticmethod
def
get_place
():
place
=
fluid
.
core
.
CPUPlace
()
return
place
@
staticmethod
def
get_train_data
(
batch_size
):
batchs
=
[]
for
i
in
range
(
batch_size
):
input
=
np
.
random
.
randn
(
batch_size
,
10
).
astype
(
np
.
float32
)
labels
=
np
.
random
.
randint
(
0
,
20
,
(
batch_size
,
1
))
batchs
.
append
([
input
,
labels
])
return
batchs
def
get_optimizer
(
self
):
# SGD optimizer
optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
self
.
base_lr
)
return
optimizer
def
train_network
(
self
,
num_total_classes
,
num_neg_samples
,
sampler
,
custom_dist
,
is_sparse
):
input
=
fluid
.
layers
.
data
(
name
=
"input"
,
shape
=
[
10
],
dtype
=
"float32"
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
w_param
=
fluid
.
default_main_program
().
global_block
().
create_parameter
(
shape
=
[
num_total_classes
,
10
],
dtype
=
'float32'
,
name
=
'nce_w'
,
initializer
=
initializer
.
ConstantInitializer
())
b_param
=
fluid
.
default_main_program
().
global_block
().
create_parameter
(
shape
=
[
num_total_classes
,
1
],
dtype
=
'float32'
,
name
=
'nce_b'
,
initializer
=
initializer
.
ConstantInitializer
())
cost
=
fluid
.
layers
.
nce
(
input
=
input
,
label
=
label
,
num_total_classes
=
num_total_classes
,
sampler
=
sampler
,
custom_dist
=
custom_dist
,
sample_weight
=
None
,
param_attr
=
'nce_w'
,
bias_attr
=
'nce_b'
,
seed
=
1
,
num_neg_samples
=
num_neg_samples
,
is_sparse
=
is_sparse
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
# optimizer
optimizer
=
self
.
get_optimizer
()
optimizer
.
minimize
(
avg_cost
)
return
[
avg_cost
,
[
input
,
label
]]
def
test_input_is_selected_rows
(
self
):
place
=
self
.
get_place
()
exe
=
fluid
.
Executor
(
place
)
data
=
self
.
get_train_data
(
self
.
batch_size
)
nid_freq_arr
=
np
.
random
.
dirichlet
(
np
.
ones
(
20
)
*
1000
).
astype
(
'float32'
)
rets
=
[]
# for dense
dense_scope
=
fluid
.
core
.
Scope
()
dense_startup_program
=
fluid
.
framework
.
Program
()
dense_train_program
=
fluid
.
framework
.
Program
()
with
fluid
.
scope_guard
(
dense_scope
):
with
fluid
.
program_guard
(
dense_train_program
,
dense_startup_program
):
cost
,
feeds
=
self
.
train_network
(
20
,
5
,
"custom_dist"
,
nid_freq_arr
.
tolist
(),
False
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feeds
,
place
=
place
)
exe
.
run
(
dense_startup_program
)
loss_val
=
exe
.
run
(
dense_train_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
cost
.
name
])
rets
.
append
(
np
.
mean
(
loss_val
))
# for sparse
sparse_scope
=
fluid
.
core
.
Scope
()
sparse_startup_program
=
fluid
.
framework
.
Program
()
sparse_train_program
=
fluid
.
framework
.
Program
()
with
fluid
.
scope_guard
(
sparse_scope
):
with
fluid
.
program_guard
(
sparse_train_program
,
sparse_startup_program
):
cost
,
feeds
=
self
.
train_network
(
20
,
5
,
"custom_dist"
,
nid_freq_arr
.
tolist
(),
True
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feeds
,
place
=
place
)
exe
.
run
(
sparse_startup_program
)
loss_val
=
exe
.
run
(
sparse_train_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
cost
.
name
])
rets
.
append
(
np
.
mean
(
loss_val
))
self
.
assertEqual
(
rets
[
0
],
rets
[
1
])
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录