Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
85ddb5c7
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
85ddb5c7
编写于
9月 27, 2018
作者:
Q
qiaolongfei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into optimize-opyreader
上级
accf3f75
43a3af86
变更
55
隐藏空白更改
内联
并排
Showing
55 changed file
with
2499 addition
and
690 deletion
+2499
-690
doc/CMakeLists.txt
doc/CMakeLists.txt
+1
-3
paddle/fluid/API.spec
paddle/fluid/API.spec
+8
-7
paddle/fluid/framework/details/cow_ptr.h
paddle/fluid/framework/details/cow_ptr.h
+19
-61
paddle/fluid/framework/details/cow_ptr_test.cc
paddle/fluid/framework/details/cow_ptr_test.cc
+8
-0
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+16
-0
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+362
-241
paddle/fluid/framework/op_info.h
paddle/fluid/framework/op_info.h
+6
-11
paddle/fluid/framework/selected_rows_test.cc
paddle/fluid/framework/selected_rows_test.cc
+8
-1
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+3
-2
paddle/fluid/operators/detection/CMakeLists.txt
paddle/fluid/operators/detection/CMakeLists.txt
+7
-1
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+4
-3
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+449
-0
paddle/fluid/operators/detection_map_op.h
paddle/fluid/operators/detection_map_op.h
+15
-13
paddle/fluid/operators/extract_rows_op.cc
paddle/fluid/operators/extract_rows_op.cc
+1
-1
paddle/fluid/operators/lookup_table_op.cu
paddle/fluid/operators/lookup_table_op.cu
+2
-4
paddle/fluid/operators/math/selected_rows_functor.cu
paddle/fluid/operators/math/selected_rows_functor.cu
+4
-6
paddle/fluid/operators/read_op.cc
paddle/fluid/operators/read_op.cc
+0
-2
paddle/fluid/operators/sampling_id_op.cc
paddle/fluid/operators/sampling_id_op.cc
+8
-7
paddle/fluid/operators/scale_op.cc
paddle/fluid/operators/scale_op.cc
+4
-2
paddle/fluid/operators/sgd_op.cu
paddle/fluid/operators/sgd_op.cu
+1
-1
paddle/fluid/operators/shrink_rnn_memory_op.cc
paddle/fluid/operators/shrink_rnn_memory_op.cc
+8
-21
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+7
-7
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+0
-5
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+0
-5
paddle/fluid/platform/for_range.h
paddle/fluid/platform/for_range.h
+10
-29
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+0
-17
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+0
-3
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+17
-6
python/paddle/dataset/common.py
python/paddle/dataset/common.py
+6
-4
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+6
-1
python/paddle/fluid/contrib/op_frequence.py
python/paddle/fluid/contrib/op_frequence.py
+104
-0
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+3
-3
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+0
-1
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+263
-104
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+2
-7
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+2
-1
python/paddle/fluid/tests/unittests/dist_ctr.py
python/paddle/fluid/tests/unittests/dist_ctr.py
+109
-0
python/paddle/fluid/tests/unittests/dist_ctr_reader.py
python/paddle/fluid/tests/unittests/dist_ctr_reader.py
+172
-0
python/paddle/fluid/tests/unittests/dist_mnist.py
python/paddle/fluid/tests/unittests/dist_mnist.py
+3
-3
python/paddle/fluid/tests/unittests/dist_simnet_bow.py
python/paddle/fluid/tests/unittests/dist_simnet_bow.py
+238
-0
python/paddle/fluid/tests/unittests/dist_text_classification.py
.../paddle/fluid/tests/unittests/dist_text_classification.py
+231
-0
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+8
-4
python/paddle/fluid/tests/unittests/dist_word2vec.py
python/paddle/fluid/tests/unittests/dist_word2vec.py
+3
-0
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+1
-1
python/paddle/fluid/tests/unittests/test_detection_map_op.py
python/paddle/fluid/tests/unittests/test_detection_map_op.py
+3
-2
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+157
-92
python/paddle/fluid/tests/unittests/test_dist_ctr.py
python/paddle/fluid/tests/unittests/test_dist_ctr.py
+31
-0
python/paddle/fluid/tests/unittests/test_dist_mnist.py
python/paddle/fluid/tests/unittests/test_dist_mnist.py
+2
-2
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+3
-1
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
+79
-0
python/paddle/fluid/tests/unittests/test_dist_text_classification.py
...le/fluid/tests/unittests/test_dist_text_classification.py
+40
-0
python/paddle/fluid/tests/unittests/test_dist_word2vec.py
python/paddle/fluid/tests/unittests/test_dist_word2vec.py
+1
-1
python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
...addle/fluid/tests/unittests/test_generate_proposals_op.py
+1
-2
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+60
-1
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+3
-1
未找到文件。
doc/CMakeLists.txt
浏览文件 @
85ddb5c7
add_custom_target
(
paddle_apis ALL
DEPENDS paddle_v2_apis
paddle_fluid_apis
)
DEPENDS paddle_v2_apis
)
add_custom_target
(
paddle_docs ALL
DEPENDS paddle_v2_docs paddle_v2_docs_cn
paddle_fluid_docs paddle_fluid_docs_cn
paddle_mobile_docs paddle_mobile_docs_cn
)
add_subdirectory
(
v2
)
add_subdirectory
(
fluid
)
add_subdirectory
(
mobile
)
paddle/fluid/API.spec
浏览文件 @
85ddb5c7
...
...
@@ -153,6 +153,13 @@ paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
...
...
@@ -224,13 +231,6 @@ paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwarg
paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.gaussian_random ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sampling_id ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
...
...
@@ -298,6 +298,7 @@ paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs
paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/details/cow_ptr.h
浏览文件 @
85ddb5c7
...
...
@@ -20,79 +20,37 @@ namespace paddle {
namespace
framework
{
namespace
details
{
// Change it to thread safe flags if needed.
class
ThreadUnsafeOwnershipFlags
{
template
<
class
T
>
class
COWPtr
{
public:
explicit
ThreadUnsafeOwnershipFlags
(
bool
flag
)
:
flag_
(
flag
)
{}
ThreadUnsafeOwnershipFlags
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
&
operator
=
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
(
ThreadUnsafeOwnershipFlags
&&
other
)
=
default
;
void
SetOwnership
(
bool
flag
)
{
flag_
=
flag
;
}
// Invoke the callback if it is not owned.
template
<
typename
Callback
>
void
AcquireOwnershipOnce
(
Callback
acquire
)
{
if
(
!
flag_
)
{
acquire
();
flag_
=
true
;
}
}
typedef
std
::
shared_ptr
<
T
>
RefPtr
;
private:
bool
flag_
;
};
RefPtr
m_sp
;
// Copy-On-Write pointer.
// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
//
// The template parameter OwnershipFlags should have:
// * a constructor takes a bool. True if own.
// * SetOwnership(bool flag).
// * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
// owned.
//
// https://en.wikipedia.org/wiki/Copy-on-write
template
<
typename
T
,
typename
OwnershipFlags
=
ThreadUnsafeOwnershipFlags
>
class
COWPtr
{
public:
// Ctor from raw pointer.
explicit
COWPtr
(
T
*
ptr
)
:
payload_
(
ptr
),
ownership_
{
true
}
{}
COWPtr
()
:
m_sp
(
nullptr
)
{}
explicit
COWPtr
(
T
*
t
)
:
m_sp
(
t
)
{}
// Move methods. Steal ownership from origin
COWPtr
(
COWPtr
&&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
std
::
move
(
other
.
ownership_
)}
{}
COWPtr
&
operator
=
(
COWPtr
&&
origin
)
=
default
;
const
T
&
Data
()
const
{
return
*
m_sp
;
}
// Copy methods. Not own payload
COWPtr
(
const
COWPtr
&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
false
}
{}
COWPtr
&
operator
=
(
const
COWPtr
&
other
)
{
payload_
=
other
.
payload_
;
ownership_
.
SetOwnership
(
false
);
return
*
this
;
}
// Access read only data.
const
T
&
Data
()
const
{
return
*
payload_
;
}
// Access mutable data. If the data is not owned, the data will be copied
// before.
T
*
MutableData
()
{
ownership_
.
AcquireOwnershipOnce
(
[
this
]
{
payload_
.
reset
(
new
T
(
*
payload_
));
});
return
payload_
.
get
();
DetachIfNotUnique
();
return
m_sp
.
get
();
}
private:
// Actual data pointer.
std
::
shared_ptr
<
T
>
payload_
;
void
DetachIfNotUnique
()
{
T
*
tmp
=
m_sp
.
get
();
if
(
!
(
tmp
==
nullptr
||
m_sp
.
unique
()))
{
Detach
();
}
}
// Ownership flag.
OwnershipFlags
ownership_
;
void
Detach
()
{
T
*
tmp
=
m_sp
.
get
();
m_sp
=
RefPtr
(
new
T
(
*
tmp
));
}
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/cow_ptr_test.cc
浏览文件 @
85ddb5c7
...
...
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
ASSERT_EQ
(
ptr2
.
Data
(),
10
);
}
TEST
(
COWPtr
,
change_old
)
{
COWPtr
<
int
>
ptr
(
new
int
{
0
});
COWPtr
<
int
>
ptr2
=
ptr
;
*
ptr
.
MutableData
()
=
10
;
ASSERT_EQ
(
ptr2
.
Data
(),
0
);
ASSERT_EQ
(
ptr
.
Data
(),
10
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
85ddb5c7
...
...
@@ -257,6 +257,22 @@ std::unique_ptr<ir::Graph> AttentionLSTMFusePass::ApplyImpl(
std
::
unique_ptr
<
ir
::
Graph
>
graph
)
const
{
PDPattern
external_pattern
,
subblock_pattern
;
// Use the following variables to tell whether this model is RNN1.
// This fuse can only works on the RNN1 model.
std
::
unordered_set
<
std
::
string
>
specified_vars
({
"data_lod_attention"
,
"cell_init"
,
"hidden_init"
,
"data"
,
"week"
,
"minute"
});
int
count
=
0
;
for
(
auto
*
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
specified_vars
.
count
(
node
->
Name
()))
{
++
count
;
}
}
if
(
count
<
specified_vars
.
size
())
{
return
graph
;
}
// Continue to fuse.
FindWhileOp
(
graph
.
get
());
return
graph
;
}
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
85ddb5c7
...
...
@@ -17,10 +17,13 @@
#include <algorithm>
#include <initializer_list>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#include "glog/logging.h"
...
...
@@ -28,206 +31,436 @@ namespace paddle {
namespace
framework
{
#if defined(PADDLE_WITH_CUDA)
namespace
details
{
struct
CUDABuffer
{
void
*
data_
{
nullptr
};
size_t
size_
{
0
};
platform
::
CUDAPlace
place_
;
CUDABuffer
()
{}
CUDABuffer
(
platform
::
Place
place
,
size_t
size
)
:
size_
(
size
),
place_
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
data_
=
memory
::
Alloc
(
place_
,
size
);
}
~
CUDABuffer
()
{
ClearMemory
();
}
CUDABuffer
(
const
CUDABuffer
&
o
)
=
delete
;
CUDABuffer
&
operator
=
(
const
CUDABuffer
&
o
)
=
delete
;
void
Resize
(
platform
::
Place
place
,
size_t
size
)
{
ClearMemory
();
place_
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
data_
=
memory
::
Alloc
(
place_
,
size
);
PADDLE_ENFORCE_NOT_NULL
(
data_
);
size_
=
size
;
}
void
Swap
(
CUDABuffer
&
o
)
{
std
::
swap
(
data_
,
o
.
data_
);
std
::
swap
(
place_
,
o
.
place_
);
std
::
swap
(
size_
,
o
.
size_
);
}
private:
void
ClearMemory
()
const
{
if
(
data_
!=
nullptr
)
{
memory
::
Free
(
place_
,
data_
);
}
}
};
}
// namespace details
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template
<
typename
T
>
class
Vector
{
public:
using
value_type
=
T
;
using
iterator
=
typename
std
::
vector
<
T
>::
iterator
;
using
const_iterator
=
typename
std
::
vector
<
T
>::
const_iterator
;
// Default ctor. Create empty Vector
Vector
()
{
InitEmpty
();
}
private:
// The actual class to implement vector logic
class
VectorData
{
public:
VectorData
()
:
flag_
(
kDataInCPU
)
{}
VectorData
(
size_t
count
,
const
T
&
value
)
:
cpu_
(
count
,
value
),
flag_
(
kDataInCPU
)
{}
VectorData
(
std
::
initializer_list
<
T
>
init
)
:
cpu_
(
init
),
flag_
(
kDataInCPU
)
{}
template
<
typename
U
>
explicit
VectorData
(
const
std
::
vector
<
U
>
&
dat
)
:
cpu_
(
dat
),
flag_
(
kDataInCPU
)
{}
~
VectorData
()
{}
VectorData
(
const
VectorData
&
o
)
{
o
.
ImmutableCPU
();
cpu_
=
o
.
cpu_
;
flag_
=
kDataInCPU
;
}
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
InitEmpty
();
if
(
count
!=
0
)
{
resize
(
count
);
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
ptr
[
i
]
=
value
;
VectorData
&
operator
=
(
const
VectorData
&
o
)
{
o
.
ImmutableCPU
();
cpu_
=
o
.
cpu_
;
flag_
=
kDataInCPU
;
details
::
CUDABuffer
null
;
gpu_
.
Swap
(
null
);
return
*
this
;
}
T
&
operator
[](
size_t
i
)
{
MutableCPU
();
return
cpu_
[
i
];
}
const
T
&
operator
[](
size_t
i
)
const
{
ImmutableCPU
();
return
cpu_
[
i
];
}
size_t
size
()
const
{
return
cpu_
.
size
();
}
iterator
begin
()
{
MutableCPU
();
return
cpu_
.
begin
();
}
iterator
end
()
{
MutableCPU
();
return
cpu_
.
end
();
}
T
&
front
()
{
MutableCPU
();
return
cpu_
.
front
();
}
T
&
back
()
{
MutableCPU
();
return
cpu_
.
back
();
}
const_iterator
begin
()
const
{
ImmutableCPU
();
return
cpu_
.
begin
();
}
const_iterator
end
()
const
{
ImmutableCPU
();
return
cpu_
.
end
();
}
const
T
&
back
()
const
{
ImmutableCPU
();
return
cpu_
.
back
();
}
T
*
data
()
{
return
&
(
*
this
)[
0
];
}
const
T
*
data
()
const
{
return
&
(
*
this
)[
0
];
}
const
T
&
front
()
const
{
ImmutableCPU
();
return
cpu_
.
front
();
}
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
MutableCPU
();
cpu_
.
assign
(
begin
,
end
);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void
push_back
(
T
elem
)
{
MutableCPU
();
cpu_
.
push_back
(
elem
);
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
MutableCPU
();
auto
out_it
=
std
::
back_inserter
<
std
::
vector
<
T
>>
(
this
->
cpu_
);
std
::
copy
(
begin
,
end
,
out_it
);
}
// resize the vector
void
resize
(
size_t
size
)
{
MutableCPU
();
cpu_
.
resize
(
size
);
}
// get cuda ptr. immutable
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"CUDA Data must on CUDA place"
);
ImmutableCUDA
(
place
);
return
reinterpret_cast
<
T
*>
(
gpu_
.
data_
);
}
// get cuda ptr. mutable
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
const
T
*
ptr
=
CUDAData
(
place
);
flag_
=
kDirty
|
kDataInCUDA
;
return
const_cast
<
T
*>
(
ptr
);
}
// clear
void
clear
()
{
cpu_
.
clear
();
flag_
=
kDirty
|
kDataInCPU
;
}
size_t
capacity
()
const
{
return
cpu_
.
capacity
();
}
// reserve data
void
reserve
(
size_t
size
)
const
{
cpu_
.
reserve
(
size
);
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator
std
::
vector
<
T
>
()
const
{
ImmutableCPU
();
return
cpu_
;
}
bool
operator
==
(
const
VectorData
&
other
)
const
{
ImmutableCPU
();
other
.
ImmutableCPU
();
return
cpu_
==
other
.
cpu_
;
}
std
::
mutex
&
Mutex
()
const
{
return
mtx_
;
}
std
::
unique_ptr
<
platform
::
CUDAPlace
>
CUDAPlace
()
const
{
if
(
gpu_
.
data_
==
nullptr
)
{
return
nullptr
;
}
else
{
return
std
::
unique_ptr
<
platform
::
CUDAPlace
>
(
new
platform
::
CUDAPlace
(
gpu_
.
place_
));
}
}
}
// Ctor with init_list
Vector
(
std
::
initializer_list
<
T
>
init
)
{
if
(
init
.
size
()
==
0
)
{
InitEmpty
();
}
else
{
InitByIter
(
init
.
size
(),
init
.
begin
(),
init
.
end
());
private:
enum
DataFlag
{
kDataInCPU
=
0x01
,
kDataInCUDA
=
0x02
,
// kDirty means the data has been changed in one device.
kDirty
=
0x10
};
void
CopyToCPU
()
const
{
// COPY GPU Data To CPU
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
Place
(
gpu_
.
place_
)));
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
.
data_
;
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
gpu_
.
place_
,
src
,
gpu_
.
size_
,
stream
);
dev_ctx
->
Wait
();
}
void
MutableCPU
()
{
if
(
IsInCUDA
()
&&
IsDirty
())
{
CopyToCPU
();
}
flag_
=
kDirty
|
kDataInCPU
;
}
}
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
if
(
IsDirty
())
{
if
(
IsInCPU
())
{
CopyCPUDataToCUDA
(
place
);
UnsetFlag
(
kDirty
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
IsInCUDA
()
&&
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
PADDLE_THROW
(
"This situation should not happen"
);
// Still dirty
}
else
{
// Dirty && DataInCUDA && Device is same
// Do nothing
}
}
else
{
if
(
!
IsInCUDA
())
{
// Even data is not dirty. However, data is not in CUDA. Copy data.
CopyCPUDataToCUDA
(
place
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
PADDLE_THROW
(
"This situation should not happen."
);
}
else
{
// Not Dirty && DataInCUDA && Device is same
// Do nothing.
}
}
}
void
CopyCPUDataToCUDA
(
const
platform
::
Place
&
place
)
const
{
void
*
src
=
cpu_
.
data
();
gpu_
.
Resize
(
place
,
cpu_
.
size
()
*
sizeof
(
T
));
void
*
dst
=
gpu_
.
data_
;
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
gpu_
.
place_
,
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
.
size_
,
stream
);
}
void
ImmutableCPU
()
const
{
if
(
IsDirty
()
&&
!
IsInCPU
())
{
// If data has been changed in CUDA, or
// CPU has no data.
CopyToCPU
();
UnsetFlag
(
kDirty
);
}
SetFlag
(
kDataInCPU
);
}
void
UnsetFlag
(
int
flag
)
const
{
flag_
&=
~
flag
;
}
void
SetFlag
(
int
flag
)
const
{
flag_
|=
flag
;
}
bool
IsDirty
()
const
{
return
flag_
&
kDirty
;
}
bool
IsInCUDA
()
const
{
return
flag_
&
kDataInCUDA
;
}
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
details
::
CUDABuffer
gpu_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
};
public:
// Default ctor. Create empty Vector
Vector
()
:
m_
(
new
VectorData
())
{}
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
:
m_
(
new
VectorData
(
count
,
value
))
{}
// Ctor with init_list
Vector
(
std
::
initializer_list
<
T
>
init
)
:
m_
(
new
VectorData
(
init
))
{}
// implicit cast from std::vector.
template
<
typename
U
>
Vector
(
const
std
::
vector
<
U
>
&
dat
)
{
// NOLINT
if
(
dat
.
size
()
==
0
)
{
InitEmpty
();
}
else
{
InitByIter
(
dat
.
size
(),
dat
.
begin
(),
dat
.
end
());
}
Vector
(
const
std
::
vector
<
U
>
&
dat
)
:
m_
(
new
VectorData
(
dat
))
{
// NOLINT
}
// Copy ctor
Vector
(
const
Vector
<
T
>
&
other
)
{
this
->
operator
=
(
other
)
;
}
Vector
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
}
// Copy operator
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
if
(
other
.
size
()
!=
0
)
{
this
->
InitByIter
(
other
.
size
(),
other
.
begin
(),
other
.
end
());
}
else
{
InitEmpty
();
}
m_
=
other
.
m_
;
return
*
this
;
}
// Move ctor
Vector
(
Vector
<
T
>
&&
other
)
{
this
->
size_
=
other
.
size_
;
this
->
flag_
=
other
.
flag_
;
if
(
other
.
cuda_vec_
.
memory_size
())
{
this
->
cuda_vec_
.
ShareDataWith
(
other
.
cuda_vec_
);
}
if
(
other
.
cpu_vec_
.
memory_size
())
{
this
->
cpu_vec_
.
ShareDataWith
(
other
.
cpu_vec_
);
}
}
Vector
(
Vector
<
T
>
&&
other
)
{
m_
=
std
::
move
(
other
.
m_
);
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
MutableCPU
();
return
const_cast
<
T
*>
(
cpu_vec_
.
data
<
T
>
())[
i
];
}
T
&
operator
[](
size_t
i
)
{
return
(
*
m_
.
MutableData
())[
i
];
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
ImmutableCPU
();
return
cpu_vec_
.
data
<
T
>
()[
i
];
}
const
T
&
operator
[](
size_t
i
)
const
{
return
m_
.
Data
()[
i
];
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
size_
;
}
size_t
size
()
const
{
return
m_
.
Data
().
size
()
;
}
T
*
begin
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
iterator
begin
()
{
return
m_
.
MutableData
()
->
begin
(
);
}
T
*
end
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
}
iterator
end
()
{
return
m_
.
MutableData
()
->
end
();
}
T
&
front
()
{
return
*
begin
();
}
T
&
front
()
{
return
m_
.
MutableData
()
->
front
();
}
T
&
back
()
{
auto
it
=
end
();
--
it
;
return
*
it
;
}
T
&
back
()
{
return
m_
.
MutableData
()
->
back
();
}
const
T
*
begin
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
const_iterator
begin
()
const
{
return
m_
.
Data
().
begin
();
}
const
T
*
end
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
}
const_iterator
end
()
const
{
return
m_
.
Data
().
end
();
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
_iterator
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
_iterator
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
auto
it
=
end
();
--
it
;
return
*
it
;
}
const
T
&
back
()
const
{
return
m_
.
Data
().
back
();
}
T
*
data
()
{
return
begin
();
}
T
*
data
()
{
return
m_
.
MutableData
()
->
data
();
}
const
T
*
data
()
const
{
return
begin
();
}
const
T
*
data
()
const
{
return
m_
.
Data
().
data
();
}
const
T
&
front
()
const
{
return
*
begin
();
}
const
T
&
front
()
const
{
return
m_
.
Data
().
front
();
}
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
InitByIter
(
end
-
begin
,
begin
,
end
);
m_
.
MutableData
()
->
assign
(
begin
,
end
);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void
push_back
(
T
elem
)
{
if
(
size_
+
1
>
capacity
())
{
reserve
((
size_
+
1
)
<<
1
);
}
*
end
()
=
elem
;
++
size_
;
}
void
push_back
(
T
elem
)
{
m_
.
MutableData
()
->
push_back
(
elem
);
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
size_t
pre_size
=
size_
;
resize
(
pre_size
+
(
end
-
begin
));
T
*
ptr
=
this
->
begin
()
+
pre_size
;
for
(;
begin
<
end
;
++
begin
,
++
ptr
)
{
*
ptr
=
*
begin
;
}
m_
.
MutableData
()
->
Extend
(
begin
,
end
);
}
// resize the vector
void
resize
(
size_t
size
)
{
if
(
size
+
1
<=
capacity
())
{
size_
=
size
;
}
else
{
MutableCPU
();
Tensor
cpu_tensor
;
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
cpu_tensor
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
const
T
*
old_ptr
=
cpu_vec_
.
memory_size
()
==
0
?
nullptr
:
cpu_vec_
.
data
<
T
>
();
if
(
old_ptr
!=
nullptr
)
{
std
::
copy
(
old_ptr
,
old_ptr
+
size_
,
ptr
);
}
size_
=
size
;
cpu_vec_
.
ShareDataWith
(
cpu_tensor
);
if
(
m_
.
Data
().
size
()
!=
size
)
{
m_
.
MutableData
()
->
resize
(
size
);
}
}
// get cuda ptr. immutable
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"CUDA Data must on CUDA place"
);
ImmutableCUDA
(
place
);
return
cuda_vec_
.
data
<
T
>
();
{
auto
&
mtx
=
m_
.
Data
().
Mutex
();
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx
);
auto
cuda_place
=
m_
.
Data
().
CUDAPlace
();
if
(
cuda_place
==
nullptr
||
*
cuda_place
==
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
return
m_
.
Data
().
CUDAData
(
place
);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_
.
Detach
();
return
CUDAData
(
place
);
}
// get cuda ptr. mutable
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
const
T
*
ptr
=
CUDAData
(
place
);
flag_
=
kDirty
|
kDataInCUDA
;
return
const_cast
<
T
*>
(
ptr
);
{
auto
&
mtx
=
m_
.
Data
().
Mutex
();
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx
);
auto
cuda_place
=
m_
.
Data
().
CUDAPlace
();
if
(
cuda_place
==
nullptr
||
*
cuda_place
==
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
return
m_
.
MutableData
()
->
CUDAMutableData
(
place
);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_
.
Detach
();
return
CUDAMutableData
(
place
);
}
// clear
void
clear
()
{
size_
=
0
;
flag_
=
kDirty
|
kDataInCPU
;
}
void
clear
()
{
m_
.
MutableData
()
->
clear
();
}
size_t
capacity
()
const
{
return
cpu_vec_
.
memory_size
()
/
SizeOfType
(
typeid
(
T
));
}
size_t
capacity
()
const
{
return
m_
.
Data
().
capacity
();
}
// reserve data
void
reserve
(
size_t
size
)
{
size_t
pre_size
=
size_
;
resize
(
size
);
resize
(
pre_size
);
}
void
reserve
(
size_t
size
)
{
m_
.
Data
().
reserve
(
size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
...
...
@@ -248,12 +481,7 @@ class Vector {
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator
std
::
vector
<
T
>
()
const
{
std
::
vector
<
T
>
result
;
result
.
resize
(
size
());
std
::
copy
(
begin
(),
end
(),
result
.
begin
());
return
result
;
}
operator
std
::
vector
<
T
>
()
const
{
return
m_
.
Data
();
}
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
...
...
@@ -267,118 +495,11 @@ class Vector {
return
true
;
}
private:
void
InitEmpty
()
{
size_
=
0
;
flag_
=
kDataInCPU
;
}
template
<
typename
Iter
>
void
InitByIter
(
size_t
size
,
Iter
begin
,
Iter
end
)
{
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
this
->
cpu_vec_
.
template
mutable_data
<
T
>(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
*
ptr
++
=
*
begin
++
;
}
flag_
=
kDataInCPU
|
kDirty
;
size_
=
size
;
}
enum
DataFlag
{
kDataInCPU
=
0x01
,
kDataInCUDA
=
0x02
,
// kDirty means the data has been changed in one device.
kDirty
=
0x10
};
void
CopyToCPU
()
const
{
// COPY GPU Data To CPU
TensorCopy
(
cuda_vec_
,
platform
::
CPUPlace
(),
&
cpu_vec_
);
WaitPlace
(
cuda_vec_
.
place
());
}
void
MutableCPU
()
{
if
(
IsInCUDA
()
&&
IsDirty
())
{
CopyToCPU
();
}
flag_
=
kDirty
|
kDataInCPU
;
}
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
if
(
IsDirty
())
{
if
(
IsInCPU
())
{
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
cuda_vec_
);
WaitPlace
(
place
);
UnsetFlag
(
kDirty
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
IsInCUDA
()
&&
!
(
place
==
cuda_vec_
.
place
()))
{
framework
::
Tensor
tmp
;
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
WaitPlace
(
cuda_vec_
.
place
());
cuda_vec_
.
ShareDataWith
(
tmp
);
// Still dirty
}
else
{
// Dirty && DataInCUDA && Device is same
// Do nothing
}
}
else
{
if
(
!
IsInCUDA
())
{
// Even data is not dirty. However, data is not in CUDA. Copy data.
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
cuda_vec_
);
WaitPlace
(
place
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
!
(
place
==
cuda_vec_
.
place
()))
{
framework
::
Tensor
tmp
;
WaitPlace
(
cuda_vec_
.
place
());
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
WaitPlace
(
cuda_vec_
.
place
());
WaitPlace
(
place
);
cuda_vec_
.
ShareDataWith
(
tmp
);
}
else
{
// Not Dirty && DataInCUDA && Device is same
// Do nothing.
}
}
}
void
ImmutableCPU
()
const
{
if
(
IsDirty
()
&&
!
IsInCPU
())
{
// If data has been changed in CUDA, or CPU has no data.
CopyToCPU
();
UnsetFlag
(
kDirty
);
}
SetFlag
(
kDataInCPU
);
}
void
UnsetFlag
(
int
flag
)
const
{
flag_
&=
~
flag
;
}
void
SetFlag
(
int
flag
)
const
{
flag_
|=
flag
;
}
const
void
*
Handle
()
const
{
return
&
m_
.
Data
();
}
bool
IsDirty
()
const
{
return
flag_
&
kDirty
;
}
bool
IsInCUDA
()
const
{
return
flag_
&
kDataInCUDA
;
}
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
static
void
WaitPlace
(
const
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
platform
::
DeviceContextPool
::
Instance
()
.
Get
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
->
Wait
();
}
}
static
T
&
EmptyDummy
()
{
static
T
dummy
=
T
();
return
dummy
;
}
mutable
int
flag_
;
mutable
Tensor
cpu_vec_
;
mutable
Tensor
cuda_vec_
;
size_t
size_
;
private:
// Vector is an COW object.
mutable
details
::
COWPtr
<
VectorData
>
m_
;
};
#else // PADDLE_WITH_CUDA
...
...
paddle/fluid/framework/op_info.h
浏览文件 @
85ddb5c7
...
...
@@ -38,31 +38,27 @@ struct OpInfo {
OpAttrChecker
*
checker_
{
nullptr
};
InferVarTypeFN
infer_var_type_
;
InferShapeFN
infer_shape_
;
std
::
string
op_type_
;
bool
HasOpProtoAndChecker
()
const
{
return
proto_
!=
nullptr
&&
checker_
!=
nullptr
;
}
const
proto
::
OpProto
&
Proto
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
proto_
,
"Operator %s Proto has not been registered"
,
op_type_
);
PADDLE_ENFORCE_NOT_NULL
(
proto_
,
"Operator Proto has not been registered"
);
PADDLE_ENFORCE
(
proto_
->
IsInitialized
(),
"Operator %s Proto must be initialized in op info"
,
op_type_
);
"Operator Proto must be initialized in op info"
);
return
*
proto_
;
}
const
OpCreator
&
Creator
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
creator_
,
"Operator %s Creator has not been registered"
,
op_type_
);
PADDLE_ENFORCE_NOT_NULL
(
creator_
,
"Operator Creator has not been registered"
);
return
creator_
;
}
const
GradOpMakerFN
&
GradOpMaker
()
const
{
PADDLE_ENFORCE_NOT_NULL
(
grad_op_maker_
,
"Operator %s GradOpMaker has not been registered."
,
op_type_
);
"Operator GradOpMaker has not been registered."
);
return
grad_op_maker_
;
}
...
...
@@ -77,9 +73,8 @@ class OpInfoMap {
return
map_
.
find
(
op_type
)
!=
map_
.
end
();
}
void
Insert
(
const
std
::
string
&
type
,
OpInfo
info
)
{
void
Insert
(
const
std
::
string
&
type
,
const
OpInfo
&
info
)
{
PADDLE_ENFORCE
(
!
Has
(
type
),
"Operator %s has been registered"
,
type
);
info
.
op_type_
=
type
;
map_
.
insert
({
type
,
info
});
}
...
...
paddle/fluid/framework/selected_rows_test.cc
浏览文件 @
85ddb5c7
...
...
@@ -27,8 +27,11 @@ class SelectedRowsTester : public ::testing::Test {
selected_rows_
.
reset
(
new
SelectedRows
(
rows
,
height
));
Tensor
*
value
=
selected_rows_
->
mutable_value
();
value
->
mutable_data
<
float
>
(
auto
*
data
=
value
->
mutable_data
<
float
>
(
make_ddim
({
static_cast
<
int64_t
>
(
rows
.
size
()),
row_numel
}),
place_
);
for
(
int64_t
i
=
0
;
i
<
value
->
numel
();
++
i
)
{
data
[
i
]
=
static_cast
<
float
>
(
i
);
}
}
protected:
...
...
@@ -60,6 +63,10 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
ASSERT_EQ
(
selected_rows_
->
height
(),
dst_tensor
.
height
());
ASSERT_EQ
(
selected_rows_
->
value
().
dims
(),
dst_tensor
.
value
().
dims
());
ASSERT_EQ
(
selected_rows_
->
GetCompleteDims
(),
dst_tensor
.
GetCompleteDims
());
auto
*
dst_data
=
dst_tensor
.
value
().
data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
dst_tensor
.
value
().
numel
();
++
i
)
{
ASSERT_EQ
(
dst_data
[
i
],
static_cast
<
float
>
(
i
));
}
}
TEST
(
SelectedRows
,
SparseTable
)
{
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
85ddb5c7
...
...
@@ -212,10 +212,11 @@ struct AnalysisConfig : public NativeConfig {
kExclude
// Specify the disabled passes in `ir_passes`.
};
// Determine whether to perform graph optimization.
bool
enable_ir_optim
=
true
;
// Manually determine the IR passes to run.
IrPassMode
ir_mode
{
IrPassMode
::
kExclude
};
// attention lstm fuse works only on some specific models, disable as default.
std
::
vector
<
std
::
string
>
ir_passes
{
"attention_lstm_fuse_pass"
};
std
::
vector
<
std
::
string
>
ir_passes
;
// NOTE this is just for internal development, please not use it.
bool
_use_mkldnn
{
false
};
...
...
paddle/fluid/operators/detection/CMakeLists.txt
浏览文件 @
85ddb5c7
...
...
@@ -30,7 +30,13 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
polygon_box_transform_op.cu
)
detection_library
(
rpn_target_assign_op SRCS rpn_target_assign_op.cc
)
detection_library
(
generate_proposal_labels_op SRCS generate_proposal_labels_op.cc
)
detection_library
(
generate_proposals_op SRCS generate_proposals_op.cc
)
if
(
WITH_GPU
)
detection_library
(
generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub
)
else
()
detection_library
(
generate_proposals_op SRCS generate_proposals_op.cc
)
endif
()
detection_library
(
roi_perspective_transform_op SRCS roi_perspective_transform_op.cc roi_perspective_transform_op.cu
)
#Export local libraries to parent
set
(
DETECTION_LIBRARY
${
LOCAL_DETECTION_LIBS
}
PARENT_SCOPE
)
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
85ddb5c7
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/math/math_function.h"
...
...
@@ -69,7 +70,7 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Anchors"
)
->
type
()),
platform
::
CPUPlace
());
ctx
.
device_context
());
}
};
...
...
@@ -162,7 +163,7 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
T
*
boxes_data
=
boxes
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
im_scale
=
im_info_data
[
2
];
keep
->
Resize
({
boxes
->
dims
()[
0
]
,
1
});
keep
->
Resize
({
boxes
->
dims
()[
0
]});
min_size
=
std
::
max
(
min_size
,
1.0
f
);
int
*
keep_data
=
keep
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
...
...
@@ -463,7 +464,7 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr
<
int
>
(
"post_nms_topN"
,
"post_nms_topN"
);
AddAttr
<
float
>
(
"nms_thresh"
,
"nms_thres"
);
AddAttr
<
float
>
(
"min_size"
,
"min size"
);
AddAttr
<
float
>
(
"eta"
,
"
eta
"
);
AddAttr
<
float
>
(
"eta"
,
"
The parameter for adaptive NMS.
"
);
AddComment
(
R"DOC(
Generate Proposals OP
...
...
paddle/fluid/operators/detection/generate_proposals_op.cu
0 → 100644
浏览文件 @
85ddb5c7
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stdio.h>
#include <string>
#include <vector>
#include "cub/cub.cuh"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/gather.cu.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
namespace
{
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
int
const
kThreadsPerBlock
=
sizeof
(
uint64_t
)
*
8
;
template
<
typename
T
>
__global__
void
RangeInitKernel
(
const
T
start
,
const
T
delta
,
const
int
size
,
T
*
out
)
{
CUDA_1D_KERNEL_LOOP
(
i
,
size
)
{
out
[
i
]
=
start
+
i
*
delta
;
}
}
template
<
typename
T
>
void
SortDescending
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
value
,
Tensor
*
value_out
,
Tensor
*
index_out
)
{
int
num
=
value
.
numel
();
Tensor
index_in_t
;
int
*
idx_in
=
index_in_t
.
mutable_data
<
int
>
({
num
},
ctx
.
GetPlace
());
int
block
=
512
;
auto
stream
=
ctx
.
stream
();
RangeInitKernel
<<<
DIVUP
(
num
,
block
),
block
,
0
,
stream
>>>
(
0
,
1
,
num
,
idx_in
);
int
*
idx_out
=
index_out
->
mutable_data
<
int
>
({
num
},
ctx
.
GetPlace
());
const
T
*
keys_in
=
value
.
data
<
T
>
();
T
*
keys_out
=
value_out
->
mutable_data
<
T
>
({
num
},
ctx
.
GetPlace
());
// Determine temporary device storage requirements
void
*
d_temp_storage
=
NULL
;
size_t
temp_storage_bytes
=
0
;
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
d_temp_storage
,
temp_storage_bytes
,
keys_in
,
keys_out
,
idx_in
,
idx_out
,
num
);
// Allocate temporary storage
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
d_temp_storage
=
memory
::
Alloc
(
place
,
temp_storage_bytes
);
// Run sorting operation
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
d_temp_storage
,
temp_storage_bytes
,
keys_in
,
keys_out
,
idx_in
,
idx_out
,
num
);
memory
::
Free
(
place
,
d_temp_storage
);
}
template
<
typename
T
>
__device__
__forceinline__
T
Min
(
T
x
,
T
y
)
{
return
x
<
y
?
x
:
y
;
}
template
<
typename
T
>
__device__
__forceinline__
T
Max
(
T
x
,
T
y
)
{
return
x
>
y
?
x
:
y
;
}
template
<
typename
T
>
__global__
void
BoxDecodeAndClipKernel
(
const
T
*
anchor
,
const
T
*
deltas
,
const
T
*
var
,
const
int
*
index
,
const
T
*
im_info
,
const
int
num
,
T
*
proposals
)
{
T
kBBoxClipDefault
=
log
(
1000.0
/
16.0
);
CUDA_1D_KERNEL_LOOP
(
i
,
num
)
{
int
k
=
index
[
i
]
*
4
;
T
axmin
=
anchor
[
k
];
T
aymin
=
anchor
[
k
+
1
];
T
axmax
=
anchor
[
k
+
2
];
T
aymax
=
anchor
[
k
+
3
];
T
w
=
axmax
-
axmin
+
1.0
;
T
h
=
aymax
-
aymin
+
1.0
;
T
cx
=
axmin
+
0.5
*
w
;
T
cy
=
aymin
+
0.5
*
h
;
T
dxmin
=
deltas
[
k
];
T
dymin
=
deltas
[
k
+
1
];
T
dxmax
=
deltas
[
k
+
2
];
T
dymax
=
deltas
[
k
+
3
];
T
d_cx
=
0.
,
d_cy
=
0.
,
d_w
=
0.
,
d_h
=
0.
;
if
(
var
)
{
d_cx
=
cx
+
dxmin
*
w
*
var
[
k
];
d_cy
=
cy
+
dymin
*
h
*
var
[
k
+
1
];
d_w
=
exp
(
Min
<
T
>
(
dxmax
*
var
[
k
+
2
],
kBBoxClipDefault
))
*
w
;
d_h
=
exp
(
Min
<
T
>
(
dymax
*
var
[
k
+
3
],
kBBoxClipDefault
))
*
h
;
}
else
{
d_cx
=
cx
+
dxmin
*
w
;
d_cy
=
cy
+
dymin
*
h
;
d_w
=
exp
(
Min
<
T
>
(
dxmax
,
kBBoxClipDefault
))
*
w
;
d_h
=
exp
(
Min
<
T
>
(
dymax
,
kBBoxClipDefault
))
*
h
;
}
T
oxmin
=
d_cx
-
d_w
*
0.5
;
T
oymin
=
d_cy
-
d_h
*
0.5
;
T
oxmax
=
d_cx
+
d_w
*
0.5
-
1.
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
1.
;
proposals
[
i
*
4
]
=
Max
<
T
>
(
Min
<
T
>
(
oxmin
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
<
T
>
(
Min
<
T
>
(
oymin
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
<
T
>
(
Min
<
T
>
(
oxmax
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
<
T
>
(
Min
<
T
>
(
oymax
,
im_info
[
0
]
-
1.
),
0.
);
}
}
template
<
typename
T
,
int
BlockSize
>
__global__
void
FilterBBoxes
(
const
T
*
bboxes
,
const
T
*
im_info
,
const
T
min_size
,
const
int
num
,
int
*
keep_num
,
int
*
keep
)
{
T
im_h
=
im_info
[
0
];
T
im_w
=
im_info
[
1
];
T
im_scale
=
im_info
[
2
];
int
cnt
=
0
;
__shared__
int
keep_index
[
BlockSize
];
CUDA_1D_KERNEL_LOOP
(
i
,
num
)
{
keep_index
[
threadIdx
.
x
]
=
-
1
;
__syncthreads
();
int
k
=
i
*
4
;
T
xmin
=
bboxes
[
k
];
T
ymin
=
bboxes
[
k
+
1
];
T
xmax
=
bboxes
[
k
+
2
];
T
ymax
=
bboxes
[
k
+
3
];
T
w
=
xmax
-
xmin
+
1.0
;
T
h
=
ymax
-
ymin
+
1.0
;
T
cx
=
xmin
+
w
/
2.
;
T
cy
=
ymin
+
h
/
2.
;
T
w_s
=
(
xmax
-
xmin
)
/
im_scale
+
1.
;
T
h_s
=
(
ymax
-
ymin
)
/
im_scale
+
1.
;
if
(
w_s
>=
min_size
&&
h_s
>=
min_size
&&
cx
<=
im_w
&&
cy
<=
im_h
)
{
keep_index
[
threadIdx
.
x
]
=
i
;
}
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
int
size
=
(
num
-
i
)
<
BlockSize
?
num
-
i
:
BlockSize
;
for
(
int
j
=
0
;
j
<
size
;
++
j
)
{
if
(
keep_index
[
j
]
>
-
1
)
{
keep
[
cnt
++
]
=
keep_index
[
j
];
}
}
}
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
keep_num
[
0
]
=
cnt
;
}
}
__device__
inline
float
IoU
(
const
float
*
a
,
const
float
*
b
)
{
float
left
=
max
(
a
[
0
],
b
[
0
]),
right
=
min
(
a
[
2
],
b
[
2
]);
float
top
=
max
(
a
[
1
],
b
[
1
]),
bottom
=
min
(
a
[
3
],
b
[
3
]);
float
width
=
max
(
right
-
left
+
1
,
0.
f
),
height
=
max
(
bottom
-
top
+
1
,
0.
f
);
float
inter_s
=
width
*
height
;
float
s_a
=
(
a
[
2
]
-
a
[
0
]
+
1
)
*
(
a
[
3
]
-
a
[
1
]
+
1
);
float
s_b
=
(
b
[
2
]
-
b
[
0
]
+
1
)
*
(
b
[
3
]
-
b
[
1
]
+
1
);
return
inter_s
/
(
s_a
+
s_b
-
inter_s
);
}
__global__
void
NMSKernel
(
const
int
n_boxes
,
const
float
nms_overlap_thresh
,
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
)
{
const
int
row_start
=
blockIdx
.
y
;
const
int
col_start
=
blockIdx
.
x
;
const
int
row_size
=
min
(
n_boxes
-
row_start
*
kThreadsPerBlock
,
kThreadsPerBlock
);
const
int
col_size
=
min
(
n_boxes
-
col_start
*
kThreadsPerBlock
,
kThreadsPerBlock
);
__shared__
float
block_boxes
[
kThreadsPerBlock
*
4
];
if
(
threadIdx
.
x
<
col_size
)
{
block_boxes
[
threadIdx
.
x
*
4
+
0
]
=
dev_boxes
[(
kThreadsPerBlock
*
col_start
+
threadIdx
.
x
)
*
4
+
0
];
block_boxes
[
threadIdx
.
x
*
4
+
1
]
=
dev_boxes
[(
kThreadsPerBlock
*
col_start
+
threadIdx
.
x
)
*
4
+
1
];
block_boxes
[
threadIdx
.
x
*
4
+
2
]
=
dev_boxes
[(
kThreadsPerBlock
*
col_start
+
threadIdx
.
x
)
*
4
+
2
];
block_boxes
[
threadIdx
.
x
*
4
+
3
]
=
dev_boxes
[(
kThreadsPerBlock
*
col_start
+
threadIdx
.
x
)
*
4
+
3
];
}
__syncthreads
();
if
(
threadIdx
.
x
<
row_size
)
{
const
int
cur_box_idx
=
kThreadsPerBlock
*
row_start
+
threadIdx
.
x
;
const
float
*
cur_box
=
dev_boxes
+
cur_box_idx
*
4
;
int
i
=
0
;
uint64_t
t
=
0
;
int
start
=
0
;
if
(
row_start
==
col_start
)
{
start
=
threadIdx
.
x
+
1
;
}
for
(
i
=
start
;
i
<
col_size
;
i
++
)
{
if
(
IoU
(
cur_box
,
block_boxes
+
i
*
4
)
>
nms_overlap_thresh
)
{
t
|=
1ULL
<<
i
;
}
}
const
int
col_blocks
=
DIVUP
(
n_boxes
,
kThreadsPerBlock
);
dev_mask
[
cur_box_idx
*
col_blocks
+
col_start
]
=
t
;
}
}
template
<
typename
T
>
void
NMS
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
proposals
,
const
Tensor
&
sorted_indices
,
const
T
nms_threshold
,
Tensor
*
keep_out
)
{
int
boxes_num
=
proposals
.
dims
()[
0
];
PADDLE_ENFORCE_EQ
(
boxes_num
,
sorted_indices
.
dims
()[
0
]);
const
int
col_blocks
=
DIVUP
(
boxes_num
,
kThreadsPerBlock
);
dim3
blocks
(
DIVUP
(
boxes_num
,
kThreadsPerBlock
),
DIVUP
(
boxes_num
,
kThreadsPerBlock
));
dim3
threads
(
kThreadsPerBlock
);
const
T
*
boxes
=
proposals
.
data
<
T
>
();
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
int
size_bytes
=
boxes_num
*
col_blocks
*
sizeof
(
uint64_t
);
uint64_t
*
d_mask
=
reinterpret_cast
<
uint64_t
*>
(
memory
::
Alloc
(
place
,
size_bytes
));
NMSKernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_threshold
,
boxes
,
d_mask
);
uint64_t
*
h_mask
=
reinterpret_cast
<
uint64_t
*>
(
memory
::
Alloc
(
platform
::
CPUPlace
(),
size_bytes
));
memory
::
Copy
(
platform
::
CPUPlace
(),
h_mask
,
place
,
d_mask
,
size_bytes
,
0
);
std
::
vector
<
uint64_t
>
remv
(
col_blocks
);
memset
(
&
remv
[
0
],
0
,
sizeof
(
uint64_t
)
*
col_blocks
);
std
::
vector
<
int
>
keep_vec
;
int
num_to_keep
=
0
;
for
(
int
i
=
0
;
i
<
boxes_num
;
i
++
)
{
int
nblock
=
i
/
kThreadsPerBlock
;
int
inblock
=
i
%
kThreadsPerBlock
;
if
(
!
(
remv
[
nblock
]
&
(
1ULL
<<
inblock
)))
{
++
num_to_keep
;
keep_vec
.
push_back
(
i
);
uint64_t
*
p
=
&
h_mask
[
0
]
+
i
*
col_blocks
;
for
(
int
j
=
nblock
;
j
<
col_blocks
;
j
++
)
{
remv
[
j
]
|=
p
[
j
];
}
}
}
int
*
keep
=
keep_out
->
mutable_data
<
int
>
({
num_to_keep
},
ctx
.
GetPlace
());
memory
::
Copy
(
place
,
keep
,
platform
::
CPUPlace
(),
keep_vec
.
data
(),
sizeof
(
int
)
*
num_to_keep
,
0
);
memory
::
Free
(
place
,
d_mask
);
memory
::
Free
(
platform
::
CPUPlace
(),
h_mask
);
}
template
<
typename
T
>
std
::
pair
<
Tensor
,
Tensor
>
ProposalForOneImage
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
im_info
,
const
Tensor
&
anchors
,
const
Tensor
&
variances
,
const
Tensor
&
bbox_deltas
,
// [M, 4]
const
Tensor
&
scores
,
// [N, 1]
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
float
eta
)
{
// 1. pre nms
Tensor
scores_sort
,
index_sort
;
SortDescending
<
T
>
(
ctx
,
scores
,
&
scores_sort
,
&
index_sort
);
int
num
=
scores
.
numel
();
int
pre_nms_num
=
(
pre_nms_top_n
<=
0
||
pre_nms_top_n
>
num
)
?
scores
.
numel
()
:
pre_nms_top_n
;
scores_sort
.
Resize
({
pre_nms_num
,
1
});
index_sort
.
Resize
({
pre_nms_num
,
1
});
// 2. box decode and clipping
Tensor
proposals
;
proposals
.
mutable_data
<
T
>
({
pre_nms_num
,
4
},
ctx
.
GetPlace
());
int
block
=
512
;
auto
stream
=
ctx
.
stream
();
BoxDecodeAndClipKernel
<
T
><<<
DIVUP
(
pre_nms_num
,
block
),
block
,
0
,
stream
>>>
(
anchors
.
data
<
T
>
(),
bbox_deltas
.
data
<
T
>
(),
variances
.
data
<
T
>
(),
index_sort
.
data
<
int
>
(),
im_info
.
data
<
T
>
(),
pre_nms_num
,
proposals
.
data
<
T
>
());
// 3. filter
Tensor
keep_index
,
keep_num_t
;
keep_index
.
mutable_data
<
int
>
({
pre_nms_num
},
ctx
.
GetPlace
());
keep_num_t
.
mutable_data
<
int
>
({
1
},
ctx
.
GetPlace
());
min_size
=
std
::
max
(
min_size
,
1.0
f
);
FilterBBoxes
<
T
,
512
><<<
1
,
512
,
0
,
stream
>>>
(
proposals
.
data
<
T
>
(),
im_info
.
data
<
T
>
(),
min_size
,
pre_nms_num
,
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
());
int
keep_num
;
const
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
memory
::
Copy
(
platform
::
CPUPlace
(),
&
keep_num
,
gpu_place
,
keep_num_t
.
data
<
int
>
(),
sizeof
(
int
),
0
);
keep_index
.
Resize
({
keep_num
});
Tensor
scores_filter
,
proposals_filter
;
proposals_filter
.
mutable_data
<
T
>
({
keep_num
,
4
},
ctx
.
GetPlace
());
scores_filter
.
mutable_data
<
T
>
({
keep_num
,
1
},
ctx
.
GetPlace
());
GPUGather
<
T
>
(
ctx
,
proposals
,
keep_index
,
&
proposals_filter
);
GPUGather
<
T
>
(
ctx
,
scores_sort
,
keep_index
,
&
scores_filter
);
if
(
nms_thresh
<=
0
)
{
return
std
::
make_pair
(
proposals_filter
,
scores_filter
);
}
// 4. nms
Tensor
keep_nms
;
NMS
<
T
>
(
ctx
,
proposals_filter
,
keep_index
,
nms_thresh
,
&
keep_nms
);
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
keep_nms
.
Resize
({
post_nms_top_n
});
}
Tensor
scores_nms
,
proposals_nms
;
proposals_nms
.
mutable_data
<
T
>
({
keep_nms
.
numel
(),
4
},
ctx
.
GetPlace
());
scores_nms
.
mutable_data
<
T
>
({
keep_nms
.
numel
(),
1
},
ctx
.
GetPlace
());
GPUGather
<
T
>
(
ctx
,
proposals_filter
,
keep_nms
,
&
proposals_nms
);
GPUGather
<
T
>
(
ctx
,
scores_filter
,
keep_nms
,
&
scores_nms
);
return
std
::
make_pair
(
proposals_nms
,
scores_nms
);
}
}
// namespace
template
<
typename
DeviceContext
,
typename
T
>
class
CUDAGenerateProposalsKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
scores
=
context
.
Input
<
Tensor
>
(
"Scores"
);
auto
*
bbox_deltas
=
context
.
Input
<
Tensor
>
(
"BboxDeltas"
);
auto
*
im_info
=
context
.
Input
<
Tensor
>
(
"ImInfo"
);
auto
*
anchors
=
context
.
Input
<
Tensor
>
(
"Anchors"
);
auto
*
variances
=
context
.
Input
<
Tensor
>
(
"Variances"
);
auto
*
rpn_rois
=
context
.
Output
<
LoDTensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoDTensor
>
(
"RpnRoiProbs"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
PADDLE_ENFORCE_GE
(
eta
,
1.
,
"Not support adaptive NMS."
);
auto
&
dev_ctx
=
context
.
template
device_context
<
DeviceContext
>();
auto
scores_dim
=
scores
->
dims
();
int64_t
num
=
scores_dim
[
0
];
int64_t
c_score
=
scores_dim
[
1
];
int64_t
h_score
=
scores_dim
[
2
];
int64_t
w_score
=
scores_dim
[
3
];
auto
bbox_dim
=
bbox_deltas
->
dims
();
int64_t
c_bbox
=
bbox_dim
[
1
];
int64_t
h_bbox
=
bbox_dim
[
2
];
int64_t
w_bbox
=
bbox_dim
[
3
];
Tensor
bbox_deltas_swap
,
scores_swap
;
bbox_deltas_swap
.
mutable_data
<
T
>
({
num
,
h_bbox
,
w_bbox
,
c_bbox
},
dev_ctx
.
GetPlace
());
scores_swap
.
mutable_data
<
T
>
({
num
,
h_score
,
w_score
,
c_score
},
dev_ctx
.
GetPlace
());
math
::
Transpose
<
DeviceContext
,
T
,
4
>
trans
;
std
::
vector
<
int
>
axis
=
{
0
,
2
,
3
,
1
};
trans
(
dev_ctx
,
*
bbox_deltas
,
&
bbox_deltas_swap
,
axis
);
trans
(
dev_ctx
,
*
scores
,
&
scores_swap
,
axis
);
Tensor
*
anchor
=
const_cast
<
framework
::
Tensor
*>
(
anchors
);
anchor
->
Resize
({
anchors
->
numel
()
/
4
,
4
});
Tensor
*
var
=
const_cast
<
framework
::
Tensor
*>
(
variances
);
var
->
Resize
({
var
->
numel
()
/
4
,
4
});
rpn_rois
->
mutable_data
<
T
>
({
bbox_deltas
->
numel
()
/
4
,
4
},
context
.
GetPlace
());
rpn_roi_probs
->
mutable_data
<
T
>
({
scores
->
numel
(),
1
},
context
.
GetPlace
());
T
*
rpn_rois_data
=
rpn_rois
->
data
<
T
>
();
T
*
rpn_roi_probs_data
=
rpn_roi_probs
->
data
<
T
>
();
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dev_ctx
.
GetPlace
());
int64_t
num_proposals
=
0
;
std
::
vector
<
size_t
>
offset
(
1
,
0
);
for
(
int64_t
i
=
0
;
i
<
num
;
++
i
)
{
Tensor
im_info_slice
=
im_info
->
Slice
(
i
,
i
+
1
);
Tensor
bbox_deltas_slice
=
bbox_deltas_swap
.
Slice
(
i
,
i
+
1
);
Tensor
scores_slice
=
scores_swap
.
Slice
(
i
,
i
+
1
);
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
ProposalForOneImage
<
T
>
(
dev_ctx
,
im_info_slice
,
*
anchor
,
*
var
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
Tensor
proposals
=
box_score_pair
.
first
;
Tensor
scores
=
box_score_pair
.
second
;
memory
::
Copy
(
place
,
rpn_rois_data
+
num_proposals
*
4
,
place
,
proposals
.
data
<
T
>
(),
sizeof
(
T
)
*
proposals
.
numel
(),
0
);
memory
::
Copy
(
place
,
rpn_roi_probs_data
+
num_proposals
,
place
,
scores
.
data
<
T
>
(),
sizeof
(
T
)
*
scores
.
numel
(),
0
);
num_proposals
+=
proposals
.
dims
()[
0
];
offset
.
emplace_back
(
num_proposals
);
}
framework
::
LoD
lod
;
lod
.
emplace_back
(
offset
);
rpn_rois
->
set_lod
(
lod
);
rpn_roi_probs
->
set_lod
(
lod
);
rpn_rois
->
Resize
({
num_proposals
,
4
});
rpn_roi_probs
->
Resize
({
num_proposals
,
1
});
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
generate_proposals
,
ops
::
CUDAGenerateProposalsKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
paddle/fluid/operators/detection_map_op.h
浏览文件 @
85ddb5c7
...
...
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
ap_type
=
GetAPType
(
ctx
.
Attr
<
std
::
string
>
(
"ap_type"
));
int
class_num
=
ctx
.
Attr
<
int
>
(
"class_num"
);
auto
label_lod
=
in_label
->
lod
();
auto
detect_lod
=
in_detect
->
lod
();
auto
&
label_lod
=
in_label
->
lod
();
auto
&
detect_lod
=
in_detect
->
lod
();
PADDLE_ENFORCE_EQ
(
label_lod
.
size
(),
1UL
,
"Only support one level sequence now."
);
PADDLE_ENFORCE_EQ
(
label_lod
[
0
].
size
(),
detect_lod
[
0
].
size
(),
...
...
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
labels
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_label
);
auto
detect
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_detect
);
auto
label_lod
=
input_label
.
lod
();
auto
detect_lod
=
input_detect
.
lod
();
auto
&
label_lod
=
input_label
.
lod
();
auto
&
detect_lod
=
input_detect
.
lod
();
int
batch_size
=
label_lod
[
0
].
size
()
-
1
;
auto
label_index
=
label_lod
[
0
];
auto
&
label_index
=
label_lod
[
0
];
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
std
::
map
<
int
,
std
::
vector
<
Box
>>
boxes
;
...
...
@@ -274,7 +274,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
output_true_pos
->
set_lod
(
true_pos_lod
);
output_false_pos
->
set_lod
(
false_pos_lod
);
return
;
}
void
GetInputPos
(
const
framework
::
Tensor
&
input_pos_count
,
...
...
@@ -292,7 +291,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
SetData
=
[](
const
framework
::
LoDTensor
&
pos_tensor
,
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>&
pos
)
{
const
T
*
pos_data
=
pos_tensor
.
data
<
T
>
();
auto
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
auto
&
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
for
(
size_t
i
=
0
;
i
<
pos_data_lod
.
size
()
-
1
;
++
i
)
{
for
(
size_t
j
=
pos_data_lod
[
i
];
j
<
pos_data_lod
[
i
+
1
];
++
j
)
{
T
score
=
pos_data
[
j
*
2
];
...
...
@@ -317,20 +316,23 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>*
false_pos
)
const
{
int
batch_size
=
gt_boxes
.
size
();
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
auto
image_gt_boxes
=
gt_boxes
[
n
];
for
(
auto
it
=
image_gt_boxes
.
begin
();
it
!=
image_gt_boxes
.
end
();
++
it
)
{
auto
&
image_gt_boxes
=
gt_boxes
[
n
];
for
(
auto
&
image_gt_box
:
image_gt_boxes
)
{
size_t
count
=
0
;
auto
labeled_bboxes
=
it
->
second
;
auto
&
labeled_bboxes
=
image_gt_box
.
second
;
if
(
evaluate_difficult
)
{
count
=
labeled_bboxes
.
size
();
}
else
{
for
(
size_t
i
=
0
;
i
<
labeled_bboxes
.
size
();
++
i
)
if
(
!
(
labeled_bboxes
[
i
].
is_difficult
))
++
count
;
for
(
auto
&
box
:
labeled_bboxes
)
{
if
(
!
box
.
is_difficult
)
{
++
count
;
}
}
}
if
(
count
==
0
)
{
continue
;
}
int
label
=
i
t
->
first
;
int
label
=
i
mage_gt_box
.
first
;
if
(
label_pos_count
->
find
(
label
)
==
label_pos_count
->
end
())
{
(
*
label_pos_count
)[
label
]
=
count
;
}
else
{
...
...
paddle/fluid/operators/extract_rows_op.cc
浏览文件 @
85ddb5c7
...
...
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
auto
&
in
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
SelectedRows
>
();
auto
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
in_rows
=
in
.
rows
();
auto
&
in_rows
=
in
.
rows
();
auto
out_dim
=
framework
::
make_ddim
(
std
::
vector
<
int64_t
>
{
static_cast
<
int64_t
>
(
in_rows
.
size
()),
1
});
auto
dst_ptr
=
out
->
mutable_data
<
int64_t
>
(
out_dim
,
in
.
place
());
...
...
paddle/fluid/operators/lookup_table_op.cu
浏览文件 @
85ddb5c7
...
...
@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
());
// TODO(yuyang18): Strange code here.
memory
::
Copy
(
platform
::
CPUPlace
(),
new_rows
.
CUDAMutableData
(
context
.
GetPlace
()),
gpu_place
,
ids_data
,
ids_num
*
sizeof
(
int64_t
),
stream
);
memory
::
Copy
(
gpu_place
,
new_rows
.
CUDAMutableData
(
context
.
GetPlace
()),
gpu_place
,
ids_data
,
ids_num
*
sizeof
(
int64_t
),
stream
);
d_table
->
set_rows
(
new_rows
);
auto
*
d_table_value
=
d_table
->
mutable_value
();
...
...
paddle/fluid/operators/math/selected_rows_functor.cu
浏览文件 @
85ddb5c7
...
...
@@ -60,11 +60,9 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
));
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
).
stream
());
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
...
...
@@ -148,7 +146,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
auto
in1_height
=
input1
.
height
();
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
());
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
()
);
auto
&
in1_rows
=
input1
.
rows
(
);
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in1_value
=
input1
.
value
();
...
...
paddle/fluid/operators/read_op.cc
浏览文件 @
85ddb5c7
...
...
@@ -45,12 +45,10 @@ class ReadInferVarType : public framework::VarTypeInference {
framework
::
VarDesc
*
reader
=
block
->
FindVarRecursive
(
reader_name
);
auto
dtypes
=
reader
->
GetDataTypes
();
PADDLE_ENFORCE_EQ
(
dtypes
.
size
(),
out_names
.
size
());
auto
lod_levels
=
reader
->
GetLoDLevels
();
for
(
size_t
i
=
0
;
i
<
dtypes
.
size
();
++
i
)
{
framework
::
VarDesc
&
out
=
block
->
FindRecursiveOrCreateVar
(
out_names
[
i
]);
out
.
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
out
.
SetDataType
(
dtypes
[
i
]);
out
.
SetLoDLevel
(
lod_levels
[
i
]);
}
}
};
...
...
paddle/fluid/operators/sampling_id_op.cc
浏览文件 @
85ddb5c7
...
...
@@ -53,15 +53,16 @@ class SamplingIdOpMaker : public framework::OpProtoAndCheckerMaker {
SamplingId Operator.
A layer for sampling id from multinomial distribution from the
input. Sampling one id for one sample.)DOC"
);
AddAttr
<
float
>
(
"min"
,
"Minimum value of random.
[default 0.0]
."
)
AddAttr
<
float
>
(
"min"
,
"Minimum value of random.
(float, default 0.0)
."
)
.
SetDefault
(
0.0
f
);
AddAttr
<
float
>
(
"max"
,
"Maximun value of random.
[default 1.0]
."
)
AddAttr
<
float
>
(
"max"
,
"Maximun value of random.
(float, default 1.0)
."
)
.
SetDefault
(
1.0
f
);
AddAttr
<
int
>
(
"seed"
,
"Random seed used for the random number engine. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time. [default 0]."
)
AddAttr
<
int
>
(
"seed"
,
"Random seed used for the random number engine. "
"0 means use a seed generated by the system."
"Note that if seed is not 0, this operator will always "
"generate the same random numbers every time. (int, default 0)."
)
.
SetDefault
(
0
);
}
};
...
...
paddle/fluid/operators/scale_op.cc
浏览文件 @
85ddb5c7
...
...
@@ -77,8 +77,10 @@ class ScaleOpVarTypeInference : public framework::VarTypeInference {
auto
out_var_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
*
out_var
=
block
->
FindVarRecursive
(
out_var_name
);
out_var
->
SetType
(
in_var
.
GetType
());
out_var
->
SetDataType
(
in_var
.
GetDataType
());
if
(
in_var_name
!=
out_var_name
)
{
out_var
->
SetType
(
in_var
.
GetType
());
out_var
->
SetDataType
(
in_var
.
GetDataType
());
}
}
};
...
...
paddle/fluid/operators/sgd_op.cu
浏览文件 @
85ddb5c7
...
...
@@ -88,7 +88,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
auto
&
in_value
=
grad
->
value
();
framework
::
Vector
<
int64_t
>
in_rows
(
grad
->
rows
()
);
auto
&
in_rows
=
grad
->
rows
(
);
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
...
...
paddle/fluid/operators/shrink_rnn_memory_op.cc
浏览文件 @
85ddb5c7
...
...
@@ -52,26 +52,16 @@ class ShrinkRNNMemoryOp : public ArrayOp {
size_t
height
=
dst_num_rows
;
// do shrink for the top level LoD
if
(
x_tensor
.
lod
().
size
()
>
0
&&
x_tensor
.
lod
()[
0
].
size
()
>
static_cast
<
size_t
>
(
dst_num_rows
))
{
if
(
x_tensor
.
lod
().
size
()
>
1
)
{
// MultiLevel LoD
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
x_tensor
.
lod
(),
0
,
dst_num_rows
,
0
);
height
=
lod_offset
.
second
.
second
;
auto
out_lod
=
out_tensor
.
mutable_lod
();
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
}
else
{
// Shrink LoD
auto
lod_item
=
x_tensor
.
lod
()[
0
];
lod_item
.
resize
(
dst_num_rows
+
1
);
out_tensor
.
set_lod
({
lod_item
});
const
auto
&
const_lod_item
=
lod_item
;
height
=
const_lod_item
.
back
();
}
auto
lod_offset
=
framework
::
GetSubLoDAndAbsoluteOffset
(
x_tensor
.
lod
(),
0
,
dst_num_rows
,
0
);
height
=
lod_offset
.
second
.
second
;
auto
out_lod
=
out_tensor
.
mutable_lod
();
framework
::
AppendLoD
(
out_lod
,
lod_offset
.
first
);
}
if
(
height
!=
0
)
{
if
(
dst_num_rows
!=
0
)
{
out_tensor
.
mutable_data
(
place
,
x_tensor
.
type
());
auto
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
);
framework
::
TensorCopy
(
x_tensor
.
Slice
(
0
,
height
),
place
,
*
dev_ctx
,
...
...
@@ -144,11 +134,8 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
}
else
{
auto
&
dout_tensor
=
dout_var
->
Get
<
framework
::
LoDTensor
>
();
auto
height
=
dout_tensor
.
dims
()[
0
];
if
(
height
!=
0
)
{
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
TensorCopy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
}
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
TensorCopy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
if
(
dx_tensor
.
dims
()[
0
]
>
height
)
{
auto
rest_tensor
=
dx_tensor
.
Slice
(
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
dx_tensor
.
dims
()[
0
]));
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
85ddb5c7
...
...
@@ -32,7 +32,7 @@ class SumKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
in_vars
=
context
.
MultiInputVar
(
"X"
);
int
N
=
in_vars
.
size
();
size_t
in_num
=
in_vars
.
size
();
auto
out_var
=
context
.
OutputVar
(
"Out"
);
bool
in_place
=
out_var
==
in_vars
[
0
];
...
...
@@ -53,7 +53,7 @@ class SumKernel : public framework::OpKernel<T> {
auto
&
place
=
*
context
.
template
device_context
<
DeviceContext
>().
eigen_device
();
// If in_place, just skip the first tensor
for
(
int
i
=
in_place
?
1
:
0
;
i
<
N
;
i
++
)
{
for
(
size_t
i
=
in_place
?
1
:
0
;
i
<
in_num
;
i
++
)
{
if
(
in_vars
[
i
]
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
&
in_t
=
in_vars
[
i
]
->
Get
<
framework
::
LoDTensor
>
();
if
(
in_t
.
numel
()
==
0
)
{
...
...
@@ -101,13 +101,13 @@ class SumKernel : public framework::OpKernel<T> {
// Runtime InferShape
size_t
first_dim
=
0
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
in_num
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
first_dim
+=
sel_row
.
rows
().
size
();
}
std
::
vector
<
int64_t
>
in_dim
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
in_num
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
if
(
sel_row
.
rows
().
size
()
>
0
)
{
in_dim
=
framework
::
vectorize
(
sel_row
.
value
().
dims
());
...
...
@@ -116,14 +116,14 @@ class SumKernel : public framework::OpKernel<T> {
}
if
(
in_dim
.
empty
())
{
VLOG
(
3
)
<<
"WARNING: all the inputs are empty"
;
in_dim
=
framework
::
vectorize
(
get_selected_row
(
N
-
1
).
value
().
dims
());
in_dim
=
framework
::
vectorize
(
get_selected_row
(
in_num
-
1
).
value
().
dims
());
}
else
{
in_dim
[
0
]
=
static_cast
<
int64_t
>
(
first_dim
);
}
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
// if all the input sparse vars are empty, no need to
// merge these vars.
if
(
first_dim
==
0UL
)
{
...
...
@@ -133,7 +133,7 @@ class SumKernel : public framework::OpKernel<T> {
math
::
SelectedRowsAddTo
<
DeviceContext
,
T
>
functor
;
int64_t
offset
=
0
;
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
in_num
;
i
++
)
{
auto
&
sel_row
=
get_selected_row
(
i
);
if
(
sel_row
.
rows
().
size
()
==
0
)
{
continue
;
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
85ddb5c7
...
...
@@ -201,7 +201,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
compute_capability
=
GetCUDAComputeCapability
(
place_
.
device
);
multi_process
=
GetCUDAMultiProcessors
(
place_
.
device
);
max_threads_per_mp
=
GetCUDAMaxThreadsPerMultiProcessor
(
place_
.
device
);
grid_max_dims_
=
GpuMaxGridDim
(
place_
.
device
);
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream_
));
eigen_stream_
.
reset
(
new
EigenCudaStreamDevice
());
eigen_stream_
->
Reinitialize
(
&
stream_
,
place
);
...
...
@@ -240,10 +239,6 @@ int CUDADeviceContext::GetMaxPhysicalThreadCount() const {
return
multi_process
*
max_threads_per_mp
;
}
std
::
tuple
<
int
,
int
,
int
>
CUDADeviceContext
::
GetMaxGridDims
()
const
{
return
grid_max_dims_
;
}
Eigen
::
GpuDevice
*
CUDADeviceContext
::
eigen_device
()
const
{
return
eigen_device_
.
get
();
}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
85ddb5c7
...
...
@@ -13,7 +13,6 @@ limitations under the License. */
#include <memory>
#include <mutex> // NOLINT
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
...
...
@@ -92,8 +91,6 @@ class CUDADeviceContext : public DeviceContext {
/*! \brief Return the max physical thread count in the device context */
int
GetMaxPhysicalThreadCount
()
const
;
std
::
tuple
<
int
,
int
,
int
>
GetMaxGridDims
()
const
;
/*! \brief Return eigen device in the device context. */
Eigen
::
GpuDevice
*
eigen_device
()
const
;
...
...
@@ -138,8 +135,6 @@ class CUDADeviceContext : public DeviceContext {
cudaStream_t
stream_
;
cublasHandle_t
cublas_handle_
;
std
::
tuple
<
int
,
int
,
int
>
grid_max_dims_
;
int
compute_capability
;
int
multi_process
;
int
max_threads_per_mp
;
...
...
paddle/fluid/platform/for_range.h
浏览文件 @
85ddb5c7
...
...
@@ -48,54 +48,35 @@ __global__ static void ForRangeElemwiseOpGridIsOne(Function func) {
}
template
<
typename
Function
>
__global__
static
void
ForRangeElemwiseOp
(
Function
func
,
size_
t
limit
)
{
__global__
static
void
ForRangeElemwiseOp
(
Function
func
,
in
t
limit
)
{
size_t
idx
=
static_cast
<
size_t
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
if
(
idx
<
limit
)
{
func
(
idx
);
}
}
template
<
typename
Function
>
__global__
static
void
ForRangeElemwiseOpGridLarge
(
Function
func
,
size_t
limit
,
int
grid_dim
)
{
size_t
idx
=
static_cast
<
size_t
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
while
(
idx
<
limit
)
{
func
(
idx
);
idx
+=
grid_dim
;
}
}
template
<
>
struct
ForRange
<
CUDADeviceContext
>
{
ForRange
(
const
CUDADeviceContext
&
dev_ctx
,
size_t
limit
)
:
dev_ctx_
(
dev_ctx
),
limit_
(
limit
)
{}
:
dev_ctx_
(
dev_ctx
),
limit_
(
static_cast
<
int
>
(
limit
)
)
{}
template
<
typename
Function
>
inline
void
operator
()(
Function
func
)
const
{
constexpr
int
num_threads
=
1024
;
int
block_size
=
limit_
<=
num_threads
?
limit_
:
num_threads
;
size_t
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
int
max_grid_dim
=
std
::
get
<
0
>
(
dev_ctx_
.
GetMaxGridDims
());
if
(
grid_size
<
max_grid_dim
)
{
int
grid_size_int
=
static_cast
<
int
>
(
grid_size
);
if
(
grid_size
==
1
)
{
ForRangeElemwiseOpGridIsOne
<<<
1
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
);
}
else
{
ForRangeElemwiseOp
<<<
grid_size_int
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
);
}
int
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
if
(
grid_size
==
1
)
{
ForRangeElemwiseOpGridIsOne
<<<
1
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
);
}
else
{
ForRangeElemwiseOpGridLarge
<<<
max_grid_dim
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
,
max_grid_dim
);
ForRangeElemwiseOp
<<<
grid_size
,
block_size
,
0
,
dev_ctx_
.
stream
()
>>>
(
func
,
limit_
);
}
}
const
CUDADeviceContext
&
dev_ctx_
;
size_
t
limit_
;
in
t
limit_
;
};
#endif
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
85ddb5c7
...
...
@@ -152,22 +152,5 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
PADDLE_ENFORCE
(
cudaMemsetAsync
(
dst
,
value
,
count
,
stream
),
"cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync"
);
}
std
::
tuple
<
int
,
int
,
int
>
GpuMaxGridDim
(
int
id
)
{
std
::
tuple
<
int
,
int
,
int
>
result
;
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
0
>
(
result
),
cudaDevAttrMaxBlockDimX
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
1
>
(
result
),
cudaDevAttrMaxBlockDimY
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
PADDLE_ENFORCE
(
cudaDeviceGetAttribute
(
&
std
::
get
<
2
>
(
result
),
cudaDevAttrMaxBlockDimZ
,
id
),
"cudaDeviceGetAttribute failed in "
"cudaDevAttrMaxBlockDim"
);
return
result
;
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/gpu_info.h
浏览文件 @
85ddb5c7
...
...
@@ -19,7 +19,6 @@ limitations under the License. */
#include <cuda_runtime.h>
#include <stddef.h>
#include <string>
#include <tuple>
namespace
paddle
{
namespace
platform
{
...
...
@@ -73,8 +72,6 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
//! Set memory dst with value count size asynchronously
void
GpuMemsetAsync
(
void
*
dst
,
int
value
,
size_t
count
,
cudaStream_t
stream
);
std
::
tuple
<
int
,
int
,
int
>
GpuMaxGridDim
(
int
id
);
}
// namespace platform
}
// namespace paddle
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
85ddb5c7
...
...
@@ -70,8 +70,8 @@ function cmake_gen() {
PYTHON_FLAGS
=
""
SYSTEM
=
`
uname
-s
`
if
[
"
$SYSTEM
"
==
"Darwin"
]
;
then
echo
"Using python abi:
$1
"
if
[[
"
$1
"
==
"cp27-cp27m"
]]
||
[[
"
$1
"
==
""
]]
;
then
echo
"using python abi:
$1
"
if
[
-d
"/Library/Frameworks/Python.framework/Versions/2.7"
]
;
then
export
LD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/2.7
export
DYLD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/2.7
...
...
@@ -82,7 +82,18 @@ function cmake_gen() {
else
exit
1
fi
# TODO: qiyang add python3 part here
elif
[
"
$1
"
==
"cp35-cp35m"
]
;
then
if
[
-d
"/Library/Frameworks/Python.framework/Versions/3.5"
]
;
then
export
LD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/3.5/lib/
export
DYLD_LIBRARY_PATH
=
/Library/Frameworks/Python.framework/Versions/3.5/lib/
export
PATH
=
/Library/Frameworks/Python.framework/Versions/3.5/bin/:
${
PATH
}
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
else
exit
1
fi
fi
else
if
[
"
$1
"
!=
""
]
;
then
...
...
@@ -629,10 +640,10 @@ EOF
function
gen_capi_package
()
{
if
[[
${
WITH_C_API
}
==
"ON"
]]
;
then
install_prefix
=
"
${
PADDLE_ROOT
}
/build/capi_output"
rm
-rf
$install_prefix
make
DESTDIR
=
"
$install_prefix
"
install
cd
$
install_prefix
/usr/local
capi_install_prefix
=
${
INSTALL_PREFIX
:-
/paddle/build
}
/capi_output
rm
-rf
$
capi_
install_prefix
make
DESTDIR
=
"
$
capi_
install_prefix
"
install
cd
$
capi_install_prefix
/
ls
| egrep
-v
"^Found.*item$"
| xargs
tar
-czf
${
PADDLE_ROOT
}
/build/paddle.tgz
fi
}
...
...
python/paddle/dataset/common.py
浏览文件 @
85ddb5c7
...
...
@@ -77,13 +77,14 @@ def download(url, module_name, md5sum, save_name=None):
retry_limit
=
3
while
not
(
os
.
path
.
exists
(
filename
)
and
md5file
(
filename
)
==
md5sum
):
if
os
.
path
.
exists
(
filename
):
print
(
"file md5"
,
md5file
(
filename
),
md5sum
)
sys
.
stderr
.
write
(
"file %s md5 %s"
%
(
md5file
(
filename
),
md5sum
)
)
if
retry
<
retry_limit
:
retry
+=
1
else
:
raise
RuntimeError
(
"Cannot download {0} within retry limit {1}"
.
format
(
url
,
retry_limit
))
print
(
"Cache file %s not found, downloading %s"
%
(
filename
,
url
))
sys
.
stderr
.
write
(
"Cache file %s not found, downloading %s"
%
(
filename
,
url
))
r
=
requests
.
get
(
url
,
stream
=
True
)
total_length
=
r
.
headers
.
get
(
'content-length'
)
...
...
@@ -100,10 +101,11 @@ def download(url, module_name, md5sum, save_name=None):
dl
+=
len
(
data
)
f
.
write
(
data
)
done
=
int
(
50
*
dl
/
total_length
)
sys
.
std
out
.
write
(
"
\r
[%s%s]"
%
(
'='
*
done
,
sys
.
std
err
.
write
(
"
\r
[%s%s]"
%
(
'='
*
done
,
' '
*
(
50
-
done
)))
sys
.
stdout
.
flush
()
sys
.
stderr
.
write
(
"
\n
"
)
sys
.
stdout
.
flush
()
return
filename
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
85ddb5c7
...
...
@@ -18,5 +18,10 @@ from . import decoder
from
.decoder
import
*
from
.
import
memory_usage_calc
from
.memory_usage_calc
import
*
from
.
import
op_frequence
from
.op_frequence
import
*
__all__
=
decoder
.
__all__
+
memory_usage_calc
.
__all__
__all__
=
[]
__all__
+=
decoder
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
python/paddle/fluid/contrib/op_frequence.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
collections
import
OrderedDict
from
..framework
import
Program
__all__
=
[
'op_freq_statistic'
]
def
op_freq_statistic
(
program
):
"""
Statistics of Op frequency.
Args:
program(Program): The current Program.
Returns:
uni_op_freq(dict): the single op frequency.
adj_2_op_freq(dict): the two adjacent ops frequency.
Examples:
>>> import paddle.fluid as fluid
>>> uni_op_freq, adj_2_op_freq = fluid.contrib.op_freq_statistic(
>>> fluid.default_main_program())
>>> for op_type, op_num in uni_op_freq:
>>> print("%s
\t
%d" % (op_type, op_num))
>>> for op_type, op_num in adj_2_op_freq:
>>> print("%s
\t
%d" % (op_type, op_num))
"""
if
not
isinstance
(
program
,
Program
):
raise
TypeError
(
"The input type should be Porgram."
"But you passed in %s"
%
(
type
(
program
)))
uni_op_freq
=
OrderedDict
()
adj_2_op_freq
=
OrderedDict
()
op_in_ops
=
OrderedDict
()
parameters
=
[
p
.
name
for
p
in
program
.
blocks
[
0
].
all_parameters
()]
# get uni_op_freq
for
op
in
program
.
global_block
().
ops
:
had_recorded
=
False
for
var_name
in
op
.
output_arg_names
:
if
var_name
in
parameters
:
continue
if
not
had_recorded
and
uni_op_freq
.
has_key
(
op
.
type
):
uni_op_freq
[
op
.
type
]
+=
1
had_recorded
=
True
elif
not
had_recorded
:
uni_op_freq
[
op
.
type
]
=
1
had_recorded
=
True
# get adj_2_op_freq
var_gen_op
=
{}
for
op
in
program
.
global_block
().
ops
:
for
var_name
in
op
.
input_arg_names
:
if
var_name
in
parameters
:
continue
if
var_gen_op
.
has_key
(
var_name
):
assert
len
(
var_gen_op
[
var_name
])
>
0
if
op_in_ops
.
has_key
(
op
.
type
):
op_in_ops
[
op
.
type
].
append
(
var_gen_op
[
var_name
][
-
1
])
else
:
op_in_ops
[
op
.
type
]
=
[
var_gen_op
[
var_name
][
-
1
]]
else
:
print
(
"Var's generate op is not found,%s, %s"
%
(
var_name
,
op
.
type
))
for
var_name
in
op
.
output_arg_names
:
if
var_gen_op
.
has_key
(
var_name
):
var_gen_op
[
var_name
].
append
(
op
.
type
)
else
:
var_gen_op
[
var_name
]
=
[
op
.
type
]
for
op
,
in_ops
in
op_in_ops
.
iteritems
():
for
in_op
in
in_ops
:
op_op
=
in_op
+
"->"
+
op
if
adj_2_op_freq
.
has_key
(
op_op
):
adj_2_op_freq
[
op_op
]
+=
1
else
:
adj_2_op_freq
[
op_op
]
=
1
uni_op_freq
=
sorted
(
uni_op_freq
.
items
(),
key
=
lambda
item
:
item
[
1
],
reverse
=
True
)
adj_2_op_freq
=
sorted
(
adj_2_op_freq
.
items
(),
key
=
lambda
item
:
item
[
1
],
reverse
=
True
)
return
uni_op_freq
,
adj_2_op_freq
python/paddle/fluid/layers/detection.py
浏览文件 @
85ddb5c7
...
...
@@ -284,7 +284,7 @@ def detection_output(loc,
target_box
=
loc
,
code_type
=
'decode_center_size'
)
compile_shape
=
scores
.
shape
run_shape
=
ops
.
shape
(
scores
)
run_shape
=
nn
.
shape
(
scores
)
scores
=
nn
.
flatten
(
x
=
scores
,
axis
=
2
)
scores
=
nn
.
softmax
(
input
=
scores
)
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
...
...
@@ -697,7 +697,7 @@ def ssd_loss(location,
raise
ValueError
(
"Only support mining_type == max_negative now."
)
num
,
num_prior
,
num_class
=
confidence
.
shape
conf_shape
=
ops
.
shape
(
confidence
)
conf_shape
=
nn
.
shape
(
confidence
)
def
__reshape_to_2d
(
var
):
return
nn
.
flatten
(
x
=
var
,
axis
=
2
)
...
...
@@ -724,7 +724,7 @@ def ssd_loss(location,
target_label
.
stop_gradient
=
True
conf_loss
=
nn
.
softmax_with_cross_entropy
(
confidence
,
target_label
)
# 3. Mining hard examples
actual_shape
=
ops
.
slice
(
conf_shape
,
axes
=
[
0
],
starts
=
[
0
],
ends
=
[
2
])
actual_shape
=
nn
.
slice
(
conf_shape
,
axes
=
[
0
],
starts
=
[
0
],
ends
=
[
2
])
actual_shape
.
stop_gradient
=
True
conf_loss
=
nn
.
reshape
(
x
=
conf_loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
actual_shape
)
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
85ddb5c7
...
...
@@ -312,7 +312,6 @@ def _copy_reader_var_(block, var):
new_var
=
block
.
create_var
(
name
=
var
.
name
,
type
=
core
.
VarDesc
.
VarType
.
READER
)
new_var
.
desc
.
set_shapes
(
var
.
desc
.
shapes
())
new_var
.
desc
.
set_dtypes
(
var
.
desc
.
dtypes
())
new_var
.
desc
.
set_lod_levels
(
var
.
desc
.
lod_levels
())
new_var
.
persistable
=
True
return
new_var
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
85ddb5c7
...
...
@@ -29,110 +29,29 @@ from .. import unique_name
from
functools
import
reduce
__all__
=
[
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'dynamic_lstmp'
,
'dynamic_gru'
,
'gru_unit'
,
'linear_chain_crf'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'square_error_cost'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'conv3d'
,
'sequence_pool'
,
'sequence_softmax'
,
'softmax'
,
'pool2d'
,
'pool3d'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'conv3d_transpose'
,
'sequence_expand'
,
'sequence_expand_as'
,
'sequence_pad'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'hsigmoid'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'autoincreased_step_counter'
,
'reshape'
,
'squeeze'
,
'unsqueeze'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'pad_constant_like'
,
'label_smooth'
,
'roi_pool'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'gather'
,
'scatter'
,
'sequence_scatter'
,
'random_crop'
,
'mean_iou'
,
'relu'
,
'log'
,
'crop'
,
'rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_sigmoid'
,
'swish'
,
'prelu'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'flatten'
,
'sequence_mask'
,
'stack'
,
'pad2d'
,
'unstack'
,
'sequence_enumerate'
,
'expand'
,
'sequence_concat'
,
'scale'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'fc'
,
'embedding'
,
'dynamic_lstm'
,
'dynamic_lstmp'
,
'dynamic_gru'
,
'gru_unit'
,
'linear_chain_crf'
,
'crf_decoding'
,
'cos_sim'
,
'cross_entropy'
,
'square_error_cost'
,
'chunk_eval'
,
'sequence_conv'
,
'conv2d'
,
'conv3d'
,
'sequence_pool'
,
'sequence_softmax'
,
'softmax'
,
'pool2d'
,
'pool3d'
,
'batch_norm'
,
'beam_search_decode'
,
'conv2d_transpose'
,
'conv3d_transpose'
,
'sequence_expand'
,
'sequence_expand_as'
,
'sequence_pad'
,
'lstm_unit'
,
'reduce_sum'
,
'reduce_mean'
,
'reduce_max'
,
'reduce_min'
,
'reduce_prod'
,
'sequence_first_step'
,
'sequence_last_step'
,
'dropout'
,
'split'
,
'ctc_greedy_decoder'
,
'edit_distance'
,
'l2_normalize'
,
'matmul'
,
'topk'
,
'warpctc'
,
'sequence_reshape'
,
'transpose'
,
'im2sequence'
,
'nce'
,
'hsigmoid'
,
'beam_search'
,
'row_conv'
,
'multiplex'
,
'layer_norm'
,
'softmax_with_cross_entropy'
,
'smooth_l1'
,
'one_hot'
,
'autoincreased_step_counter'
,
'reshape'
,
'squeeze'
,
'unsqueeze'
,
'lod_reset'
,
'lrn'
,
'pad'
,
'pad_constant_like'
,
'label_smooth'
,
'roi_pool'
,
'dice_loss'
,
'image_resize'
,
'image_resize_short'
,
'resize_bilinear'
,
'gather'
,
'scatter'
,
'sequence_scatter'
,
'random_crop'
,
'mean_iou'
,
'relu'
,
'log'
,
'crop'
,
'rank_loss'
,
'elu'
,
'relu6'
,
'pow'
,
'stanh'
,
'hard_sigmoid'
,
'swish'
,
'prelu'
,
'brelu'
,
'leaky_relu'
,
'soft_relu'
,
'flatten'
,
'sequence_mask'
,
'stack'
,
'pad2d'
,
'unstack'
,
'sequence_enumerate'
,
'expand'
,
'sequence_concat'
,
'scale'
,
'elementwise_add'
,
'elementwise_div'
,
'elementwise_sub'
,
'elementwise_mul'
,
'elementwise_max'
,
'elementwise_min'
,
'elementwise_pow'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'sampling_id'
,
'gaussian_random_batch_size_like'
,
'sum'
,
'slice'
,
'shape'
]
...
...
@@ -6463,6 +6382,246 @@ def expand(x, expand_times, name=None):
return
out
from
paddle.fluid.framework
import
convert_np_dtype_to_dtype_
@
templatedoc
()
def
uniform_random_batch_size_like
(
input
,
shape
,
dtype
=
'float32'
,
input_dim_idx
=
0
,
output_dim_idx
=
0
,
min
=-
1.0
,
max
=
1.0
,
seed
=
0
):
"""
${comment}
Args:
input (Variable): ${input_comment}
shape (tuple|list): ${shape_comment}
input_dim_idx (Int): ${input_dim_idx_comment}
output_dim_idx (Int): ${output_dim_idx_comment}
min (Float): ${min_comment}
max (Float): ${max_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'uniform_random_batch_size_like'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
type
=
'uniform_random_batch_size_like'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'shape'
:
shape
,
'input_dim_idx'
:
input_dim_idx
,
'output_dim_idx'
:
output_dim_idx
,
'min'
:
min
,
'max'
:
max
,
'seed'
:
seed
,
'dtype'
:
c_dtype
})
return
out
@
templatedoc
()
def
gaussian_random
(
shape
,
mean
=
0.0
,
std
=
1.0
,
seed
=
0
,
dtype
=
'float32'
,
use_mkldnn
=
False
):
"""
${comment}
Args:
shape (tuple|list): ${shape_comment}
mean (Float): ${mean_comment}
std (Float): ${std_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): Output data type.
use_mkldnn (Bool): Only used in mkldnn kernel.
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'gaussian_random'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
type
=
'gaussian_random'
,
outputs
=
{
'Out'
:
out
},
attrs
=
{
'shape'
:
shape
,
'mean'
:
mean
,
'std'
:
std
,
'seed'
:
seed
,
'dtype'
:
c_dtype
,
'use_mkldnn'
:
use_mkldnn
})
return
out
@
templatedoc
()
def
sampling_id
(
x
,
min
=
0.0
,
max
=
1.0
,
seed
=
0
,
dtype
=
'float32'
):
"""
${comment}
Args:
x (Variable): ${x_comment}
min (Float): ${min_comment}
max (Float): ${max_comment}
seed (Float): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of output data : float32, float_16, int etc
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'sampling_id'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
'sampling_id'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'min'
:
min
,
'max'
:
max
,
'seed'
:
seed
})
return
out
@
templatedoc
()
def
gaussian_random_batch_size_like
(
input
,
shape
,
input_dim_idx
=
0
,
output_dim_idx
=
0
,
mean
=
0.0
,
std
=
1.0
,
seed
=
0
,
dtype
=
'float32'
):
"""
${comment}
Args:
input (Variable): ${input_comment}
shape (tuple|list): ${shape_comment}
input_dim_idx (Int): ${input_dim_idx_comment}
output_dim_idx (Int): ${output_dim_idx_comment}
mean (Float): ${mean_comment}
std (Float): ${std_comment}
seed (Int): ${seed_comment}
dtype(np.dtype|core.VarDesc.VarType|str): The type of output data : float32, float_16, int etc
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'gaussian_random_batch_size_like'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
)
c_dtype
=
convert_np_dtype_to_dtype_
(
dtype
)
helper
.
append_op
(
type
=
'gaussian_random_batch_size_like'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'shape'
:
shape
,
'input_dim_idx'
:
input_dim_idx
,
'output_dim_idx'
:
output_dim_idx
,
'mean'
:
mean
,
'std'
:
std
,
'seed'
:
seed
,
'dtype'
:
c_dtype
})
return
out
@
templatedoc
()
def
sum
(
x
,
use_mkldnn
=
False
):
"""
${comment}
Args:
x (Variable): ${x_comment}
use_mkldnn (Bool): ${use_mkldnn_comment}
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'sum'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'x'
))
helper
.
append_op
(
type
=
'sum'
,
inputs
=
{
'X'
:
x
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'use_mkldnn'
:
use_mkldnn
})
return
out
@
templatedoc
()
def
slice
(
input
,
axes
,
starts
,
ends
):
"""
${comment}
Args:
input (Variable): ${input_comment}.
axes (List): ${axes_comment}
starts (List): ${starts_comment}
ends (List): ${ends_comment}
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'slice'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'input'
))
helper
.
append_op
(
type
=
'slice'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'axes'
:
axes
,
'starts'
:
starts
,
'ends'
:
ends
})
return
out
@
templatedoc
()
def
shape
(
input
):
"""
${comment}
Args:
input (Variable): ${input_comment}
Returns:
out (Variable): ${out_comment}
"""
helper
=
LayerHelper
(
'shape'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
(
'input'
))
helper
.
append_op
(
type
=
'shape'
,
inputs
=
{
'Input'
:
input
},
outputs
=
{
'Out'
:
out
})
return
out
def
_elementwise_op
(
helper
):
op_type
=
helper
.
layer_type
x
=
helper
.
kwargs
.
get
(
'x'
,
None
)
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
85ddb5c7
...
...
@@ -45,13 +45,6 @@ __all__ = [
'logical_or'
,
'logical_xor'
,
'logical_not'
,
'uniform_random_batch_size_like'
,
'gaussian_random'
,
'sampling_id'
,
'gaussian_random_batch_size_like'
,
'sum'
,
'slice'
,
'shape'
,
'maxout'
,
]
...
...
@@ -63,6 +56,8 @@ for _OP in set(__all__):
# e.g.: test_program_code.py, test_dist_train.py
globals
()[
'_scale'
]
=
generate_layer_fn
(
'scale'
)
globals
()[
'_elementwise_div'
]
=
generate_layer_fn
(
'elementwise_div'
)
__all__
+=
__activations_noattr__
for
_OP
in
set
(
__activations_noattr__
):
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
85ddb5c7
...
...
@@ -26,6 +26,7 @@ from .layer_helper import LayerHelper
from
.regularizer
import
append_regularization_ops
from
.clip
import
append_gradient_clip_ops
,
error_clip_callback
from
contextlib
import
contextmanager
from
.layers
import
ops
__all__
=
[
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'Ftrl'
,
...
...
@@ -1301,7 +1302,7 @@ class ModelAverage(Optimizer):
x
=
tmp
,
dtype
=
'float32'
if
self
.
_dtype
==
None
else
self
.
_dtype
)
sum
=
layers
.
cast
(
x
=
sum
,
dtype
=
'float32'
if
self
.
_dtype
==
None
else
self
.
_dtype
)
layers
.
elementwise_div
(
x
=
sum
,
y
=
tmp
,
out
=
param
)
ops
.
_
elementwise_div
(
x
=
sum
,
y
=
tmp
,
out
=
param
)
def
_add_average_restore_op
(
self
,
block
,
param_grad
):
param
=
block
.
_clone_variable
(
param_grad
[
0
])
...
...
python/paddle/fluid/tests/unittests/dist_ctr.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
paddle
import
paddle.fluid
as
fluid
import
dist_ctr_reader
from
test_dist_base
import
TestDistRunnerBase
,
runtime_main
IS_SPARSE
=
True
# Fix seed for test
fluid
.
default_startup_program
().
random_seed
=
1
fluid
.
default_main_program
().
random_seed
=
1
class
TestDistCTR2x2
(
TestDistRunnerBase
):
def
get_model
(
self
,
batch_size
=
2
):
dnn_input_dim
,
lr_input_dim
=
dist_ctr_reader
.
load_data_meta
()
""" network definition """
dnn_data
=
fluid
.
layers
.
data
(
name
=
"dnn_data"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
,
lod_level
=
1
,
append_batch_size
=
False
)
lr_data
=
fluid
.
layers
.
data
(
name
=
"lr_data"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
,
lod_level
=
1
,
append_batch_size
=
False
)
label
=
fluid
.
layers
.
data
(
name
=
"click"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
,
lod_level
=
0
,
append_batch_size
=
False
)
# build dnn model
dnn_layer_dims
=
[
128
,
64
,
32
,
1
]
dnn_embedding
=
fluid
.
layers
.
embedding
(
is_distributed
=
False
,
input
=
dnn_data
,
size
=
[
dnn_input_dim
,
dnn_layer_dims
[
0
]],
param_attr
=
fluid
.
ParamAttr
(
name
=
"deep_embedding"
,
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)),
is_sparse
=
IS_SPARSE
)
dnn_pool
=
fluid
.
layers
.
sequence_pool
(
input
=
dnn_embedding
,
pool_type
=
"sum"
)
dnn_out
=
dnn_pool
for
i
,
dim
in
enumerate
(
dnn_layer_dims
[
1
:]):
fc
=
fluid
.
layers
.
fc
(
input
=
dnn_out
,
size
=
dim
,
act
=
"relu"
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)),
name
=
'dnn-fc-%d'
%
i
)
dnn_out
=
fc
# build lr model
lr_embbding
=
fluid
.
layers
.
embedding
(
is_distributed
=
False
,
input
=
lr_data
,
size
=
[
lr_input_dim
,
1
],
param_attr
=
fluid
.
ParamAttr
(
name
=
"wide_embedding"
,
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)),
is_sparse
=
IS_SPARSE
)
lr_pool
=
fluid
.
layers
.
sequence_pool
(
input
=
lr_embbding
,
pool_type
=
"sum"
)
merge_layer
=
fluid
.
layers
.
concat
(
input
=
[
dnn_out
,
lr_pool
],
axis
=
1
)
predict
=
fluid
.
layers
.
fc
(
input
=
merge_layer
,
size
=
2
,
act
=
'softmax'
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
auc_var
,
batch_auc_var
,
auc_states
=
fluid
.
layers
.
auc
(
input
=
predict
,
label
=
label
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
inference_program
=
paddle
.
fluid
.
default_main_program
().
clone
()
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.0001
)
sgd_optimizer
.
minimize
(
avg_cost
)
dataset
=
dist_ctr_reader
.
Dataset
()
train_reader
=
paddle
.
batch
(
dataset
.
train
(),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
dataset
.
test
(),
batch_size
=
batch_size
)
return
inference_program
,
avg_cost
,
train_reader
,
test_reader
,
None
,
predict
if
__name__
==
"__main__"
:
runtime_main
(
TestDistCTR2x2
)
python/paddle/fluid/tests/unittests/dist_ctr_reader.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
logging
import
paddle
import
tarfile
logging
.
basicConfig
()
logger
=
logging
.
getLogger
(
"paddle"
)
logger
.
setLevel
(
logging
.
INFO
)
DATA_URL
=
"http://paddle-ctr-data.cdn.bcebos.com/avazu_ctr_data.tgz"
DATA_MD5
=
"c11df99fbd14e53cd4bfa6567344b26e"
"""
avazu_ctr_data/train.txt
avazu_ctr_data/infer.txt
avazu_ctr_data/test.txt
avazu_ctr_data/data.meta.txt
"""
def
read_data
(
file_name
):
path
=
paddle
.
dataset
.
common
.
download
(
DATA_URL
,
"avazu_ctr_data"
,
DATA_MD5
)
tar
=
tarfile
.
open
(
path
,
"r:gz"
)
tar_info
=
None
for
member
in
tar
.
getmembers
():
if
member
.
name
.
endswith
(
file_name
):
tar_info
=
member
f
=
tar
.
extractfile
(
tar_info
)
ret_lines
=
[
_
.
decode
(
'utf-8'
)
for
_
in
f
.
readlines
()]
return
ret_lines
class
TaskMode
:
TRAIN_MODE
=
0
TEST_MODE
=
1
INFER_MODE
=
2
def
__init__
(
self
,
mode
):
self
.
mode
=
mode
def
is_train
(
self
):
return
self
.
mode
==
self
.
TRAIN_MODE
def
is_test
(
self
):
return
self
.
mode
==
self
.
TEST_MODE
def
is_infer
(
self
):
return
self
.
mode
==
self
.
INFER_MODE
@
staticmethod
def
create_train
():
return
TaskMode
(
TaskMode
.
TRAIN_MODE
)
@
staticmethod
def
create_test
():
return
TaskMode
(
TaskMode
.
TEST_MODE
)
@
staticmethod
def
create_infer
():
return
TaskMode
(
TaskMode
.
INFER_MODE
)
class
ModelType
:
CLASSIFICATION
=
0
REGRESSION
=
1
def
__init__
(
self
,
mode
):
self
.
mode
=
mode
def
is_classification
(
self
):
return
self
.
mode
==
self
.
CLASSIFICATION
def
is_regression
(
self
):
return
self
.
mode
==
self
.
REGRESSION
@
staticmethod
def
create_classification
():
return
ModelType
(
ModelType
.
CLASSIFICATION
)
@
staticmethod
def
create_regression
():
return
ModelType
(
ModelType
.
REGRESSION
)
def
load_dnn_input_record
(
sent
):
return
list
(
map
(
int
,
sent
.
split
()))
def
load_lr_input_record
(
sent
):
res
=
[]
for
_
in
[
x
.
split
(
':'
)
for
x
in
sent
.
split
()]:
res
.
append
(
int
(
_
[
0
]))
return
res
feeding_index
=
{
'dnn_input'
:
0
,
'lr_input'
:
1
,
'click'
:
2
}
class
Dataset
(
object
):
def
train
(
self
):
'''
Load trainset.
'''
file_name
=
"train.txt"
logger
.
info
(
"load trainset from %s"
%
file_name
)
mode
=
TaskMode
.
create_train
()
return
self
.
_parse_creator
(
file_name
,
mode
)
def
test
(
self
):
'''
Load testset.
'''
file_name
=
"test.txt"
logger
.
info
(
"load testset from %s"
%
file_name
)
mode
=
TaskMode
.
create_test
()
return
self
.
_parse_creator
(
file_name
,
mode
)
def
infer
(
self
):
'''
Load infer set.
'''
file_name
=
"infer.txt"
logger
.
info
(
"load inferset from %s"
%
file_name
)
mode
=
TaskMode
.
create_infer
()
return
self
.
_parse_creator
(
file_name
,
mode
)
def
_parse_creator
(
self
,
file_name
,
mode
):
'''
Parse dataset.
'''
def
_parse
():
data
=
read_data
(
file_name
)
for
line_id
,
line
in
enumerate
(
data
):
fs
=
line
.
strip
().
split
(
'
\t
'
)
dnn_input
=
load_dnn_input_record
(
fs
[
0
])
lr_input
=
load_lr_input_record
(
fs
[
1
])
if
not
mode
.
is_infer
():
click
=
int
(
fs
[
2
])
yield
[
dnn_input
,
lr_input
,
click
]
else
:
yield
[
dnn_input
,
lr_input
]
return
_parse
def
load_data_meta
():
'''
load data meta info from path, return (dnn_input_dim, lr_input_dim)
'''
lines
=
read_data
(
'data.meta.txt'
)
err_info
=
"wrong meta format"
assert
len
(
lines
)
==
2
,
err_info
assert
'dnn_input_dim:'
in
lines
[
0
]
and
'lr_input_dim:'
in
lines
[
1
],
err_info
res
=
map
(
int
,
[
_
.
split
(
':'
)[
1
]
for
_
in
lines
])
res
=
list
(
res
)
logger
.
info
(
'dnn input dim: %d'
%
res
[
0
])
logger
.
info
(
'lr input dim: %d'
%
res
[
1
])
return
res
python/paddle/fluid/tests/unittests/dist_mnist.py
浏览文件 @
85ddb5c7
...
...
@@ -47,7 +47,7 @@ def cnn_model(data):
pool_stride
=
2
,
act
=
"relu"
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.
3
)))
value
=
0.
01
)))
conv_pool_2
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
conv_pool_1
,
filter_size
=
5
,
...
...
@@ -56,7 +56,7 @@ def cnn_model(data):
pool_stride
=
2
,
act
=
"relu"
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.
2
)))
value
=
0.
01
)))
SIZE
=
10
input_shape
=
conv_pool_2
.
shape
...
...
@@ -68,7 +68,7 @@ def cnn_model(data):
size
=
SIZE
,
act
=
"softmax"
,
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.
0
1
)))
return
predict
...
...
python/paddle/fluid/tests/unittests/dist_simnet_bow.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
argparse
import
time
import
math
import
random
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.profiler
as
profiler
from
paddle.fluid
import
core
import
unittest
from
multiprocessing
import
Process
import
os
import
signal
from
functools
import
reduce
from
test_dist_base
import
TestDistRunnerBase
,
runtime_main
DTYPE
=
"int64"
DATA_URL
=
'http://paddle-dist-ce-data.bj.bcebos.com/simnet.train.1000'
DATA_MD5
=
'24e49366eb0611c552667989de2f57d5'
# For Net
base_lr
=
0.2
emb_lr
=
base_lr
*
3
dict_dim
=
1500
emb_dim
=
128
hid_dim
=
128
margin
=
0.1
sample_rate
=
1
# Fix seed for test
fluid
.
default_startup_program
().
random_seed
=
1
fluid
.
default_main_program
().
random_seed
=
1
def
get_acc
(
cos_q_nt
,
cos_q_pt
,
batch_size
):
cond
=
fluid
.
layers
.
less_than
(
cos_q_nt
,
cos_q_pt
)
cond
=
fluid
.
layers
.
cast
(
cond
,
dtype
=
'float64'
)
cond_3
=
fluid
.
layers
.
reduce_sum
(
cond
)
acc
=
fluid
.
layers
.
elementwise_div
(
cond_3
,
fluid
.
layers
.
fill_constant
(
shape
=
[
1
],
value
=
batch_size
*
1.0
,
dtype
=
'float64'
),
name
=
"simnet_acc"
)
return
acc
def
get_loss
(
cos_q_pt
,
cos_q_nt
):
loss_op1
=
fluid
.
layers
.
elementwise_sub
(
fluid
.
layers
.
fill_constant_batch_size_like
(
input
=
cos_q_pt
,
shape
=
[
-
1
,
1
],
value
=
margin
,
dtype
=
'float32'
),
cos_q_pt
)
loss_op2
=
fluid
.
layers
.
elementwise_add
(
loss_op1
,
cos_q_nt
)
loss_op3
=
fluid
.
layers
.
elementwise_max
(
fluid
.
layers
.
fill_constant_batch_size_like
(
input
=
loss_op2
,
shape
=
[
-
1
,
1
],
value
=
0.0
,
dtype
=
'float32'
),
loss_op2
)
avg_cost
=
fluid
.
layers
.
mean
(
loss_op3
)
return
avg_cost
def
get_optimizer
():
# SGD optimizer
optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
base_lr
)
return
optimizer
def
train_network
(
batch_size
,
is_distributed
=
False
,
is_sparse
=
False
):
# query
q
=
fluid
.
layers
.
data
(
name
=
"query_ids"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
## embedding
q_emb
=
fluid
.
layers
.
embedding
(
input
=
q
,
is_distributed
=
is_distributed
,
size
=
[
dict_dim
,
emb_dim
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
),
name
=
"__emb__"
,
learning_rate
=
emb_lr
),
is_sparse
=
is_sparse
)
## vsum
q_sum
=
fluid
.
layers
.
sequence_pool
(
input
=
q_emb
,
pool_type
=
'sum'
)
q_ss
=
fluid
.
layers
.
softsign
(
q_sum
)
## fc layer after conv
q_fc
=
fluid
.
layers
.
fc
(
input
=
q_ss
,
size
=
hid_dim
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
),
name
=
"__q_fc__"
,
learning_rate
=
base_lr
))
# label data
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
)
# pt
pt
=
fluid
.
layers
.
data
(
name
=
"pos_title_ids"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
## embedding
pt_emb
=
fluid
.
layers
.
embedding
(
input
=
pt
,
is_distributed
=
is_distributed
,
size
=
[
dict_dim
,
emb_dim
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
),
name
=
"__emb__"
,
learning_rate
=
emb_lr
),
is_sparse
=
is_sparse
)
## vsum
pt_sum
=
fluid
.
layers
.
sequence_pool
(
input
=
pt_emb
,
pool_type
=
'sum'
)
pt_ss
=
fluid
.
layers
.
softsign
(
pt_sum
)
## fc layer
pt_fc
=
fluid
.
layers
.
fc
(
input
=
pt_ss
,
size
=
hid_dim
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
),
name
=
"__fc__"
,
learning_rate
=
base_lr
),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"__fc_b__"
))
# nt
nt
=
fluid
.
layers
.
data
(
name
=
"neg_title_ids"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
## embedding
nt_emb
=
fluid
.
layers
.
embedding
(
input
=
nt
,
is_distributed
=
is_distributed
,
size
=
[
dict_dim
,
emb_dim
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
),
name
=
"__emb__"
,
learning_rate
=
emb_lr
),
is_sparse
=
is_sparse
)
## vsum
nt_sum
=
fluid
.
layers
.
sequence_pool
(
input
=
nt_emb
,
pool_type
=
'sum'
)
nt_ss
=
fluid
.
layers
.
softsign
(
nt_sum
)
## fc layer
nt_fc
=
fluid
.
layers
.
fc
(
input
=
nt_ss
,
size
=
hid_dim
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
),
name
=
"__fc__"
,
learning_rate
=
base_lr
),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"__fc_b__"
))
cos_q_pt
=
fluid
.
layers
.
cos_sim
(
q_fc
,
pt_fc
)
cos_q_nt
=
fluid
.
layers
.
cos_sim
(
q_fc
,
nt_fc
)
# loss
avg_cost
=
get_loss
(
cos_q_pt
,
cos_q_nt
)
# acc
acc
=
get_acc
(
cos_q_nt
,
cos_q_pt
,
batch_size
)
return
[
avg_cost
,
acc
,
cos_q_pt
]
def
combination
(
x
,
y
):
res
=
[[[
xi
,
yi
]
for
yi
in
y
]
for
xi
in
x
]
return
res
[
0
]
def
get_one_data
(
file_list
):
for
file
in
file_list
:
contents
=
[]
with
open
(
file
,
"r"
)
as
fin
:
for
i
in
fin
:
contents
.
append
(
i
.
strip
())
for
index
,
q
in
enumerate
(
contents
):
try
:
one_data
=
[[
int
(
j
)
for
j
in
i
.
split
(
" "
)]
for
i
in
q
.
split
(
";"
)[:
-
1
]]
if
one_data
[
1
][
0
]
+
one_data
[
1
][
1
]
!=
len
(
one_data
)
-
3
:
q
=
fin
.
readline
()
continue
tmp
=
combination
(
one_data
[
3
:
3
+
one_data
[
1
][
0
]],
one_data
[
3
+
one_data
[
1
][
0
]:])
except
Exception
as
e
:
continue
for
each
in
tmp
:
yield
[
one_data
[
2
],
0
,
each
[
0
],
each
[
1
]]
def
get_batch_reader
(
file_list
,
batch_size
):
def
batch_reader
():
res
=
[]
for
i
in
get_one_data
(
file_list
):
if
random
.
random
()
<=
sample_rate
:
res
.
append
(
i
)
if
len
(
res
)
>=
batch_size
:
yield
res
res
=
[]
return
batch_reader
def
get_train_reader
(
batch_size
):
# The training data set.
train_file
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"simnet"
,
"train"
)
train_reader
=
get_batch_reader
([
train_file
],
batch_size
)
train_feed
=
[
"query_ids"
,
"pos_title_ids"
,
"neg_title_ids"
,
"label"
]
return
train_reader
,
train_feed
class
TestDistSimnetBow2x2
(
TestDistRunnerBase
):
def
get_model
(
self
,
batch_size
=
2
):
# Train program
avg_cost
,
acc
,
predict
=
\
train_network
(
batch_size
,
bool
(
int
(
os
.
environ
[
"IS_DISTRIBUTED"
])),
bool
(
int
(
os
.
environ
[
"IS_SPARSE"
])))
inference_program
=
fluid
.
default_main_program
().
clone
()
# Optimization
opt
=
get_optimizer
()
opt
.
minimize
(
avg_cost
)
# Reader
train_reader
,
_
=
get_train_reader
(
batch_size
)
return
inference_program
,
avg_cost
,
train_reader
,
train_reader
,
acc
,
predict
if
__name__
==
"__main__"
:
paddle
.
dataset
.
common
.
download
(
DATA_URL
,
'simnet'
,
DATA_MD5
,
"train"
)
runtime_main
(
TestDistSimnetBow2x2
)
python/paddle/fluid/tests/unittests/dist_text_classification.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
argparse
import
time
import
math
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.profiler
as
profiler
from
paddle.fluid
import
core
import
unittest
from
multiprocessing
import
Process
import
os
import
signal
import
six
import
tarfile
import
string
import
re
from
functools
import
reduce
from
test_dist_base
import
TestDistRunnerBase
,
runtime_main
DTYPE
=
"float32"
VOCAB_URL
=
'http://paddle-dist-ce-data.bj.bcebos.com/imdb.vocab'
VOCAB_MD5
=
'23c86a0533c0151b6f12fa52b106dcc2'
DATA_URL
=
'http://paddle-dist-ce-data.bj.bcebos.com/text_classification.tar.gz'
DATA_MD5
=
'29ebfc94f11aea9362bbb7f5e9d86b8a'
# Load dictionary.
def
load_vocab
(
filename
):
vocab
=
{}
if
six
.
PY2
:
with
open
(
filename
,
'r'
)
as
f
:
for
idx
,
line
in
enumerate
(
f
):
vocab
[
line
.
strip
()]
=
idx
else
:
with
open
(
filename
,
'r'
,
encoding
=
"utf-8"
)
as
f
:
for
idx
,
line
in
enumerate
(
f
):
vocab
[
line
.
strip
()]
=
idx
return
vocab
def
get_worddict
(
dict_path
):
word_dict
=
load_vocab
(
dict_path
)
word_dict
[
"<unk>"
]
=
len
(
word_dict
)
dict_dim
=
len
(
word_dict
)
return
word_dict
,
dict_dim
def
conv_net
(
input
,
dict_dim
,
emb_dim
=
128
,
window_size
=
3
,
num_filters
=
128
,
fc0_dim
=
96
,
class_dim
=
2
):
emb
=
fluid
.
layers
.
embedding
(
input
=
input
,
size
=
[
dict_dim
,
emb_dim
],
is_sparse
=
False
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)))
conv_3
=
fluid
.
nets
.
sequence_conv_pool
(
input
=
emb
,
num_filters
=
num_filters
,
filter_size
=
window_size
,
act
=
"tanh"
,
pool_type
=
"max"
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)))
fc_0
=
fluid
.
layers
.
fc
(
input
=
[
conv_3
],
size
=
fc0_dim
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)))
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_0
],
size
=
class_dim
,
act
=
"softmax"
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.01
)))
return
prediction
def
inference_network
(
dict_dim
):
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
out
=
conv_net
(
data
,
dict_dim
)
return
out
def
get_reader
(
word_dict
,
batch_size
):
# The training data set.
train_reader
=
paddle
.
batch
(
train
(
word_dict
),
batch_size
=
batch_size
)
# The testing data set.
test_reader
=
paddle
.
batch
(
test
(
word_dict
),
batch_size
=
batch_size
)
return
train_reader
,
test_reader
def
get_optimizer
(
learning_rate
):
optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
learning_rate
)
return
optimizer
class
TestDistTextClassification2x2
(
TestDistRunnerBase
):
def
get_model
(
self
,
batch_size
=
2
):
vocab
=
os
.
path
.
join
(
paddle
.
dataset
.
common
.
DATA_HOME
,
"text_classification"
,
"imdb.vocab"
)
word_dict
,
dict_dim
=
get_worddict
(
vocab
)
# Input data
data
=
fluid
.
layers
.
data
(
name
=
"words"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
# Train program
predict
=
conv_net
(
data
,
dict_dim
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
predict
,
label
=
label
)
inference_program
=
fluid
.
default_main_program
().
clone
()
# Optimization
opt
=
get_optimizer
(
learning_rate
=
0.001
)
opt
.
minimize
(
avg_cost
)
# Reader
train_reader
,
test_reader
=
get_reader
(
word_dict
,
batch_size
)
return
inference_program
,
avg_cost
,
train_reader
,
test_reader
,
acc
,
predict
def
tokenize
(
pattern
):
"""
Read files that match the given pattern. Tokenize and yield each file.
"""
with
tarfile
.
open
(
paddle
.
dataset
.
common
.
download
(
DATA_URL
,
'text_classification'
,
DATA_MD5
))
as
tarf
:
# Note that we should use tarfile.next(), which does
# sequential access of member files, other than
# tarfile.extractfile, which does random access and might
# destroy hard disks.
tf
=
tarf
.
next
()
while
tf
!=
None
:
if
bool
(
pattern
.
match
(
tf
.
name
)):
# newline and punctuations removal and ad-hoc tokenization.
yield
tarf
.
extractfile
(
tf
).
read
().
rstrip
(
six
.
b
(
"
\n\r
"
)).
translate
(
None
,
six
.
b
(
string
.
punctuation
)).
lower
().
split
()
tf
=
tarf
.
next
()
def
reader_creator
(
pos_pattern
,
neg_pattern
,
word_idx
):
UNK
=
word_idx
[
'<unk>'
]
INS
=
[]
def
load
(
pattern
,
out
,
label
):
for
doc
in
tokenize
(
pattern
):
out
.
append
(([
word_idx
.
get
(
w
,
UNK
)
for
w
in
doc
],
label
))
load
(
pos_pattern
,
INS
,
0
)
load
(
neg_pattern
,
INS
,
1
)
def
reader
():
for
doc
,
label
in
INS
:
yield
doc
,
label
return
reader
def
train
(
word_idx
):
"""
IMDB training set creator.
It returns a reader creator, each sample in the reader is an zero-based ID
sequence and label in [0, 1].
:param word_idx: word dictionary
:type word_idx: dict
:return: Training reader creator
:rtype: callable
"""
return
reader_creator
(
re
.
compile
(
"train/pos/.*\.txt$"
),
re
.
compile
(
"train/neg/.*\.txt$"
),
word_idx
)
def
test
(
word_idx
):
"""
IMDB test set creator.
It returns a reader creator, each sample in the reader is an zero-based ID
sequence and label in [0, 1].
:param word_idx: word dictionary
:type word_idx: dict
:return: Test reader creator
:rtype: callable
"""
return
reader_creator
(
re
.
compile
(
"test/pos/.*\.txt$"
),
re
.
compile
(
"test/neg/.*\.txt$"
),
word_idx
)
if
__name__
==
"__main__"
:
paddle
.
dataset
.
common
.
download
(
VOCAB_URL
,
'text_classification'
,
VOCAB_MD5
)
paddle
.
dataset
.
common
.
download
(
DATA_URL
,
'text_classification'
,
DATA_MD5
)
runtime_main
(
TestDistTextClassification2x2
)
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
85ddb5c7
...
...
@@ -1699,10 +1699,9 @@ class DistTransformer2x2(TestDistRunnerBase):
exe
.
run
(
startup_prog
)
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
use_cuda
,
args
):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
TrainTaskConfig
.
use_gpu
=
use_cuda
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
,
test_program
=
get_model
(
def
run_trainer
(
self
,
args
):
TrainTaskConfig
.
use_gpu
=
args
.
use_cuda
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
=
get_model
(
args
.
is_dist
,
not
args
.
sync_mode
)
if
args
.
is_dist
:
...
...
@@ -1718,6 +1717,11 @@ class DistTransformer2x2(TestDistRunnerBase):
TrainTaskConfig
.
batch_size
=
20
trainer_prog
=
fluid
.
default_main_program
()
if
args
.
use_cuda
:
place
=
fluid
.
CUDAPlace
(
0
)
else
:
place
=
fluid
.
CPUPlace
()
startup_exe
=
fluid
.
Executor
(
place
)
TrainTaskConfig
.
local
=
not
args
.
is_dist
...
...
python/paddle/fluid/tests/unittests/dist_word2vec.py
浏览文件 @
85ddb5c7
...
...
@@ -122,4 +122,7 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
if
__name__
==
"__main__"
:
import
os
os
.
environ
[
'CPU_NUM'
]
=
'1'
os
.
environ
[
'USE_CUDA'
]
=
"FALSE"
runtime_main
(
TestDistWord2vec2x2
)
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
85ddb5c7
...
...
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
str
(
actual_t
)
+
" in class "
+
self
.
__class__
.
__name__
)
if
isinstance
(
expect
,
tuple
):
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
...
...
python/paddle/fluid/tests/unittests/test_detection_map_op.py
浏览文件 @
85ddb5c7
...
...
@@ -20,6 +20,7 @@ import six
import
sys
import
collections
import
math
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
...
...
@@ -32,7 +33,7 @@ class TestDetectionMAPOp(OpTest):
self
.
detect
=
np
.
array
(
self
.
detect
).
astype
(
'float32'
)
self
.
mAP
=
np
.
array
(
self
.
mAP
).
astype
(
'float32'
)
if
(
len
(
self
.
class_pos_count
)
>
0
)
:
if
len
(
self
.
class_pos_count
)
>
0
:
self
.
class_pos_count
=
np
.
array
(
self
.
class_pos_count
).
astype
(
'int32'
)
self
.
true_pos
=
np
.
array
(
self
.
true_pos
).
astype
(
'float32'
)
...
...
@@ -273,7 +274,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
class
TestDetectionMAPOpMultiBatch
(
TestDetectionMAPOp
):
def
init_test_case
(
self
):
super
(
TestDetectionMAPOpMultiBatch
,
self
).
init_test_case
()
self
.
class_pos_count
=
[
0
,
2
,
1
]
self
.
class_pos_count
=
[
0
,
2
,
1
,
0
]
self
.
true_pos_lod
=
[[
0
,
3
,
2
]]
self
.
true_pos
=
[[
0.7
,
1.
],
[
0.3
,
0.
],
[
0.2
,
1.
],
[
0.8
,
0.
],
[
0.1
,
1.
]]
self
.
false_pos_lod
=
[[
0
,
3
,
2
]]
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
85ddb5c7
...
...
@@ -18,23 +18,27 @@ import time
import
unittest
import
os
import
sys
import
six
import
signal
import
subprocess
import
six
import
argparse
import
paddle.fluid
as
fluid
RUN_STEP
=
10
class
TestDistRunnerBase
(
object
):
def
get_model
(
self
,
batch_size
=
2
):
raise
NotImplementedError
(
"get_model should be implemented by child classes."
)
def
get_transpiler
(
self
,
trainer_id
,
main_program
,
pserver_endpoints
,
trainers
,
sync_mode
):
@
staticmethod
def
get_transpiler
(
trainer_id
,
main_program
,
pserver_endpoints
,
trainers
,
sync_mode
):
# NOTE: import fluid until runtime, or else forking processes will cause error.
import
paddle
import
paddle.fluid
as
fluid
t
=
fluid
.
DistributeTranspiler
()
config
=
fluid
.
DistributeTranspilerConfig
()
t
=
fluid
.
DistributeTranspiler
(
config
=
config
)
t
.
transpile
(
trainer_id
=
trainer_id
,
program
=
main_program
,
...
...
@@ -44,9 +48,9 @@ class TestDistRunnerBase(object):
return
t
def
run_pserver
(
self
,
args
):
import
paddle
import
paddle.fluid
as
fluid
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
...
...
@@ -61,12 +65,10 @@ class TestDistRunnerBase(object):
exe
.
run
(
startup_prog
)
exe
.
run
(
pserver_prog
)
def
run_trainer
(
self
,
use_cuda
,
args
):
import
paddle
import
paddle.fluid
as
fluid
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
def
run_trainer
(
self
,
args
):
test_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
=
\
self
.
get_model
(
batch_size
=
2
)
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
if
args
.
is_dist
:
...
...
@@ -74,16 +76,23 @@ class TestDistRunnerBase(object):
fluid
.
default_main_program
(),
args
.
endpoints
,
args
.
trainers
,
args
.
sync_mode
)
trainer_prog
=
t
.
get_trainer_program
()
else
:
trainer_prog
=
fluid
.
default_main_program
()
if
args
.
use_cuda
:
place
=
fluid
.
CUDAPlace
(
0
)
else
:
place
=
fluid
.
CPUPlace
()
startup_exe
=
fluid
.
Executor
(
place
)
startup_exe
.
run
(
fluid
.
default_startup_program
())
strategy
=
fluid
.
ExecutionStrategy
()
strategy
.
num_threads
=
1
strategy
.
allow_op_delay
=
False
build_stra
=
fluid
.
BuildStrategy
()
if
args
.
use_reduce
:
...
...
@@ -92,7 +101,7 @@ class TestDistRunnerBase(object):
build_stra
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
exe
=
fluid
.
ParallelExecutor
(
use_cuda
,
args
.
use_cuda
,
loss_name
=
avg_cost
.
name
,
exec_strategy
=
strategy
,
build_strategy
=
build_stra
)
...
...
@@ -103,27 +112,26 @@ class TestDistRunnerBase(object):
]
feeder
=
fluid
.
DataFeeder
(
feed_var_list
,
place
)
reader_generator
=
test_reader
()
data
=
next
(
reader_generator
)
first_loss
,
=
exe
.
run
(
fetch_list
=
[
avg_cost
.
name
],
feed
=
feeder
.
feed
(
data
))
print
(
first_loss
)
reader_generator
=
train_reader
()
for
i
in
six
.
moves
.
xrange
(
5
):
data
=
next
(
reader_generator
)
loss
,
=
exe
.
run
(
fetch_list
=
[
avg_cost
.
name
],
feed
=
feeder
.
feed
(
data
))
def
get_data
():
origin_batch
=
next
(
reader_generator
)
if
args
.
is_dist
and
args
.
use_reader_alloc
:
new_batch
=
[]
for
offset
,
item
in
enumerate
(
origin_batch
):
if
offset
%
2
==
args
.
trainer_id
:
new_batch
.
append
(
item
)
return
new_batch
else
:
return
origin_batch
data
=
next
(
reader_generator
)
last_loss
,
=
exe
.
run
(
fetch_list
=
[
avg_cost
.
name
],
feed
=
feeder
.
feed
(
data
))
print
(
last_loss
)
for
_
in
six
.
moves
.
xrange
(
RUN_STEP
):
loss
,
=
exe
.
run
(
fetch_list
=
[
avg_cost
.
name
],
feed
=
feeder
.
feed
(
get_data
()))
print
(
loss
)
def
runtime_main
(
test_class
):
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
parser
=
argparse
.
ArgumentParser
(
description
=
'Run dist test.'
)
parser
.
add_argument
(
'--role'
,
type
=
str
,
required
=
True
,
choices
=
[
'pserver'
,
'trainer'
])
...
...
@@ -135,7 +143,10 @@ def runtime_main(test_class):
'--current_endpoint'
,
type
=
str
,
required
=
False
,
default
=
""
)
parser
.
add_argument
(
'--sync_mode'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--mem_opt'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--use_cuda'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--use_reduce'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--use_reader_alloc'
,
action
=
'store_true'
,
required
=
False
,
default
=
True
)
args
=
parser
.
parse_args
()
...
...
@@ -143,8 +154,7 @@ def runtime_main(test_class):
if
args
.
role
==
"pserver"
and
args
.
is_dist
:
model
.
run_pserver
(
args
)
else
:
use_cuda
=
True
if
core
.
is_compiled_with_cuda
()
else
False
model
.
run_trainer
(
use_cuda
,
args
)
model
.
run_trainer
(
args
)
import
paddle.compat
as
cpt
...
...
@@ -163,8 +173,10 @@ class TestDistBase(unittest.TestCase):
self
.
_find_free_port
(),
self
.
_find_free_port
())
self
.
_python_interp
=
"python"
self
.
_sync_mode
=
True
self
.
_use_cuda
=
True
self
.
_mem_opt
=
False
self
.
_use_reduce
=
False
self
.
_use_reader_alloc
=
True
self
.
_setup_config
()
def
_find_free_port
(
self
):
...
...
@@ -172,15 +184,15 @@ class TestDistBase(unittest.TestCase):
s
.
bind
((
''
,
0
))
return
s
.
getsockname
()[
1
]
def
start_pserver
(
self
,
model_file
,
check_error_log
):
def
start_pserver
(
self
,
model_file
,
check_error_log
,
required_envs
):
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
ps_cmd
=
"%s %s --role pserver --endpoints %s --trainer_id 0 --current_endpoint %s --trainers %d --is_dist"
ps0_cmd
=
ps_cmd
%
\
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
ps0_ep
,
self
.
_trainers
)
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
ps0_ep
,
self
.
_trainers
)
ps1_cmd
=
ps_cmd
%
\
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
ps1_ep
,
self
.
_trainers
)
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
ps1_ep
,
self
.
_trainers
)
if
self
.
_sync_mode
:
ps0_cmd
+=
" --sync_mode"
...
...
@@ -198,9 +210,15 @@ class TestDistBase(unittest.TestCase):
ps1_pipe
=
open
(
"/tmp/ps1_err.log"
,
"wb"
)
ps0_proc
=
subprocess
.
Popen
(
ps0_cmd
.
strip
().
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
ps0_pipe
)
ps0_cmd
.
strip
().
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
ps0_pipe
,
env
=
required_envs
)
ps1_proc
=
subprocess
.
Popen
(
ps1_cmd
.
strip
().
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
ps1_pipe
)
ps1_cmd
.
strip
().
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
ps1_pipe
,
env
=
required_envs
)
if
not
check_error_log
:
return
ps0_proc
,
ps1_proc
,
None
,
None
...
...
@@ -222,59 +240,60 @@ class TestDistBase(unittest.TestCase):
(
e
,
retry_times
))
retry_times
-=
1
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
,
check_error_log
=
False
):
# TODO(typhoonzero): should auto adapt GPU count on the machine.
required_envs
=
{
"PATH"
:
os
.
getenv
(
"PATH"
,
""
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
,
""
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
,
"CPU_NUM"
:
"1"
}
def
_run_local
(
self
,
model
,
envs
,
check_error_log
):
if
check_error_log
:
required_envs
[
"GLOG_v"
]
=
"7"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model
)
if
self
.
_use_cuda
:
cmd
+=
" --use_cuda"
env_local
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
else
:
env_local
=
{
'CPU_NUM'
:
'1'
}
envs
.
update
(
env_local
)
# Run local to get a base line
env_local
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
env_local
.
update
(
required_envs
)
local_cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model_file
)
if
not
check_error_log
:
err_log
=
open
(
"/tmp/trainer.err.log"
,
"wb"
)
local_proc
=
subprocess
.
Popen
(
local_
cmd
.
split
(
" "
),
cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
env
=
env
_local
)
stderr
=
err_log
,
env
=
env
s
)
else
:
err_log
=
open
(
"/tmp/trainer.err.log"
,
"wb"
)
local_proc
=
subprocess
.
Popen
(
local_
cmd
.
split
(
" "
),
cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
err_log
,
env
=
env
_local
)
stderr
=
subprocess
.
PIPE
,
env
=
env
s
)
local_proc
.
wait
()
out
,
err
=
local_proc
.
communicate
()
local_ret
=
cpt
.
to_text
(
out
)
sys
.
stderr
.
write
(
'local_loss: %s
\n
'
%
local_ret
)
sys
.
stderr
.
write
(
'local_stderr: %s
\n
'
%
err
)
local_out
,
local_err
=
local_proc
.
communicate
()
local_ret
=
cpt
.
to_text
(
local_out
)
if
check_error_log
:
err_log
.
close
()
sys
.
stderr
.
write
(
'local_stdout: %s
\n
'
%
local_ret
)
sys
.
stderr
.
write
(
'local_stderr: %s
\n
'
%
local_err
)
local_losses
=
local_ret
.
split
(
"
\n
"
)
return
local_losses
def
_run_cluster
(
self
,
model
,
envs
,
check_error_log
):
# Run dist train to compare with local results
ps0
,
ps1
,
ps0_pipe
,
ps1_pipe
=
self
.
start_pserver
(
model
_file
,
check_error_log
)
ps0
,
ps1
,
ps0_pipe
,
ps1_pipe
=
self
.
start_pserver
(
model
,
check_error_log
,
envs
)
self
.
_wait_ps_ready
(
ps0
.
pid
)
self
.
_wait_ps_ready
(
ps1
.
pid
)
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
tr_cmd
=
"%s %s --role trainer --endpoints %s --trainer_id %d --current_endpoint %s --trainers %d --is_dist"
tr0_cmd
=
tr_cmd
%
\
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
0
,
ps0_ep
,
self
.
_trainers
)
(
self
.
_python_interp
,
model
,
self
.
_ps_endpoints
,
0
,
ps0_ep
,
self
.
_trainers
)
tr1_cmd
=
tr_cmd
%
\
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
1
,
ps1_ep
,
self
.
_trainers
)
(
self
.
_python_interp
,
model
,
self
.
_ps_endpoints
,
1
,
ps1_ep
,
self
.
_trainers
)
if
self
.
_sync_mode
:
tr0_cmd
+=
" --sync_mode"
...
...
@@ -285,18 +304,28 @@ class TestDistBase(unittest.TestCase):
if
self
.
_use_reduce
:
tr0_cmd
+=
" --use_reduce"
tr1_cmd
+=
" --use_reduce"
if
self
.
_use_reader_alloc
:
tr0_cmd
+=
" --use_reader_alloc"
tr1_cmd
+=
" --use_reader_alloc"
if
self
.
_use_cuda
:
tr0_cmd
+=
" --use_cuda"
tr1_cmd
+=
" --use_cuda"
env0
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
env1
=
{
"CUDA_VISIBLE_DEVICES"
:
"1"
}
else
:
env0
=
{
'CPU_NUM'
:
'1'
}
env1
=
{
'CPU_NUM'
:
'1'
}
env0
.
update
(
envs
)
env1
.
update
(
envs
)
env0
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
env1
=
{
"CUDA_VISIBLE_DEVICES"
:
"1"
}
env0
.
update
(
required_envs
)
env1
.
update
(
required_envs
)
FNULL
=
open
(
os
.
devnull
,
'w'
)
tr0_pipe
=
subprocess
.
PIPE
tr1_pipe
=
subprocess
.
PIPE
if
check_error_log
:
print
(
"tr0_cmd:
"
,
tr0_cmd
)
print
(
"tr1_cmd:
"
,
tr1_cmd
)
print
(
"tr0_cmd:
{}, env0: {}"
.
format
(
tr0_cmd
,
env0
)
)
print
(
"tr1_cmd:
{}, env1: {}"
.
format
(
tr1_cmd
,
env1
)
)
tr0_pipe
=
open
(
"/tmp/tr0_err.log"
,
"wb"
)
tr1_pipe
=
open
(
"/tmp/tr1_err.log"
,
"wb"
)
...
...
@@ -313,17 +342,11 @@ class TestDistBase(unittest.TestCase):
tr0_proc
.
wait
()
tr1_proc
.
wait
()
out
,
err
=
tr0_proc
.
communicate
()
sys
.
stderr
.
write
(
'dist_stderr: %s
\n
'
%
err
)
loss_data0
=
cpt
.
to_text
(
out
)
sys
.
stderr
.
write
(
'dist_loss: %s
\n
'
%
loss_data0
)
lines
=
loss_data0
.
split
(
"
\n
"
)
dist_first_loss
=
eval
(
lines
[
0
].
replace
(
" "
,
","
))[
0
]
dist_last_loss
=
eval
(
lines
[
1
].
replace
(
" "
,
","
))[
0
]
local_lines
=
local_ret
.
split
(
"
\n
"
)
local_first_loss
=
eval
(
local_lines
[
0
])[
0
]
local_last_loss
=
eval
(
local_lines
[
1
])[
0
]
tr0_out
,
tr0_err
=
tr0_proc
.
communicate
()
tr0_loss_text
=
cpt
.
to_text
(
tr0_out
)
tr1_out
,
tr1_err
=
tr1_proc
.
communicate
()
tr1_loss_text
=
cpt
.
to_text
(
tr1_out
)
# close trainer file
if
check_error_log
:
...
...
@@ -341,5 +364,47 @@ class TestDistBase(unittest.TestCase):
ps1
.
wait
()
FNULL
.
close
()
self
.
assertAlmostEqual
(
local_first_loss
,
dist_first_loss
,
delta
=
delta
)
self
.
assertAlmostEqual
(
local_last_loss
,
dist_last_loss
,
delta
=
delta
)
# print log
sys
.
stderr
.
write
(
'trainer 0 stdout:
\n
%s
\n
'
%
tr0_loss_text
)
sys
.
stderr
.
write
(
'trainer 0 stderr:
\n
%s
\n
'
%
tr0_err
)
sys
.
stderr
.
write
(
'trainer 1 stdout: %s
\n
'
%
tr1_loss_text
)
sys
.
stderr
.
write
(
'trainer 1 stderr: %s
\n
'
%
tr1_err
)
tr0_losses
=
tr0_loss_text
.
split
(
"
\n
"
)
tr1_losses
=
tr1_loss_text
.
split
(
"
\n
"
)
return
tr0_losses
,
tr1_losses
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
,
check_error_log
=
False
,
need_envs
=
{}):
# TODO(typhoonzero): should auto adapt GPU count on the machine.
required_envs
=
{
"PATH"
:
os
.
getenv
(
"PATH"
,
""
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
,
""
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
,
}
required_envs
.
update
(
need_envs
)
if
check_error_log
:
required_envs
[
"GLOG_v"
]
=
"7"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
local_losses
\
=
self
.
_run_local
(
model_file
,
required_envs
,
check_error_log
)
tr0_losses
,
tr1_losses
=
self
.
_run_cluster
(
model_file
,
required_envs
,
check_error_log
)
for
step_id
in
range
(
RUN_STEP
):
local_loss
=
eval
(
local_losses
[
step_id
])[
0
]
tr0_loss
=
eval
(
tr0_losses
[
step_id
])[
0
]
tr1_loss
=
eval
(
tr1_losses
[
step_id
])[
0
]
dist_loss
=
(
tr0_loss
+
tr1_loss
)
/
2
print
(
str
(
local_loss
)
+
":"
+
str
(
dist_loss
))
self
.
assertAlmostEqual
(
local_loss
,
dist_loss
,
delta
=
delta
)
python/paddle/fluid/tests/unittests/test_dist_ctr.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
os
import
unittest
from
test_dist_base
import
TestDistBase
class
TestDistCTR2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_cuda
=
False
def
test_dist_ctr
(
self
):
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_mnist.py
浏览文件 @
85ddb5c7
...
...
@@ -23,7 +23,7 @@ class TestDistMnist2x2(TestDistBase):
self
.
_use_reduce
=
False
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1e-
7
)
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1e-
5
)
class
TestDistMnist2x2WithMemopt
(
TestDistBase
):
...
...
@@ -32,7 +32,7 @@ class TestDistMnist2x2WithMemopt(TestDistBase):
self
.
_mem_opt
=
True
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1e-
7
)
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1e-
5
)
class
TestDistMnistAsync
(
TestDistBase
):
...
...
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
85ddb5c7
...
...
@@ -20,9 +20,10 @@ from test_dist_base import TestDistBase
class
TestDistSeResneXt2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_reader_alloc
=
False
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1
e-7
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
1
00
)
# TODO(typhoonzero): fix this test
...
...
@@ -38,6 +39,7 @@ class TestDistSeResneXt2x2(TestDistBase):
class
TestDistSeResneXt2x2Async
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_use_reader_alloc
=
False
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
...
...
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
os
import
unittest
from
test_dist_base
import
TestDistBase
class
TestDistSimnetBowDense2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_cuda
=
False
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'0'
}
self
.
check_with_place
(
"dist_simnet_bow.py"
,
delta
=
1e-5
,
check_error_log
=
False
,
need_envs
=
need_envs
)
class
TestDistSimnetBow2x2DenseAsync
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_use_cuda
=
False
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'0'
}
self
.
check_with_place
(
"dist_simnet_bow.py"
,
delta
=
100
,
check_error_log
=
False
,
need_envs
=
need_envs
)
class
TestDistSimnetBowSparse2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_cuda
=
False
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
}
self
.
check_with_place
(
"dist_simnet_bow.py"
,
delta
=
1e-5
,
check_error_log
=
False
,
need_envs
=
need_envs
)
class
TestDistSimnetBow2x2SparseAsync
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_use_cuda
=
False
def
test_simnet_bow
(
self
):
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
}
self
.
check_with_place
(
"dist_simnet_bow.py"
,
delta
=
100
,
check_error_log
=
False
,
need_envs
=
need_envs
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_text_classification.py
0 → 100644
浏览文件 @
85ddb5c7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
os
import
unittest
from
test_dist_base
import
TestDistBase
class
TestDistTextClassification2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_cuda
=
False
def
test_text_classification
(
self
):
self
.
check_with_place
(
"dist_text_classification.py"
,
delta
=
1e-6
)
class
TestDistTextClassification2x2Async
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
False
self
.
_use_cuda
=
False
def
test_se_resnext
(
self
):
self
.
check_with_place
(
"dist_text_classification.py"
,
delta
=
100
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_word2vec.py
浏览文件 @
85ddb5c7
...
...
@@ -39,7 +39,7 @@ class TestDistW2V2x2Async(TestDistBase):
self
.
_sync_mode
=
False
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_word2vec.py"
,
delta
=
1
)
self
.
check_with_place
(
"dist_word2vec.py"
,
delta
=
1
00
)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
浏览文件 @
85ddb5c7
...
...
@@ -277,7 +277,6 @@ class TestGenerateProposalsOp(OpTest):
'eta'
:
self
.
eta
}
print
(
"lod = "
,
self
.
lod
)
self
.
outputs
=
{
'RpnRois'
:
(
self
.
rpn_rois
[
0
],
[
self
.
lod
]),
'RpnRoiProbs'
:
(
self
.
rpn_roi_probs
[
0
],
[
self
.
lod
])
...
...
@@ -295,7 +294,7 @@ class TestGenerateProposalsOp(OpTest):
self
.
post_nms_topN
=
5000
# train 6000, test 1000
self
.
nms_thresh
=
0.7
self
.
min_size
=
3.0
self
.
eta
=
0.8
self
.
eta
=
1.
def
init_test_input
(
self
):
batch_size
=
1
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
85ddb5c7
...
...
@@ -541,7 +541,7 @@ class TestBook(unittest.TestCase):
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
3
,
100
,
100
],
dtype
=
"float32"
)
out
=
layers
.
shape
(
input
,
name
=
"shape"
)
out
=
layers
.
shape
(
input
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
...
...
@@ -758,6 +758,65 @@ class TestBook(unittest.TestCase):
out
=
layers
.
expand
(
x
,
[
1
,
2
])
print
(
str
(
program
))
def
test_uniform_random_batch_size_like
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
)
out
=
layers
.
uniform_random_batch_size_like
(
input
,
[
-
1
,
11
])
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_gaussian_random
(
self
):
program
=
Program
()
with
program_guard
(
program
):
out
=
layers
.
gaussian_random
(
shape
=
[
20
,
30
])
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_sampling_id
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
"X"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
,
append_batch_size
=
False
)
out
=
layers
.
sampling_id
(
x
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_gaussian_random_batch_size_like
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
)
out
=
layers
.
gaussian_random_batch_size_like
(
input
,
shape
=
[
-
1
,
11
],
mean
=
1.0
,
std
=
2.0
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_sum
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
13
,
11
],
dtype
=
'float32'
)
out
=
layers
.
sum
(
input
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_slice
(
self
):
starts
=
[
1
,
0
,
2
]
ends
=
[
3
,
3
,
4
]
axes
=
[
0
,
1
,
2
]
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
3
,
4
,
5
,
6
],
dtype
=
'float32'
)
out
=
layers
.
slice
(
input
,
axes
=
axes
,
starts
=
starts
,
ends
=
ends
)
def
test_softshrink
(
self
):
program
=
Program
()
with
program_guard
(
program
):
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
85ddb5c7
...
...
@@ -470,7 +470,10 @@ class DistributeTranspiler(object):
"""
# remove optimize ops and add a send op to main_program
# FIXME(typhoonzero): Also ops like clip_gradient, lrn_decay?
lr_ops
=
self
.
_get_lr_ops
()
delete_ops
(
self
.
origin_program
.
global_block
(),
self
.
optimize_ops
)
delete_ops
(
self
.
origin_program
.
global_block
(),
lr_ops
)
self
.
origin_program
.
__str__
()
if
wait_port
:
...
...
@@ -1487,7 +1490,6 @@ to transpile() call.")
per_trainer_name
=
"%s.trainer_%d"
%
\
(
merged_var_name
,
i
)
vars2merge
.
append
(
pserver_block
.
vars
[
per_trainer_name
])
optimize_block
.
append_op
(
type
=
"sum"
,
inputs
=
{
"X"
:
vars2merge
},
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录