Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
a59b7ac7
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a59b7ac7
编写于
1月 25, 2019
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into feature/imperative
上级
ba981604
88bd7e1a
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
1079 addition
and
181 deletion
+1079
-181
paddle/fluid/API.spec
paddle/fluid/API.spec
+2
-0
paddle/fluid/framework/details/execution_strategy.h
paddle/fluid/framework/details/execution_strategy.h
+4
-1
paddle/fluid/operators/distributed/variable_response.cc
paddle/fluid/operators/distributed/variable_response.cc
+3
-2
paddle/fluid/operators/jit/CMakeLists.txt
paddle/fluid/operators/jit/CMakeLists.txt
+1
-1
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+70
-37
paddle/fluid/operators/reader/create_ctr_reader_op.cc
paddle/fluid/operators/reader/create_ctr_reader_op.cc
+27
-9
paddle/fluid/operators/reader/ctr_reader.cc
paddle/fluid/operators/reader/ctr_reader.cc
+199
-39
paddle/fluid/operators/reader/ctr_reader.h
paddle/fluid/operators/reader/ctr_reader.h
+73
-18
paddle/fluid/operators/reader/ctr_reader_test.cc
paddle/fluid/operators/reader/ctr_reader_test.cc
+81
-7
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
+4
-8
paddle/fluid/operators/reader/read_op.cc
paddle/fluid/operators/reader/read_op.cc
+24
-16
paddle/fluid/operators/reader/reader_op_registry.cc
paddle/fluid/operators/reader/reader_op_registry.cc
+21
-13
paddle/fluid/operators/shuffle_channel_op.cc
paddle/fluid/operators/shuffle_channel_op.cc
+113
-0
paddle/fluid/operators/shuffle_channel_op.cu
paddle/fluid/operators/shuffle_channel_op.cu
+125
-0
paddle/fluid/operators/shuffle_channel_op.h
paddle/fluid/operators/shuffle_channel_op.h
+95
-0
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+7
-13
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-0
python/paddle/fluid/contrib/reader/README.md
python/paddle/fluid/contrib/reader/README.md
+15
-0
python/paddle/fluid/contrib/reader/__init__.py
python/paddle/fluid/contrib/reader/__init__.py
+19
-0
python/paddle/fluid/contrib/reader/ctr_reader.py
python/paddle/fluid/contrib/reader/ctr_reader.py
+57
-16
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+1
-1
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+74
-0
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+8
-0
python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py
...n/paddle/fluid/tests/unittests/test_shuffle_channel_op.py
+52
-0
python/setup.py.in
python/setup.py.in
+1
-0
未找到文件。
paddle/fluid/API.spec
浏览文件 @
a59b7ac7
...
...
@@ -213,6 +213,7 @@ paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act
paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1))
paddle.fluid.layers.shuffle_channel ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.teacher_student_sigmoid_loss ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0))
...
...
@@ -359,6 +360,7 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.reader.ctr_reader.ctr_reader ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
paddle.fluid.contrib.CompressPass.add_strategy ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None)
...
...
paddle/fluid/framework/details/execution_strategy.h
浏览文件 @
a59b7ac7
...
...
@@ -25,7 +25,10 @@ struct ExecutionStrategy {
size_t
num_threads_
{
0
};
bool
use_cuda_
{
true
};
bool
allow_op_delay_
{
false
};
size_t
num_iteration_per_drop_scope_
{
1
};
// If we set this to 1, we will delete all variables when finish a batch. and
// this will loss 15%+ performance.
// Please be aware about this parameters.
size_t
num_iteration_per_drop_scope_
{
100
};
ExecutorType
type_
{
kDefault
};
bool
dry_run_
{
false
};
};
...
...
paddle/fluid/operators/distributed/variable_response.cc
浏览文件 @
a59b7ac7
...
...
@@ -117,8 +117,9 @@ bool VariableResponse::CopyLodTensorData(
tensor
->
mutable_data
(
ctx
.
GetPlace
(),
ToVarType
(
meta_
.
data_type
()));
VLOG
(
6
)
<<
"Tensor.memory_size = "
<<
tensor
->
memory_size
()
<<
", Buffer Size = "
<<
length
;
PADDLE_ENFORCE_EQ
(
tensor
->
memory_size
(),
static_cast
<
unsigned
int
>
(
length
));
<<
", Buffer Size = "
<<
length
<<
", dims:"
<<
dims
<<
", numel:"
<<
tensor
->
numel
();
PADDLE_ENFORCE_GE
(
tensor
->
memory_size
(),
static_cast
<
unsigned
int
>
(
length
));
return
ReadRaw
(
input
,
ctx
,
tensor
->
place
(),
tensor_data
,
length
);
}
...
...
paddle/fluid/operators/jit/CMakeLists.txt
浏览文件 @
a59b7ac7
...
...
@@ -21,5 +21,5 @@ endif()
cc_library
(
jit_kernel_helper SRCS
${
jit_kernel_cc_srcs
}
DEPS
${
JIT_KERNEL_DEPS
}
)
cc_test
(
jit_kernel_test SRCS test.cc DEPS jit_kernel_helper
)
if
(
NOT WIN32
)
cc_binary
(
jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper device_tracer
)
cc_binary
(
jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper device_tracer
tensor
)
endif
()
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
a59b7ac7
...
...
@@ -18,6 +18,7 @@
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -155,14 +156,22 @@ void BenchAllImpls(const typename KernelTuples::attr_type& attr, Args... args) {
LOG
(
INFO
)
<<
loginfos
.
str
();
}
using
Tensor
=
paddle
::
framework
::
Tensor
;
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchXYZNKernel
()
{
for
(
int
d
:
TestSizes
())
{
std
::
vector
<
T
>
x
(
d
),
y
(
d
),
z
(
d
);
RandomVec
<
T
>
(
d
,
x
.
data
());
RandomVec
<
T
>
(
d
,
y
.
data
());
BenchAllImpls
<
KT
,
jit
::
XYZNTuples
<
T
>
,
PlaceType
>
(
d
,
x
.
data
(),
y
.
data
(),
z
.
data
(),
d
);
Tensor
x
,
y
,
z
;
x
.
Resize
({
d
});
y
.
Resize
({
d
});
z
.
Resize
({
d
});
T
*
x_data
=
x
.
mutable_data
<
T
>
(
PlaceType
());
T
*
y_data
=
y
.
mutable_data
<
T
>
(
PlaceType
());
T
*
z_data
=
z
.
mutable_data
<
T
>
(
PlaceType
());
RandomVec
<
T
>
(
d
,
x_data
);
RandomVec
<
T
>
(
d
,
y_data
);
BenchAllImpls
<
KT
,
jit
::
XYZNTuples
<
T
>
,
PlaceType
>
(
d
,
x
.
data
<
T
>
(),
y
.
data
<
T
>
(),
z_data
,
d
);
}
}
...
...
@@ -170,9 +179,13 @@ template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void
BenchAXYNKernel
()
{
for
(
int
d
:
TestSizes
())
{
const
T
a
=
static_cast
<
T
>
(
3
);
std
::
vector
<
T
>
x
(
d
),
y
(
d
);
RandomVec
<
T
>
(
d
,
x
.
data
());
BenchAllImpls
<
KT
,
jit
::
AXYNTuples
<
T
>
,
PlaceType
>
(
d
,
&
a
,
x
.
data
(),
y
.
data
(),
Tensor
x
,
y
;
x
.
Resize
({
d
});
y
.
Resize
({
d
});
T
*
x_data
=
x
.
mutable_data
<
T
>
(
PlaceType
());
T
*
y_data
=
y
.
mutable_data
<
T
>
(
PlaceType
());
RandomVec
<
T
>
(
d
,
x_data
);
BenchAllImpls
<
KT
,
jit
::
AXYNTuples
<
T
>
,
PlaceType
>
(
d
,
&
a
,
x
.
data
<
T
>
(),
y_data
,
d
);
}
}
...
...
@@ -180,9 +193,13 @@ void BenchAXYNKernel() {
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchXYNKernel
()
{
for
(
int
d
:
TestSizes
())
{
std
::
vector
<
T
>
x
(
d
),
y
(
d
);
RandomVec
<
T
>
(
d
,
x
.
data
());
BenchAllImpls
<
KT
,
jit
::
XYNTuples
<
T
>
,
PlaceType
>
(
d
,
x
.
data
(),
y
.
data
(),
d
);
Tensor
x
,
y
;
x
.
Resize
({
d
});
y
.
Resize
({
d
});
T
*
x_data
=
x
.
mutable_data
<
T
>
(
PlaceType
());
T
*
y_data
=
y
.
mutable_data
<
T
>
(
PlaceType
());
RandomVec
<
T
>
(
d
,
x_data
);
BenchAllImpls
<
KT
,
jit
::
XYNTuples
<
T
>
,
PlaceType
>
(
d
,
x
.
data
<
T
>
(),
y_data
,
d
);
}
}
...
...
@@ -192,16 +209,23 @@ void BenchLSTMKernel() {
for
(
int
d
:
TestSizes
())
{
const
jit
::
lstm_attr_t
attr
(
d
,
jit
::
kVSigmoid
,
jit
::
kVTanh
,
jit
::
kVTanh
,
use_peephole
);
std
::
vector
<
T
>
x
(
4
*
d
),
ct_1
(
d
),
ct
(
d
),
ht
(
d
),
wp
(
3
*
d
),
checked
(
2
*
d
);
RandomVec
<
T
>
(
4
*
d
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
3
*
d
,
wp
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
d
,
ct_1
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
ct_1_data
=
ct_1
.
data
();
const
T
*
wp_data
=
wp
.
data
();
T
*
x_data
=
x
.
data
();
T
*
checked_data
=
checked
.
data
();
T
*
ct_data
=
ct
.
data
();
T
*
ht_data
=
ht
.
data
();
Tensor
x
,
ct_1
,
ct
,
ht
,
wp
,
checked
;
x
.
Resize
({
4
*
d
});
ct_1
.
Resize
({
d
});
ct
.
Resize
({
d
});
ht
.
Resize
({
d
});
wp
.
Resize
({
3
*
d
});
checked
.
Resize
({
2
*
d
});
auto
place
=
PlaceType
();
RandomVec
<
T
>
(
x
.
numel
(),
x
.
mutable_data
<
T
>
(
place
),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
wp
.
numel
(),
wp
.
mutable_data
<
T
>
(
place
),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
ct_1
.
numel
(),
ct_1
.
mutable_data
<
T
>
(
place
),
-
2.
f
,
2.
f
);
const
T
*
ct_1_data
=
ct_1
.
data
<
T
>
();
const
T
*
wp_data
=
wp
.
data
<
T
>
();
T
*
x_data
=
x
.
mutable_data
<
T
>
(
place
);
T
*
checked_data
=
checked
.
mutable_data
<
T
>
(
place
);
T
*
ct_data
=
ct
.
mutable_data
<
T
>
(
place
);
T
*
ht_data
=
ht
.
mutable_data
<
T
>
(
place
);
jit
::
lstm_t
step
;
step
.
gates
=
x_data
;
step
.
ct_1
=
ct_1_data
;
...
...
@@ -220,12 +244,16 @@ template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void
BenchGRUKernel
()
{
for
(
int
d
:
TestSizes
())
{
const
jit
::
gru_attr_t
attr
(
d
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
std
::
vector
<
T
>
x
(
3
*
d
),
ht_1
(
d
),
ht
(
d
);
RandomVec
<
T
>
(
3
*
d
,
x
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
d
,
ht_1
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
ht_1_data
=
ht_1
.
data
();
T
*
x_data
=
x
.
data
();
T
*
ht_data
=
ht
.
data
();
auto
place
=
PlaceType
();
Tensor
x
,
ht_1
,
ht
;
x
.
Resize
({
3
*
d
});
ht_1
.
Resize
({
d
});
ht
.
Resize
({
d
});
RandomVec
<
T
>
(
3
*
d
,
x
.
mutable_data
<
T
>
(
place
),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
d
,
ht_1
.
mutable_data
<
T
>
(
place
),
-
2.
f
,
2.
f
);
const
T
*
ht_1_data
=
ht_1
.
data
<
T
>
();
T
*
x_data
=
x
.
mutable_data
<
T
>
(
place
);
T
*
ht_data
=
ht
.
mutable_data
<
T
>
(
place
);
jit
::
gru_t
step
;
step
.
gates
=
x_data
;
step
.
ht_1
=
ht_1_data
;
...
...
@@ -243,10 +271,12 @@ void BenchSeqPoolKernel() {
jit
::
seq_pool_attr_t
attr
(
w
,
type
);
for
(
int
h
:
TestSizes
())
{
attr
.
h
=
h
;
std
::
vector
<
T
>
x
(
h
*
w
),
y
(
w
);
RandomVec
<
T
>
(
h
*
w
,
x
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
x_data
=
x
.
data
();
T
*
y_data
=
y
.
data
();
Tensor
x
,
y
;
x
.
Resize
({
h
*
w
});
y
.
Resize
({
w
});
RandomVec
<
T
>
(
h
*
w
,
x
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
x_data
=
x
.
data
<
T
>
();
T
*
y_data
=
y
.
mutable_data
<
T
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
SeqPoolTuples
<
T
>
,
PlaceType
>
(
attr
,
x_data
,
y_data
,
&
attr
);
}
...
...
@@ -259,12 +289,15 @@ void BenchMatMulKernel() {
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
for
(
int
n
:
TestSizes
())
{
for
(
int
k
:
TestSizes
())
{
std
::
vector
<
T
>
a
(
m
*
k
),
b
(
k
*
n
),
c
(
m
*
n
);
RandomVec
<
T
>
(
m
*
k
,
a
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
k
*
n
,
b
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
a_data
=
a
.
data
();
const
T
*
b_data
=
b
.
data
();
T
*
c_data
=
c
.
data
();
Tensor
a
,
b
,
c
;
a
.
Resize
({
m
*
k
});
b
.
Resize
({
k
*
n
});
c
.
Resize
({
m
*
n
});
RandomVec
<
T
>
(
m
*
k
,
a
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
k
*
n
,
b
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
a_data
=
a
.
data
<
T
>
();
const
T
*
b_data
=
b
.
data
<
T
>
();
T
*
c_data
=
c
.
mutable_data
<
T
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
MatMulTuples
<
T
>
,
PlaceType
>
(
k
,
a_data
,
b_data
,
c_data
,
m
,
n
,
k
);
}
...
...
paddle/fluid/operators/reader/create_ctr_reader_op.cc
浏览文件 @
a59b7ac7
...
...
@@ -41,13 +41,19 @@ class CreateCTRReaderOp : public framework::OperatorBase {
auto
*
queue_holder
=
queue_holder_var
->
template
GetMutable
<
LoDTensorBlockingQueueHolder
>();
int
thread_num
=
Attr
<
int
>
(
"thread_num"
);
std
::
vector
<
std
::
string
>
slots
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"slots"
);
int
batch_size
=
Attr
<
int
>
(
"batch_size"
);
std
::
vector
<
std
::
string
>
file_list
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"file_list"
);
out
->
Reset
(
std
::
make_shared
<
CTRReader
>
(
queue_holder
->
GetQueue
(),
batch_size
,
thread_num
,
slots
,
file_list
));
auto
thread_num
=
Attr
<
int
>
(
"thread_num"
);
auto
sparse_slots
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"sparse_slots"
);
auto
dense_slot_index
=
Attr
<
std
::
vector
<
int
>>
(
"dense_slot_index"
);
auto
sparse_slot_index
=
Attr
<
std
::
vector
<
int
>>
(
"sparse_slot_index"
);
auto
batch_size
=
Attr
<
int
>
(
"batch_size"
);
auto
file_type
=
Attr
<
std
::
string
>
(
"file_type"
);
auto
file_format
=
Attr
<
std
::
string
>
(
"file_format"
);
auto
file_list
=
Attr
<
std
::
vector
<
std
::
string
>>
(
"file_list"
);
DataDesc
data_desc
(
batch_size
,
file_list
,
file_type
,
file_format
,
dense_slot_index
,
sparse_slot_index
,
sparse_slots
);
VLOG
(
1
)
<<
data_desc
;
out
->
Reset
(
std
::
make_shared
<
CTRReader
>
(
queue_holder
->
GetQueue
(),
thread_num
,
data_desc
));
}
};
...
...
@@ -58,10 +64,22 @@ class CreateCTRReaderOpMaker : public FileReaderMakerBase {
"Name of the `LoDTensorBlockingQueueHolder` variable"
);
AddAttr
<
int
>
(
"thread_num"
,
"the thread num to read data"
);
AddAttr
<
int
>
(
"batch_size"
,
"the batch size of read data"
);
AddAttr
<
std
::
string
>
(
"file_type"
,
"plain or gzip"
).
SetDefault
(
"plain"
);
AddAttr
<
std
::
string
>
(
"file_format"
,
"svm or csv"
).
SetDefault
(
"csv"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
"file_list"
,
"The list of files that need to read"
);
AddAttr
<
std
::
vector
<
std
::
string
>>
(
"slots"
,
"the slots that should be extract from file"
);
AddAttr
<
std
::
vector
<
int
>>
(
"dense_slot_index"
,
"the dense slots id that should be extract from file"
)
.
SetDefault
({});
AddAttr
<
std
::
vector
<
int
>>
(
"sparse_slot_index"
,
"the sparse slots id that should be extract from file"
)
.
SetDefault
({});
AddAttr
<
std
::
vector
<
std
::
string
>>
(
"sparse_slots"
,
"the sparse slots id that should be "
"extract from file, used when file "
"format is svm"
);
AddComment
(
R"DOC(
Create CTRReader to support read ctr data with cpp.
...
...
paddle/fluid/operators/reader/ctr_reader.cc
浏览文件 @
a59b7ac7
...
...
@@ -73,6 +73,9 @@ static inline void parse_line(
}
}
// label slot1:fea_sign slot2:fea_sign slot1:fea_sign
static
inline
void
parse_svm_line
(
const
std
::
string
&
line
)
{}
class
Reader
{
public:
virtual
~
Reader
()
{}
...
...
@@ -95,11 +98,27 @@ class GzipReader : public Reader {
igzstream
gzstream_
;
};
class
MultiGzip
Reader
:
public
Reader
{
class
PlainFile
Reader
:
public
Reader
{
public:
explicit
MultiGzipReader
(
const
std
::
vector
<
std
::
string
>&
file_list
)
{
explicit
PlainFileReader
(
const
std
::
string
&
file_name
)
:
stream_
(
file_name
.
c_str
())
{}
~
PlainFileReader
()
{}
bool
HasNext
()
override
{
return
stream_
.
peek
()
!=
EOF
;
}
void
NextLine
(
std
::
string
*
line
)
override
{
std
::
getline
(
stream_
,
*
line
);
}
private:
std
::
ifstream
stream_
;
};
template
<
typename
SingleFileReader
>
class
MultiFileReader
:
public
Reader
{
public:
explicit
MultiFileReader
(
const
std
::
vector
<
std
::
string
>&
file_list
)
{
for
(
auto
&
file
:
file_list
)
{
readers_
.
emplace_back
(
std
::
make_shared
<
Gzip
Reader
>
(
file
));
readers_
.
emplace_back
(
std
::
make_shared
<
SingleFile
Reader
>
(
file
));
}
}
...
...
@@ -119,46 +138,35 @@ class MultiGzipReader : public Reader {
}
private:
std
::
vector
<
std
::
shared_ptr
<
Gzip
Reader
>>
readers_
;
std
::
vector
<
std
::
shared_ptr
<
SingleFile
Reader
>>
readers_
;
size_t
current_reader_index_
=
0
;
};
void
MonitorThread
(
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
VLOG
(
3
0
)
<<
"monitor thread in"
;
VLOG
(
3
)
<<
"monitor thread in"
;
bool
reader_thread_is_running
=
true
;
while
(
reader_thread_is_running
)
{
VLOG
(
3
0
)
<<
"reader_thread_is_running"
;
VLOG
(
3
)
<<
"reader_thread_is_running"
;
reader_thread_is_running
=
false
;
for
(
size_t
i
=
0
;
i
<
(
*
thread_status
).
size
();
++
i
)
{
if
((
*
thread_status
)[
i
]
==
Running
)
{
VLOG
(
3
0
)
<<
"reader is running!"
;
VLOG
(
3
)
<<
"reader is running!"
;
reader_thread_is_running
=
true
;
}
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
1000
));
}
VLOG
(
3
0
)
<<
"all reader thread is stopped, push empty data into
queue"
;
queue
->
Push
({}
);
VLOG
(
3
0
)
<<
"monitor thread exited"
;
VLOG
(
3
)
<<
"all reader thread is stopped, close the
queue"
;
queue
->
Close
(
);
VLOG
(
3
)
<<
"monitor thread exited"
;
}
void
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
const
std
::
vector
<
std
::
string
>&
slots
,
int
batch_size
,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
VLOG
(
30
)
<<
"["
<<
thread_id
<<
"]"
<<
" reader thread start! thread_id = "
<<
thread_id
;
for
(
auto
&
file
:
file_list
)
{
VLOG
(
30
)
<<
"["
<<
thread_id
<<
"]"
<<
" file "
<<
file
;
}
(
*
thread_status
)[
thread_id
]
=
Running
;
VLOG
(
30
)
<<
"set status to running"
;
void
ReadSvmData
(
const
DataDesc
&
data_desc
,
std
::
shared_ptr
<
Reader
>
reader
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
std
::
unordered_map
<
std
::
string
,
size_t
>
slot_to_index
;
for
(
size_t
i
=
0
;
i
<
slots
.
size
();
++
i
)
{
slot_to_index
[
slots
[
i
]]
=
i
;
for
(
size_t
i
=
0
;
i
<
data_desc
.
sparse_slot_ids_
.
size
();
++
i
)
{
slot_to_index
[
data_desc
.
sparse_slot_ids_
[
i
]]
=
i
;
}
std
::
string
line
;
...
...
@@ -166,21 +174,17 @@ void ReadThread(const std::vector<std::string>& file_list,
std
::
vector
<
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>>
batch_data
;
std
::
vector
<
int64_t
>
batch_label
;
MultiGzipReader
reader
(
file_list
);
VLOG
(
30
)
<<
"reader inited"
;
while
(
reader
.
HasNext
())
{
while
(
reader
->
HasNext
())
{
batch_data
.
clear
();
batch_data
.
reserve
(
batch_size
);
batch_data
.
reserve
(
data_desc
.
batch_size_
);
batch_label
.
clear
();
batch_label
.
reserve
(
batch_size
);
batch_label
.
reserve
(
data_desc
.
batch_size_
);
// read batch_size data
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
if
(
reader
.
HasNext
())
{
reader
.
NextLine
(
&
line
);
for
(
int
i
=
0
;
i
<
data_desc
.
batch_size_
;
++
i
)
{
if
(
reader
->
HasNext
())
{
reader
->
NextLine
(
&
line
);
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int64_t
>>
slot_to_data
;
int64_t
label
;
parse_line
(
line
,
slot_to_index
,
&
label
,
&
slot_to_data
);
...
...
@@ -193,8 +197,8 @@ void ReadThread(const std::vector<std::string>& file_list,
std
::
vector
<
framework
::
LoDTensor
>
lod_datas
;
// first insert tensor for each slots
for
(
auto
&
slot
:
slots
)
{
// first insert tensor for each s
parse_s
lots
for
(
auto
&
slot
:
data_desc
.
sparse_slot_ids_
)
{
std
::
vector
<
size_t
>
lod_data
{
0
};
std
::
vector
<
int64_t
>
batch_feasign
;
...
...
@@ -226,11 +230,167 @@ void ReadThread(const std::vector<std::string>& file_list,
lod_datas
.
push_back
(
label_tensor
);
queue
->
Push
(
lod_datas
);
VLOG
(
40
)
<<
"push one data, queue_size="
<<
queue
->
Size
();
VLOG
(
4
)
<<
"push one data, queue_size="
<<
queue
->
Size
();
}
}
// label dense_fea,dense_fea sparse_fea,sparse_fea
static
inline
void
parse_csv_line
(
const
std
::
string
&
line
,
const
DataDesc
&
data_desc
,
int64_t
*
label
,
std
::
vector
<
std
::
vector
<
float
>>*
dense_datas
,
std
::
vector
<
std
::
vector
<
int64_t
>>*
sparse_datas
)
{
std
::
vector
<
std
::
string
>
ret
;
string_split
(
line
,
' '
,
&
ret
);
*
label
=
std
::
stol
(
ret
[
0
]);
dense_datas
->
resize
(
data_desc
.
dense_slot_index_
.
size
());
for
(
size_t
i
=
0
;
i
<
data_desc
.
dense_slot_index_
.
size
();
++
i
)
{
int
slot_idx
=
data_desc
.
dense_slot_index_
[
i
];
auto
&
slot_data
=
ret
[
slot_idx
];
std
::
vector
<
std
::
string
>
data_in_slot_str
;
string_split
(
slot_data
,
','
,
&
data_in_slot_str
);
std
::
vector
<
float
>
data_in_slot
;
for
(
auto
&
data_str
:
data_in_slot_str
)
{
(
*
dense_datas
)[
i
].
push_back
(
std
::
stof
(
data_str
));
}
}
sparse_datas
->
resize
(
data_desc
.
sparse_slot_index_
.
size
());
for
(
size_t
i
=
0
;
i
<
data_desc
.
sparse_slot_index_
.
size
();
++
i
)
{
int
slot_idx
=
data_desc
.
sparse_slot_index_
[
i
];
auto
&
slot_data
=
ret
[
slot_idx
];
std
::
vector
<
std
::
string
>
data_in_slot_str
;
string_split
(
slot_data
,
','
,
&
data_in_slot_str
);
std
::
vector
<
int64_t
>
data_in_slot
;
for
(
auto
&
data_str
:
data_in_slot_str
)
{
auto
id
=
std
::
stol
(
data_str
);
(
*
sparse_datas
)[
i
].
push_back
(
id
);
}
}
}
void
ReadCsvData
(
const
DataDesc
&
data_desc
,
std
::
shared_ptr
<
Reader
>
reader
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
std
::
string
line
;
while
(
reader
->
HasNext
())
{
std
::
vector
<
int64_t
>
batch_label
;
batch_label
.
reserve
(
data_desc
.
batch_size_
);
std
::
vector
<
std
::
vector
<
std
::
vector
<
float
>>>
batch_dense_data
;
batch_dense_data
.
reserve
(
data_desc
.
batch_size_
);
std
::
vector
<
std
::
vector
<
std
::
vector
<
int64_t
>>>
batch_sparse_data
;
batch_sparse_data
.
reserve
(
data_desc
.
batch_size_
);
// read batch_size data
for
(
int
i
=
0
;
i
<
data_desc
.
batch_size_
;
++
i
)
{
if
(
reader
->
HasNext
())
{
reader
->
NextLine
(
&
line
);
int64_t
label
;
std
::
vector
<
std
::
vector
<
float
>>
dense_datas
;
std
::
vector
<
std
::
vector
<
int64_t
>>
sparse_datas
;
parse_csv_line
(
line
,
data_desc
,
&
label
,
&
dense_datas
,
&
sparse_datas
);
batch_label
.
push_back
(
label
);
if
(
!
batch_dense_data
.
empty
())
{
PADDLE_ENFORCE_EQ
(
batch_dense_data
[
0
].
size
(),
dense_datas
.
size
(),
"dense data should have the same shape"
);
}
batch_dense_data
.
push_back
(
dense_datas
);
batch_sparse_data
.
push_back
(
sparse_datas
);
}
else
{
break
;
}
}
// the order of output data is label, dense_datas, sparse_datas
std
::
vector
<
framework
::
LoDTensor
>
lod_datas
;
// insert label tensor
framework
::
LoDTensor
label_tensor
;
auto
*
label_tensor_data
=
label_tensor
.
mutable_data
<
int64_t
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
batch_label
.
size
()),
1
}),
platform
::
CPUPlace
());
memcpy
(
label_tensor_data
,
batch_label
.
data
(),
batch_label
.
size
()
*
sizeof
(
int64_t
));
lod_datas
.
push_back
(
label_tensor
);
// insert tensor for each dense_slots
for
(
size_t
i
=
0
;
i
<
data_desc
.
dense_slot_index_
.
size
();
++
i
)
{
framework
::
LoDTensor
lod_tensor
;
size_t
width
=
batch_dense_data
[
0
][
i
].
size
();
auto
*
tensor_data
=
lod_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
(
{
static_cast
<
int64_t
>
(
batch_dense_data
.
size
()),
// batch_size
static_cast
<
int64_t
>
(
width
)}),
platform
::
CPUPlace
());
for
(
size_t
j
=
0
;
j
<
batch_dense_data
.
size
();
++
j
)
{
auto
&
dense_data_row
=
batch_dense_data
[
j
][
i
];
memcpy
(
tensor_data
+
j
*
width
,
dense_data_row
.
data
(),
width
*
sizeof
(
float
));
}
lod_datas
.
push_back
(
lod_tensor
);
}
// insert tensor for each sparse_slots
for
(
size_t
i
=
0
;
i
<
data_desc
.
sparse_slot_index_
.
size
();
++
i
)
{
std
::
vector
<
size_t
>
lod_data
{
0
};
std
::
vector
<
int64_t
>
batch_feasign
;
for
(
size_t
row_idx
=
0
;
row_idx
<
batch_sparse_data
.
size
();
++
row_idx
)
{
auto
&
sparse_ids
=
batch_sparse_data
[
row_idx
][
i
];
lod_data
.
push_back
(
lod_data
.
back
()
+
sparse_ids
.
size
());
batch_feasign
.
insert
(
batch_feasign
.
end
(),
sparse_ids
.
begin
(),
sparse_ids
.
end
());
}
framework
::
LoDTensor
lod_tensor
;
framework
::
LoD
lod
{
lod_data
};
lod_tensor
.
set_lod
(
lod
);
int64_t
*
tensor_data
=
lod_tensor
.
mutable_data
<
int64_t
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
batch_feasign
.
size
()),
1
}),
platform
::
CPUPlace
());
memcpy
(
tensor_data
,
batch_feasign
.
data
(),
batch_feasign
.
size
()
*
sizeof
(
int64_t
));
lod_datas
.
push_back
(
lod_tensor
);
}
queue
->
Push
(
lod_datas
);
VLOG
(
4
)
<<
"push one data, queue_size="
<<
queue
->
Size
();
}
}
void
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
const
DataDesc
&
data_desc
,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
)
{
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" reader thread start! thread_id = "
<<
thread_id
;
for
(
auto
&
file
:
file_list
)
{
VLOG
(
3
)
<<
"["
<<
thread_id
<<
"]"
<<
" file "
<<
file
;
}
(
*
thread_status
)[
thread_id
]
=
Running
;
VLOG
(
3
)
<<
"set status to running"
;
std
::
shared_ptr
<
Reader
>
reader
;
if
(
data_desc
.
file_type_
==
"gzip"
)
{
reader
.
reset
(
new
MultiFileReader
<
GzipReader
>
(
file_list
));
}
else
if
(
data_desc
.
file_type_
==
"plain"
)
{
reader
.
reset
(
new
MultiFileReader
<
PlainFileReader
>
(
file_list
));
}
else
{
PADDLE_THROW
(
"do not support file format %s"
,
data_desc
.
file_type_
);
}
VLOG
(
3
)
<<
"reader inited"
;
if
(
data_desc
.
file_format_
==
"svm"
)
{
ReadSvmData
(
data_desc
,
reader
,
queue
);
}
else
if
(
data_desc
.
file_format_
==
"csv"
)
{
ReadCsvData
(
data_desc
,
reader
,
queue
);
}
(
*
thread_status
)[
thread_id
]
=
Stopped
;
VLOG
(
3
0
)
<<
"set status to stopped, thread "
<<
thread_id
<<
" exited"
;
VLOG
(
3
)
<<
"set status to stopped, thread "
<<
thread_id
<<
" exited"
;
}
}
// namespace reader
...
...
paddle/fluid/operators/reader/ctr_reader.h
浏览文件 @
a59b7ac7
...
...
@@ -36,9 +36,63 @@ namespace reader {
enum
ReaderThreadStatus
{
Running
,
Stopped
};
struct
DataDesc
{
DataDesc
(
int
batch_size
,
const
std
::
vector
<
std
::
string
>&
file_names
,
const
std
::
string
&
file_type
,
const
std
::
string
&
file_format
,
const
std
::
vector
<
int
>&
dense_slot_index
,
const
std
::
vector
<
int
>&
sparse_slot_index
,
const
std
::
vector
<
std
::
string
>&
sparse_slot_ids
)
:
batch_size_
(
batch_size
),
file_names_
(
file_names
),
file_type_
(
file_type
),
file_format_
(
file_format
),
dense_slot_index_
(
dense_slot_index
),
sparse_slot_index_
(
sparse_slot_index
),
sparse_slot_ids_
(
sparse_slot_ids
)
{}
const
int
batch_size_
;
const
std
::
vector
<
std
::
string
>
file_names_
;
const
std
::
string
file_type_
;
// gzip or plain
const
std
::
string
file_format_
;
// csv or svm
// used for csv data format
const
std
::
vector
<
int
>
dense_slot_index_
;
const
std
::
vector
<
int
>
sparse_slot_index_
;
// used for svm data format
const
std
::
vector
<
std
::
string
>
sparse_slot_ids_
;
};
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
DataDesc
&
data_desc
)
{
os
<<
"data_desc:
\n
"
;
os
<<
"
\t
batch_size -> "
<<
data_desc
.
batch_size_
<<
"
\n
"
;
os
<<
"
\t
file_type -> "
<<
data_desc
.
file_type_
<<
"
\n
"
;
os
<<
"
\t
file_format -> "
<<
data_desc
.
file_format_
<<
"
\n
"
;
os
<<
"
\t
file_names -> {"
;
for
(
auto
&
file_name
:
data_desc
.
file_names_
)
{
os
<<
file_name
<<
","
;
}
os
<<
"}
\n
"
;
os
<<
"
\t
dense_slot_index -> {"
;
for
(
auto
&
slot
:
data_desc
.
dense_slot_index_
)
{
os
<<
slot
<<
","
;
}
os
<<
"}
\n
"
;
os
<<
"
\t
sparse_slot_index_ -> {"
;
for
(
auto
&
slot
:
data_desc
.
sparse_slot_index_
)
{
os
<<
slot
<<
","
;
}
os
<<
"}
\n
"
;
os
<<
"
\t
sparse_slot_ids_ -> {"
;
for
(
auto
&
slot
:
data_desc
.
sparse_slot_ids_
)
{
os
<<
slot
<<
","
;
}
os
<<
"}
\n
"
;
return
os
;
}
void
ReadThread
(
const
std
::
vector
<
std
::
string
>&
file_list
,
const
std
::
vector
<
std
::
string
>&
slots
,
int
batch_size
,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
const
DataDesc
&
data_desc
,
int
thread_id
,
std
::
vector
<
ReaderThreadStatus
>*
thread_status
,
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
);
// monitor all running thread, if they are all stopped,
...
...
@@ -48,15 +102,15 @@ void MonitorThread(std::vector<ReaderThreadStatus>* thread_status,
class
CTRReader
:
public
framework
::
FileReader
{
public:
explicit
CTRReader
(
const
std
::
shared_ptr
<
LoDTensorBlockingQueue
>&
queue
,
int
batch_size
,
size_t
thread_num
,
const
std
::
vector
<
std
::
string
>&
slots
,
const
std
::
vector
<
std
::
string
>&
file_list
)
:
batch_size_
(
batch_size
),
slots_
(
slots
),
file_list_
(
file_list
)
{
CTRReader
(
const
std
::
shared_ptr
<
LoDTensorBlockingQueue
>&
queue
,
int
thread_num
,
const
DataDesc
&
data_desc
)
:
data_desc_
(
data_desc
)
{
PADDLE_ENFORCE_GT
(
thread_num
,
0
,
"thread num should be larger then 0!"
);
PADDLE_ENFORCE
(
queue
!=
nullptr
,
"LoDTensorBlockingQueue must not be null"
);
PADDLE_ENFORCE_GT
(
file_list
.
size
(),
0
,
"file list should not be empty"
);
thread_num_
=
std
::
min
<
size_t
>
(
file_list_
.
size
(),
thread_num
);
PADDLE_ENFORCE_GT
(
data_desc_
.
file_names_
.
size
(),
0
,
"file list should not be empty"
);
thread_num_
=
std
::
min
<
size_t
>
(
data_desc_
.
file_names_
.
size
(),
thread_num
);
queue_
=
queue
;
SplitFiles
();
for
(
size_t
i
=
0
;
i
<
thread_num_
;
++
i
)
{
...
...
@@ -64,7 +118,7 @@ class CTRReader : public framework::FileReader {
}
}
~
CTRReader
()
{}
~
CTRReader
()
{
Shutdown
();
}
void
ReadNext
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
override
{
bool
success
;
...
...
@@ -81,7 +135,10 @@ class CTRReader : public framework::FileReader {
for
(
auto
&
read_thread
:
read_threads_
)
{
read_thread
->
join
();
}
monitor_thread_
->
join
();
if
(
monitor_thread_
)
{
monitor_thread_
->
join
();
}
read_threads_
.
clear
();
monitor_thread_
.
reset
(
nullptr
);
...
...
@@ -95,9 +152,9 @@ class CTRReader : public framework::FileReader {
queue_
->
ReOpen
();
VLOG
(
3
)
<<
"reopen success"
;
VLOG
(
3
)
<<
"thread_num "
<<
thread_num_
;
for
(
size_
t
thread_id
=
0
;
thread_id
<
thread_num_
;
thread_id
++
)
{
for
(
in
t
thread_id
=
0
;
thread_id
<
thread_num_
;
thread_id
++
)
{
read_threads_
.
emplace_back
(
new
std
::
thread
(
std
::
bind
(
&
ReadThread
,
file_groups_
[
thread_id
],
slots_
,
batch_size
_
,
&
ReadThread
,
file_groups_
[
thread_id
],
data_desc
_
,
static_cast
<
int
>
(
thread_id
),
&
read_thread_status_
,
queue_
)));
}
monitor_thread_
.
reset
(
new
std
::
thread
(
...
...
@@ -108,8 +165,8 @@ class CTRReader : public framework::FileReader {
private:
void
SplitFiles
()
{
file_groups_
.
resize
(
thread_num_
);
for
(
size_t
i
=
0
;
i
<
file_list
_
.
size
();
++
i
)
{
auto
&
file_name
=
file_list
_
[
i
];
for
(
size_t
i
=
0
;
i
<
data_desc_
.
file_names
_
.
size
();
++
i
)
{
auto
&
file_name
=
data_desc_
.
file_names
_
[
i
];
std
::
ifstream
f
(
file_name
.
c_str
());
PADDLE_ENFORCE
(
f
.
good
(),
"file %s not exist!"
,
file_name
);
file_groups_
[
i
%
thread_num_
].
push_back
(
file_name
);
...
...
@@ -118,9 +175,7 @@ class CTRReader : public framework::FileReader {
private:
size_t
thread_num_
;
const
int
batch_size_
;
const
std
::
vector
<
std
::
string
>
slots_
;
const
std
::
vector
<
std
::
string
>
file_list_
;
const
DataDesc
data_desc_
;
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue_
;
std
::
vector
<
std
::
unique_ptr
<
std
::
thread
>>
read_threads_
;
std
::
unique_ptr
<
std
::
thread
>
monitor_thread_
;
...
...
paddle/fluid/operators/reader/ctr_reader_test.cc
浏览文件 @
a59b7ac7
...
...
@@ -36,6 +36,7 @@ using paddle::framework::LoD;
using
paddle
::
framework
::
DDim
;
using
paddle
::
platform
::
CPUPlace
;
using
paddle
::
framework
::
make_ddim
;
using
paddle
::
operators
::
reader
::
DataDesc
;
static
void
generatedata
(
const
std
::
vector
<
std
::
string
>&
data
,
const
std
::
string
&
file_name
)
{
...
...
@@ -126,30 +127,103 @@ TEST(CTR_READER, read_data) {
LoDTensorBlockingQueueHolder
queue_holder
;
int
capacity
=
64
;
queue_holder
.
InitOnce
(
capacity
,
{},
false
);
queue_holder
.
InitOnce
(
capacity
,
false
);
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
queue
=
queue_holder
.
GetQueue
();
int
batch_size
=
3
;
int
thread_num
=
1
;
std
::
vector
<
std
::
string
>
slots
=
{
"6002"
,
"6003"
};
std
::
vector
<
std
::
string
>
s
parse_s
lots
=
{
"6002"
,
"6003"
};
std
::
vector
<
std
::
string
>
file_list
;
for
(
int
i
=
0
;
i
<
thread_num
;
++
i
)
{
file_list
.
push_back
(
gz_file_name
);
}
CTRReader
reader
(
queue
,
batch_size
,
thread_num
,
slots
,
file_list
);
DataDesc
data_desc
(
batch_size
,
file_list
,
"gzip"
,
"svm"
,
{},
{},
sparse_slots
);
CTRReader
reader
(
queue
,
thread_num
,
data_desc
);
reader
.
Start
();
size_t
batch_num
=
std
::
ceil
(
static_cast
<
float
>
(
ctr_data
.
size
())
/
batch_size
)
*
thread_num
;
check_all_data
(
ctr_data
,
slots
,
label_dims
,
label_value
,
data_slot_6002
,
data_slot_6003
,
batch_num
,
batch_size
,
queue
,
&
reader
);
check_all_data
(
ctr_data
,
sparse_slots
,
label_dims
,
label_value
,
data_slot_6002
,
data_slot_6003
,
batch_num
,
batch_size
,
queue
,
&
reader
);
reader
.
Shutdown
();
reader
.
Start
();
check_all_data
(
ctr_data
,
slots
,
label_dims
,
label_value
,
data_slot_6002
,
data_slot_6003
,
batch_num
,
batch_size
,
queue
,
&
reader
);
check_all_data
(
ctr_data
,
sparse_slots
,
label_dims
,
label_value
,
data_slot_6002
,
data_slot_6003
,
batch_num
,
batch_size
,
queue
,
&
reader
);
reader
.
Shutdown
();
}
// Writes every string of `data`, in order and without any separator, into
// the file `file_name`. Enforces that the file could be opened and that the
// stream is still in a good state after it has been closed.
static void GenereteCsvData(const std::string& file_name,
                            const std::vector<std::string>& data) {
  std::ofstream csv_stream;
  csv_stream.open(file_name.c_str());
  PADDLE_ENFORCE(csv_stream.good(), "open file %s failed!", file_name);

  for (size_t idx = 0; idx < data.size(); ++idx) {
    csv_stream << data[idx];
  }

  csv_stream.close();
  PADDLE_ENFORCE(csv_stream.good(), "save file %s failed!", file_name);
}
static
void
CheckReadCsvOut
(
const
std
::
vector
<
LoDTensor
>&
out
)
{
ASSERT_EQ
(
out
.
size
(),
3
);
ASSERT_EQ
(
out
[
0
].
dims
()[
1
],
1
);
ASSERT_EQ
(
out
[
1
].
dims
()[
1
],
2
);
ASSERT_EQ
(
out
[
2
].
dims
()[
1
],
1
);
for
(
size_t
i
=
0
;
i
<
out
[
0
].
numel
();
++
i
)
{
int64_t
label
=
out
[
0
].
data
<
int64_t
>
()[
i
];
auto
&
dense_dim
=
out
[
1
].
dims
();
for
(
size_t
j
=
0
;
j
<
dense_dim
[
1
];
++
j
)
{
ASSERT_EQ
(
out
[
1
].
data
<
float
>
()[
i
*
dense_dim
[
1
]
+
j
],
static_cast
<
float
>
(
label
+
0.1
));
}
auto
&
sparse_lod
=
out
[
2
].
lod
();
for
(
size_t
j
=
sparse_lod
[
0
][
i
];
j
<
sparse_lod
[
0
][
i
+
1
];
++
j
)
{
ASSERT_EQ
(
out
[
2
].
data
<
int64_t
>
()[
j
],
label
);
}
}
}
// Writes a four-line CSV fixture, reads it through a CTRReader configured for
// plain-text CSV input, and validates every batch. The Start/read/Shutdown
// cycle is run twice on the same reader instance.
TEST(CTR_READER, read_csv_data) {
  std::string file_name = "test_ctr_reader_data.csv";
  const std::vector<std::string> csv_data = {
      "0 0.1,0.1 0,0,0,0\n",
      "1 1.1,1.1 1,1,1,1\n",
      "2 2.1,2.1 2,2,2,2\n",
      "3 3.1,3.1 3,3,3,3\n",
  };
  GenereteCsvData(file_name, csv_data);

  int capacity = 64;
  LoDTensorBlockingQueueHolder queue_holder;
  queue_holder.InitOnce(capacity, false);
  std::shared_ptr<LoDTensorBlockingQueue> queue = queue_holder.GetQueue();

  int batch_size = 3;
  int thread_num = 1;
  // One copy of the fixture file per reader thread.
  std::vector<std::string> file_list(thread_num, file_name);

  DataDesc data_desc(batch_size, file_list, "plain", "csv", {1}, {2}, {});
  CTRReader reader(queue, thread_num, data_desc);

  for (size_t round = 0; round < 2; ++round) {
    reader.Start();
    std::vector<LoDTensor> batch;
    // An empty result from ReadNext ends the round.
    for (reader.ReadNext(&batch); !batch.empty(); reader.ReadNext(&batch)) {
      CheckReadCsvOut(batch);
    }
    reader.Shutdown();
  }
}
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
浏览文件 @
a59b7ac7
...
...
@@ -32,10 +32,8 @@ class LoDTensorBlockingQueue {
friend
class
LoDTensorBlockingQueueHolder
;
private:
LoDTensorBlockingQueue
(
size_t
capacity
,
const
std
::
vector
<
framework
::
DDim
>&
dims
,
bool
speed_test_mode
=
false
)
:
queue_
(
capacity
,
speed_test_mode
),
dims_
(
dims
)
{}
explicit
LoDTensorBlockingQueue
(
size_t
capacity
,
bool
speed_test_mode
=
false
)
:
queue_
(
capacity
,
speed_test_mode
)
{}
public:
bool
Push
(
const
std
::
vector
<
framework
::
LoDTensor
>&
lod_tensor_vec
)
{
...
...
@@ -65,17 +63,15 @@ class LoDTensorBlockingQueue {
private:
BlockingQueue
<
std
::
vector
<
framework
::
LoDTensor
>>
queue_
;
std
::
vector
<
framework
::
DDim
>
dims_
;
};
class
LoDTensorBlockingQueueHolder
{
public:
void
InitOnce
(
size_t
capacity
,
const
std
::
vector
<
framework
::
DDim
>&
dims
,
bool
speed_test_mode
=
false
)
{
void
InitOnce
(
size_t
capacity
,
bool
speed_test_mode
=
false
)
{
PADDLE_ENFORCE
(
queue_
==
nullptr
,
"LoDTensorBlockingQueueHolder::InitOnce() can only be called once"
);
queue_
.
reset
(
new
LoDTensorBlockingQueue
(
capacity
,
dims
,
speed_test_mode
));
queue_
.
reset
(
new
LoDTensorBlockingQueue
(
capacity
,
speed_test_mode
));
}
inline
const
std
::
shared_ptr
<
LoDTensorBlockingQueue
>&
GetQueue
()
const
{
...
...
paddle/fluid/operators/reader/read_op.cc
浏览文件 @
a59b7ac7
...
...
@@ -27,13 +27,13 @@ class ReadInferShape : public framework::InferShapeBase {
"The ReadOp must take a reader as input."
);
PADDLE_ENFORCE
(
ctx
->
HasOutputs
(
"Out"
),
"The ReadOp should be assigned with output."
);
std
::
vector
<
framework
::
DDim
>
reader_dims
=
ctx
->
GetReaderDims
(
"Reader"
);
std
::
vector
<
std
::
string
>
out_names
=
ctx
->
Outputs
(
"Out
"
);
PADDLE_ENFORCE_EQ
(
reader_dims
.
size
(),
out_names
.
size
(),
"The reader's dim number doesn't match the output number."
);
ctx
->
SetOutputsDim
(
"Out"
,
reader_dims
);
if
(
!
ctx
->
IsRuntime
())
{
if
(
!
ctx
->
IsRuntime
()
&&
ctx
->
Attrs
().
Get
<
bool
>
(
"infer_out"
))
{
std
::
vector
<
framework
::
DDim
>
reader_dims
=
ctx
->
GetReaderDims
(
"Reader
"
);
std
::
vector
<
std
::
string
>
out_names
=
ctx
->
Outputs
(
"Out"
);
PADDLE_ENFORCE_EQ
(
reader_dims
.
size
(),
out_names
.
size
(),
"The reader's dim number doesn't match the output number."
);
ctx
->
SetOutputsDim
(
"Out"
,
reader_dims
);
auto
in_desc
=
boost
::
get
<
framework
::
VarDesc
*>
(
ctx
->
GetInputVarPtrs
(
"Reader"
)[
0
]);
auto
in_lod_levels
=
in_desc
->
GetLoDLevels
();
...
...
@@ -53,15 +53,18 @@ class ReadInferVarType : public framework::VarTypeInference {
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
std
::
string
reader_name
=
op_desc
.
Input
(
"Reader"
)[
0
];
std
::
vector
<
std
::
string
>
out_names
=
op_desc
.
Output
(
"Out"
);
framework
::
VarDesc
*
reader
=
block
->
FindVarRecursive
(
reader_name
);
auto
dtypes
=
reader
->
GetDataTypes
();
PADDLE_ENFORCE_EQ
(
dtypes
.
size
(),
out_names
.
size
());
for
(
size_t
i
=
0
;
i
<
dtypes
.
size
();
++
i
)
{
framework
::
VarDesc
&
out
=
block
->
FindRecursiveOrCreateVar
(
out_names
[
i
]);
out
.
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
out
.
SetDataType
(
dtypes
[
i
]);
bool
infer_out
=
boost
::
get
<
bool
>
(
op_desc
.
GetAttr
(
"infer_out"
));
if
(
infer_out
)
{
std
::
string
reader_name
=
op_desc
.
Input
(
"Reader"
)[
0
];
std
::
vector
<
std
::
string
>
out_names
=
op_desc
.
Output
(
"Out"
);
framework
::
VarDesc
*
reader
=
block
->
FindVarRecursive
(
reader_name
);
auto
dtypes
=
reader
->
GetDataTypes
();
PADDLE_ENFORCE_EQ
(
dtypes
.
size
(),
out_names
.
size
());
for
(
size_t
i
=
0
;
i
<
dtypes
.
size
();
++
i
)
{
framework
::
VarDesc
&
out
=
block
->
FindRecursiveOrCreateVar
(
out_names
[
i
]);
out
.
SetType
(
framework
::
proto
::
VarType
::
LOD_TENSOR
);
out
.
SetDataType
(
dtypes
[
i
]);
}
}
}
};
...
...
@@ -73,6 +76,7 @@ class ReadOp : public framework::OperatorBase {
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
dev_place
)
const
override
{
VLOG
(
3
)
<<
"read op in"
;
framework
::
ReaderHolder
*
reader
=
detail
::
Ref
(
scope
.
FindVar
(
Input
(
"Reader"
)),
"Cannot find reader variable %s"
,
Input
(
"Reader"
))
...
...
@@ -87,7 +91,9 @@ class ReadOp : public framework::OperatorBase {
reader
->
ReadNext
(
&
ins
);
if
(
ins
.
empty
())
{
VLOG
(
3
)
<<
"read empty data in"
;
if
(
Attr
<
bool
>
(
"throw_eof_exp"
))
{
VLOG
(
3
)
<<
"throw_eof_exp"
;
PADDLE_THROW_EOF
();
}
else
{
ins
.
resize
(
out_arg_names
.
size
());
...
...
@@ -96,6 +102,7 @@ class ReadOp : public framework::OperatorBase {
tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
0
}),
dev_place
);
}
}
VLOG
(
3
)
<<
"read empty data out"
;
}
PADDLE_ENFORCE_EQ
(
ins
.
size
(),
out_arg_names
.
size
());
for
(
size_t
i
=
0
;
i
<
out_arg_names
.
size
();
++
i
)
{
...
...
@@ -120,6 +127,7 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
" only when the data-balance is enabled in ParallelExecutor"
" and it is set by ParallelExecutor instance, not users."
)
.
SetDefault
(
true
);
AddAttr
<
bool
>
(
"infer_out"
,
""
).
SetDefault
(
true
);
AddComment
(
R"DOC(
Read Operator
...
...
paddle/fluid/operators/reader/reader_op_registry.cc
浏览文件 @
a59b7ac7
...
...
@@ -65,6 +65,10 @@ void FileReaderMakerBase::Make() {
"It means the reader will generate two data each time,"
"whose shapes are [2,3,4] and [5,6] respectively."
);
AddAttr
<
std
::
vector
<
int
>>
(
"lod_levels"
,
"The LoD levels of each data."
);
AddAttr
<
bool
>
(
"use_data_config"
,
"Use the config of all datas like shape_concat/ranks/lod_levels"
)
.
SetDefault
(
true
);
Apply
();
}
...
...
@@ -75,19 +79,23 @@ void FileReaderInferShape::operator()(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"The output file reader should not be null."
);
const
auto
shape_concat
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"shape_concat"
);
const
auto
ranks
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"ranks"
);
std
::
vector
<
framework
::
DDim
>
shapes
=
RestoreShapes
(
shape_concat
,
ranks
);
ctx
->
SetReaderDims
(
"Out"
,
shapes
);
const
auto
lod_levels
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"lod_levels"
);
PADDLE_ENFORCE_EQ
(
lod_levels
.
size
(),
shapes
.
size
(),
"The number of 'lod_levels'(%d) doesn't match the number "
"of 'shapes'(%d)."
,
lod_levels
.
size
(),
shapes
.
size
());
framework
::
VarDesc
*
reader
=
boost
::
get
<
framework
::
VarDesc
*>
(
ctx
->
GetOutputVarPtrs
(
"Out"
)[
0
]);
reader
->
SetLoDLevels
(
lod_levels
);
bool
use_data_config
=
ctx
->
Attrs
().
Get
<
bool
>
(
"use_data_config"
);
if
(
use_data_config
)
{
const
auto
shape_concat
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"shape_concat"
);
const
auto
ranks
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"ranks"
);
std
::
vector
<
framework
::
DDim
>
shapes
=
RestoreShapes
(
shape_concat
,
ranks
);
ctx
->
SetReaderDims
(
"Out"
,
shapes
);
const
auto
lod_levels
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"lod_levels"
);
PADDLE_ENFORCE_EQ
(
lod_levels
.
size
(),
shapes
.
size
(),
"The number of 'lod_levels'(%d) doesn't match the number "
"of 'shapes'(%d)."
,
lod_levels
.
size
(),
shapes
.
size
());
framework
::
VarDesc
*
reader
=
boost
::
get
<
framework
::
VarDesc
*>
(
ctx
->
GetOutputVarPtrs
(
"Out"
)[
0
]);
reader
->
SetLoDLevels
(
lod_levels
);
}
}
void
FileReaderInferVarType
::
operator
()(
const
framework
::
OpDesc
&
op_desc
,
...
...
paddle/fluid/operators/shuffle_channel_op.cc
0 → 100644
浏览文件 @
a59b7ac7
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/shuffle_channel_op.h"
namespace
paddle
{
namespace
operators
{
// Forward operator definition for shuffle_channel: validates that "X" and
// "Out" exist, requires a rank-4 input, and gives "Out" the same dims as "X".
class ShuffleChannelOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of ShuffleChannelOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of ShuffleChannelOp should not be null.");

    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE(x_dims.size() == 4, "The layout of input is NCHW.");

    // The shuffle only reorders channels, so the shape is unchanged.
    ctx->SetOutputDim("Out", x_dims);
  }

 protected:
  // The kernel is selected from the data type of "X" and the current device
  // context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto* x = ctx.Input<framework::Tensor>("X");
    return framework::OpKernelType(x->type(), ctx.device_context());
  }
};
// Declares the proto of the shuffle_channel op: one input "X", one output
// "Out", and an integer attribute "group" (default 1, checked to be >= 1).
class ShuffleChannelOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor, default Tensor<float>), "
             "the input feature data of ShuffleChannelOp, the layout is NCHW.");
    AddOutput("Out",
              "(Tensor, default Tensor<float>), the output of "
              "ShuffleChannelOp. The layout is NCHW.");

    // Rejects any group value below 1.
    const auto group_checker = [](const int& group) {
      PADDLE_ENFORCE_GE(group, 1, "group should be larger than 0.");
    };
    AddAttr<int>("group", "the number of groups.")
        .SetDefault(1)
        .AddCustomChecker(group_checker);

    AddComment(R"DOC(
Shuffle Channel operator
This opearator shuffles the channels of input x.
It divide the input channels in each group into several subgroups,
and obtain a new order by selecting element from every subgroup one by one.
Shuffle channel operation makes it possible to build more powerful structures
with multiple group convolutional layers.
please get more information from the following paper:
https://arxiv.org/pdf/1707.01083.pdf
)DOC");
  }
};
// Gradient operator definition for shuffle_channel: requires Out@GRAD as
// input and X@GRAD as output, and gives X@GRAD the dims of the forward "X".
class ShuffleChannelGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@Grad) should not be null");
    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
                   "Output(X@Grad) should not be null");

    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE(x_dims.size() == 4, "The layout of input is NCHW.");

    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
  }

 protected:
  // The kernel is selected from the data type of the forward input "X" and
  // the current device context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto* x = ctx.Input<framework::Tensor>("X");
    return framework::OpKernelType(x->type(), ctx.device_context());
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias used by the registration macros below.
namespace
ops
=
paddle
::
operators
;
// Registers the shuffle_channel operator with its op class, its proto maker
// and DefaultGradOpDescMaker<true> as the gradient op description maker.
REGISTER_OPERATOR
(
shuffle_channel
,
ops
::
ShuffleChannelOp
,
ops
::
ShuffleChannelOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
// Registers the gradient operator shuffle_channel_grad.
REGISTER_OPERATOR
(
shuffle_channel_grad
,
ops
::
ShuffleChannelGradOp
);
// CPU kernels of the forward op, instantiated for float and double.
REGISTER_OP_CPU_KERNEL
(
shuffle_channel
,
ops
::
ShuffleChannelOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
ShuffleChannelOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
// CPU kernels of the gradient op, instantiated for float and double.
REGISTER_OP_CPU_KERNEL
(
shuffle_channel_grad
,
ops
::
ShuffleChannelGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
ShuffleChannelGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/shuffle_channel_op.cu
0 → 100644
浏览文件 @
a59b7ac7
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/shuffle_channel_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
// Threads per block used by the shuffle_channel CUDA launches.
static constexpr int kNumCUDAThreads = 512;
// Upper bound on the number of blocks in a single launch.
static constexpr int kNumMaximumNumBlocks = 4096;

// Returns the number of kNumCUDAThreads-sized blocks needed to cover N
// elements (rounded up for non-negative N), capped at kNumMaximumNumBlocks.
static inline int NumBlocks(const int N) {
  const int needed = (N + kNumCUDAThreads - 1) / kNumCUDAThreads;
  return needed < kNumMaximumNumBlocks ? needed : kNumMaximumNumBlocks;
}
// Copies `input` to `output` while permuting channels.
//
// Each flat element index is decomposed as (n, i, j, k) with
//   i in [0, group_row), j in [0, group_column), k in [0, len),
// i.e. source channel i * group_column + j of sample n. The element is
// written to channel j * group_row + i of the same sample, at the same
// offset k inside the channel plane.
//
// Args:
//   nthreads:         number of elements to process.
//   feature_map_size: number of elements per sample.
//   output / input:   destination and source buffers.
//   group_row, group_column: the two factors of the channel permutation.
//   len:              number of elements per channel plane.
//
// The loop advances by the total number of launched threads, so every
// element below `nthreads` is handled even when the grid is smaller than
// `nthreads`. The body must therefore index with the loop variable, not with
// the thread's initial global index.
template <typename T>
__global__ void ShuffleChannel(const int nthreads, const int feature_map_size,
                               T* output, const T* input, int group_row,
                               int group_column, int len) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (size_t ii = index; ii < nthreads; ii += offset) {
    // ii < nthreads holds here, so the value fits in an int.
    const int idx = static_cast<int>(ii);
    const int n = idx / group_row / group_column / len;
    const int i = (idx / group_column / len) % group_row;
    const int j = idx / len % group_column;
    const int k = idx - (n * feature_map_size + (i * group_column + j) * len);
    T* p_o = output + n * feature_map_size + (j * group_row + i) * len;
    p_o[k] = input[idx];
  }
}
// CUDA forward kernel of the shuffle_channel op. Reads input "X" and the
// "group" attribute, then launches ShuffleChannel on the device context's
// stream to fill "Out".
template <typename DeviceContext, typename T>
class ShuffleChannelOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::Tensor>("X");
    auto* out = ctx.Output<framework::Tensor>("Out");
    int rows = ctx.Attr<int>("group");

    // Dims 0..3 are read as N, C, H, W (the op documents an NCHW layout).
    auto x_dims = x->dims();
    auto batch = x_dims[0];
    auto channels = x_dims[1];
    auto per_sample = channels * x_dims[2] * x_dims[3];
    auto plane = x_dims[2] * x_dims[3];

    // `channels / rows` is an integer division; `total` equals the tensor's
    // element count only when `channels` is a multiple of `rows`.
    int cols = channels / rows;
    int total = batch * cols * rows * plane;

    int grid = NumBlocks(out->numel());
    int block = kNumCUDAThreads;

    const T* src = x->data<T>();
    T* dst = out->mutable_data<T>(ctx.GetPlace());

    ShuffleChannel<
        T><<<grid, block, 0, ctx.cuda_device_context().stream()>>>(
        total, per_sample, dst, src, rows, cols, plane);
  }
};
// CUDA gradient kernel of the shuffle_channel op. Reads the forward input
// "X" (for its dims), the "group" attribute and Out@GRAD, and fills X@GRAD.
//
// The forward pass moves channel i * group_column + j to channel
// j * group_row + i. The gradient has to undo that move, i.e. apply the
// inverse permutation. ShuffleChannel computes the inverse when its two
// group arguments are swapped, so the launch below passes
// (group_column, group_row). Passing them in forward order would only be
// correct when group_row == group_column.
template <typename DeviceContext, typename T>
class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<framework::Tensor>("X");
    int group = ctx.Attr<int>("group");

    // Dims 0..3 are read as N, C, H, W (the op documents an NCHW layout).
    auto input_dims = input->dims();
    auto num = input_dims[0];
    auto channel = input_dims[1];
    auto height = input_dims[2];
    auto weight = input_dims[3];
    auto feature_map_size = channel * height * weight;
    auto sp_sz = height * weight;

    int group_row = group;
    int group_column = channel / group_row;

    auto* output_grad =
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* input_grad =
        ctx.Output<framework::Tensor>(framework::GradVarName("X"));

    T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
    const T* output_grad_data = output_grad->data<T>();

    int blocks = NumBlocks(output_grad->numel());
    int threads = kNumCUDAThreads;
    int count = num * group_column * group_row * sp_sz;

    // Swapped group arguments: X@GRAD[i * group_column + j] receives
    // Out@GRAD[j * group_row + i].
    ShuffleChannel<
        T><<<blocks, threads, 0, ctx.cuda_device_context().stream()>>>(
        count, feature_map_size, input_grad_data, output_grad_data,
        group_column, group_row, sp_sz);
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias used by the registration macros below.
namespace
ops
=
paddle
::
operators
;
// CUDA kernels of the forward op, instantiated for float and double.
REGISTER_OP_CUDA_KERNEL
(
shuffle_channel
,
ops
::
ShuffleChannelOpCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ShuffleChannelOpCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
// CUDA kernels of the gradient op, instantiated for float and double.
REGISTER_OP_CUDA_KERNEL
(
shuffle_channel_grad
,
ops
::
ShuffleChannelGradOpCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ShuffleChannelGradOpCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/shuffle_channel_op.h
0 → 100644
浏览文件 @
a59b7ac7
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
// CPU forward kernel of the shuffle_channel op.
//
// Reads input "X" and the "group" attribute and writes "Out". For every
// sample n, the plane of input channel i * group_column + j is copied to
// output channel j * group_row + i, where group_row = group and
// group_column = channel / group (integer division).
//
// If `channel` is not a multiple of `group`, the trailing channel % group
// output channels are never written by the loops below.
template <typename DeviceContext, typename T>
class ShuffleChannelOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<framework::Tensor>("X");
    auto* output = ctx.Output<framework::Tensor>("Out");
    int group = ctx.Attr<int>("group");

    // Dims 0..3 are read as N, C, H, W (the op documents an NCHW layout).
    auto input_dims = input->dims();
    auto num = input_dims[0];
    auto channel = input_dims[1];
    auto height = input_dims[2];
    auto weight = input_dims[3];

    auto feature_map_size = channel * height * weight;  // elements per sample
    auto sp_sz = height * weight;                       // elements per channel

    int group_row = group;
    int group_column = channel / group_row;

    const T* input_data = input->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());

    for (int n = 0; n < num; ++n) {
      for (int i = 0; i < group_row; ++i) {
        for (int j = 0; j < group_column; ++j) {
          const T* p_i = input_data + n * feature_map_size +
                         (i * group_column + j) * sp_sz;
          T* p_o =
              output_data + n * feature_map_size + (j * group_row + i) * sp_sz;
          // The byte count must use the element type T. sizeof(int) would
          // copy only part of each plane when T is wider than int (e.g.
          // double).
          memcpy(p_o, p_i, sizeof(T) * sp_sz);
        }
      }
    }
  }
};
// CPU gradient kernel of the shuffle_channel op.
//
// Reads the forward input "X" (for its dims), the "group" attribute and
// Out@GRAD, and writes X@GRAD.
//
// The forward pass copies input channel i * group_column + j to output
// channel j * group_row + i. The gradient is the inverse permutation:
// X@GRAD channel i * group_column + j receives Out@GRAD channel
// j * group_row + i. Reusing the forward mapping here would only be correct
// when group_row == group_column.
template <typename DeviceContext, typename T>
class ShuffleChannelGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<framework::Tensor>("X");
    int group = ctx.Attr<int>("group");

    // Dims 0..3 are read as N, C, H, W (the op documents an NCHW layout).
    auto input_dims = input->dims();
    auto num = input_dims[0];
    auto channel = input_dims[1];
    auto height = input_dims[2];
    auto weight = input_dims[3];

    auto feature_map_size = channel * height * weight;  // elements per sample
    auto sp_sz = height * weight;                       // elements per channel

    int group_row = group;
    int group_column = channel / group_row;

    auto* output_grad =
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* input_grad =
        ctx.Output<framework::Tensor>(framework::GradVarName("X"));

    T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
    const T* output_grad_data = output_grad->data<T>();

    for (int n = 0; n < num; ++n) {
      for (int i = 0; i < group_row; ++i) {
        for (int j = 0; j < group_column; ++j) {
          // Source: the channel the forward pass wrote to.
          const T* p_i = output_grad_data + n * feature_map_size +
                         (j * group_row + i) * sp_sz;
          // Destination: the channel the forward pass read from.
          T* p_o = input_grad_data + n * feature_map_size +
                   (i * group_column + j) * sp_sz;
          // The byte count must use the element type T. sizeof(int) would
          // copy only part of each plane when T is wider than int (e.g.
          // double).
          memcpy(p_o, p_i, sizeof(T) * sp_sz);
        }
      }
    }
  }
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
a59b7ac7
...
...
@@ -485,6 +485,7 @@ All parameter, weight, gradient are variables in Paddle.
py
::
return_value_policy
::
reference
);
py
::
class_
<
framework
::
ReaderHolder
>
(
m
,
"Reader"
,
""
)
.
def
(
"start"
,
&
framework
::
ReaderHolder
::
Start
)
.
def
(
"reset"
,
&
framework
::
ReaderHolder
::
ResetAll
);
using
LoDTensorBlockingQueue
=
...
...
@@ -505,19 +506,12 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"is_closed"
,
&
LoDTensorBlockingQueue
::
IsClosed
);
m
.
def
(
"init_lod_tensor_blocking_queue"
,
[](
Variable
&
var
,
size_t
capacity
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
shapes
)
->
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
{
std
::
vector
<
DDim
>
dims
(
shapes
.
size
());
std
::
transform
(
shapes
.
begin
(),
shapes
.
end
(),
dims
.
begin
(),
[](
const
std
::
vector
<
int64_t
>
&
shape
)
{
return
make_ddim
(
shape
);
});
auto
*
holder
=
var
.
GetMutable
<
LoDTensorBlockingQueueHolder
>
();
holder
->
InitOnce
(
capacity
,
dims
,
FLAGS_reader_queue_speed_test_mode
);
return
holder
->
GetQueue
();
},
[](
Variable
&
var
,
size_t
capacity
)
->
std
::
shared_ptr
<
LoDTensorBlockingQueue
>
{
auto
*
holder
=
var
.
GetMutable
<
LoDTensorBlockingQueueHolder
>
();
holder
->
InitOnce
(
capacity
,
FLAGS_reader_queue_speed_test_mode
);
return
holder
->
GetQueue
();
},
py
::
return_value_policy
::
copy
);
py
::
class_
<
Scope
>
(
m
,
"_Scope"
,
R"DOC(
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
a59b7ac7
...
...
@@ -22,6 +22,8 @@ from . import op_frequence
from
.op_frequence
import
*
from
.
import
quantize
from
.quantize
import
*
from
.
import
reader
from
.reader
import
*
from
.
import
slim
from
.slim
import
*
from
.
import
utils
...
...
@@ -32,5 +34,6 @@ __all__ += decoder.__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
quantize
.
__all__
__all__
+=
reader
.
__all__
__all__
+=
slim
.
__all__
__all__
+=
utils
.
__all__
python/paddle/fluid/contrib/reader/README.md
0 → 100644
浏览文件 @
a59b7ac7
## CTR READER
A multi-threaded C++ reader that has the same interface as py_reader. It
uses C++ threads to read files and is much faster than the Python reading
thread in py_reader.
Currently, it supports two types of files:
-
gzip
-
plain text file
and two types of data format:
-
the csv data format is :
*
label dense_fea,dense_fea sparse_fea,sparse_fea
-
the svm data format is :
*
label slot1:fea_sign slot2:fea_sign slot1:fea_sign
python/paddle/fluid/contrib/reader/__init__.py
0 → 100644
浏览文件 @
a59b7ac7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
.
import
ctr_reader
__all__
=
ctr_reader
.
__all__
python/paddle/fluid/contrib/reader/ctr_reader.py
浏览文件 @
a59b7ac7
...
...
@@ -20,6 +20,8 @@ from paddle.fluid.framework import default_main_program, \
default_startup_program
,
Variable
from
paddle.fluid.unique_name
import
generate
as
unique_name
__all__
=
[
'ctr_reader'
]
def
monkey_patch_reader_methods
(
reader
):
def
__get_reader__
():
...
...
@@ -30,7 +32,11 @@ def monkey_patch_reader_methods(reader):
def
reset
():
return
__get_reader__
().
reset
()
def
start
():
return
__get_reader__
().
start
()
reader
.
reset
=
reset
reader
.
start
=
start
reader
.
stop_gradient
=
True
reader
.
persistable
=
True
return
reader
...
...
@@ -44,13 +50,18 @@ def _copy_reader_var_(block, var):
return
new_var
def
ctr_reader
(
feed_data
,
capacity
,
thread_num
,
batch_size
,
file_list
,
slots
,
name
=
None
):
def
ctr_reader
(
feed_dict
,
file_type
,
# gzip or plain
file_format
,
# csv or svm
dense_slot_index
,
sparse_slot_index
,
capacity
,
thread_num
,
batch_size
,
file_list
,
slots
,
name
=
None
):
"""
Create a CTR reader for data feeding in Python
...
...
@@ -67,12 +78,21 @@ def ctr_reader(feed_data,
Note that :code:`Program.clone()` method cannot clone :code:`py_reader`.
Args:
feed_dict(list(variable)): a list of data variable.
file_type('gzip'|'plain'): the type of the data file
file_format('csv'|'svm'): csv data or svm data format.
csv data format is :
label dense_fea,dense_fea sparse_fea,sparse_fea
the svm data format is :
label slot1:fea_sign slot2:fea_sign slot1:fea_sign
dense_slot_index(list(int)): the index of dense slots
sparse_slot_index(list(int)): the index of sparse slots
capacity(int): The buffer capacity maintained by :code:`py_reader`.
thread_num(
list|tuple): List of tuples which declaring data shapes
.
batch_size(
list|tuple): List of strs which declaring data type
.
file_list(list
|tuple): List of ints which declaring data lod_level
.
slots(
bool): Whether use double buffer or not
.
name(
base
string): The prefix Python queue name and Reader name. None will
thread_num(
int): the thread num to read files by cpp reader
.
batch_size(
int): batch size of data
.
file_list(list
(str)): List of file names that need to read
.
slots(
list(int64)): list of slot id
.
name(string): The prefix Python queue name and Reader name. None will
be generated automatically.
Returns:
...
...
@@ -80,7 +100,15 @@ def ctr_reader(feed_data,
Examples:
1. The basic usage of :code:`py_reader` is as follows:
1. The basic usage of :code:`ctr_reader` is as follows:
.. code-block:: python
py_reader = fluid.contrib.ctr_reader.ctr_reader(
feed_dict=datas, file_type='plain', file_format='csv',
file_list=file_list, dense_slot_index=[1, 2, 3, 4], sparse_slot_index=[],
capacity=64, thread_num=20, batch_size=1000, slots=[], name='ctr_reader')
"""
if
name
is
None
:
queue_name
=
unique_name
(
'lod_tensor_blocking_queue'
)
...
...
@@ -90,7 +118,7 @@ def ctr_reader(feed_data,
reader_name
=
"_"
.
join
([
name
,
"reader"
])
var
=
global_scope
().
var
(
queue_name
)
feed_queue
=
core
.
init_lod_tensor_blocking_queue
(
var
,
capacity
,
shapes
)
feed_queue
=
core
.
init_lod_tensor_blocking_queue
(
var
,
capacity
)
startup_blk
=
default_startup_program
().
current_block
()
reader_var
=
startup_blk
.
create_var
(
name
=
reader_name
)
...
...
@@ -99,12 +127,22 @@ def ctr_reader(feed_data,
inputs
=
{
'blocking_queue'
:
[
queue_name
]},
outputs
=
{
'Out'
:
[
reader_var
]},
attrs
=
{
'use_data_config'
:
False
,
'thread_num'
:
thread_num
,
'batch_size'
:
batch_size
,
'file_list'
:
file_list
,
'slots'
:
slots
,
'file_type'
:
file_type
,
'file_format'
:
file_format
,
'dense_slot_index'
:
dense_slot_index
,
'sparse_slot_index'
:
sparse_slot_index
,
'sparse_slots'
:
slots
,
'ranks'
:
[],
'lod_levels'
:
[],
'shape_concat'
:
[]
})
dtypes
=
[
data
.
dtype
for
data
in
feed_dict
]
reader_var
.
desc
.
set_dtypes
(
dtypes
)
reader_var
.
persistable
=
True
main_prog_reader_var
=
_copy_reader_var_
(
...
...
@@ -118,6 +156,9 @@ def ctr_reader(feed_data,
main_blk
=
default_main_program
().
current_block
()
main_blk
.
append_op
(
type
=
'read'
,
inputs
=
{
'Reader'
:
[
reader
]},
outputs
=
{
'Out'
:
feed_data
})
type
=
'read'
,
inputs
=
{
'Reader'
:
[
reader
]},
attrs
=
{
'infer_out'
:
False
},
outputs
=
{
'Out'
:
feed_dict
})
return
reader
python/paddle/fluid/layers/io.py
浏览文件 @
a59b7ac7
...
...
@@ -523,7 +523,7 @@ def _py_reader(capacity,
double_buffer_name
=
"_"
.
join
([
name
,
"double_buffer"
])
var
=
global_scope
().
var
(
queue_name
)
feed_queue
=
core
.
init_lod_tensor_blocking_queue
(
var
,
capacity
,
shapes
)
feed_queue
=
core
.
init_lod_tensor_blocking_queue
(
var
,
capacity
)
startup_blk
=
default_startup_program
().
current_block
()
startup_var
=
startup_blk
.
create_var
(
name
=
reader_name
)
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
a59b7ac7
...
...
@@ -179,6 +179,7 @@ __all__ = [
'merge_selected_rows'
,
'get_tensor_from_selected_rows'
,
'lstm'
,
'shuffle_channel'
,
'py_func'
,
'psroi_pool'
,
'teacher_student_sigmoid_loss'
,
...
...
@@ -9646,6 +9647,79 @@ def get_tensor_from_selected_rows(x, name=None):
return
out
def shuffle_channel(x, group, name=None):
    """
    **Shuffle Channel Operator**

    This operator shuffles the channels of input x.
    It divides the input channels in each group into :attr:`group` subgroups,
    and obtains a new order by selecting an element from every subgroup one
    by one.

    Please refer to the paper
    https://arxiv.org/pdf/1707.01083.pdf

    .. code-block:: text

        Given a 4-D tensor input with the shape (N, C, H, W):
            input.shape = (1, 4, 2, 2)
            input.data =[[[[0.1, 0.2],
                           [0.2, 0.3]],

                          [[0.3, 0.4],
                           [0.4, 0.5]],

                          [[0.5, 0.6],
                           [0.6, 0.7]],

                          [[0.7, 0.8],
                           [0.8, 0.9]]]]
            Given group: 2
        then we get a 4-D tensor out with the same shape of input:
            out.shape = (1, 4, 2, 2)
            out.data = [[[[0.1, 0.2],
                          [0.2, 0.3]],

                         [[0.5, 0.6],
                          [0.6, 0.7]],

                         [[0.3, 0.4],
                          [0.4, 0.5]],

                         [[0.7, 0.8],
                          [0.8, 0.9]]]]

    Args:
        x(Variable): The input tensor variable. It should be a 4-D tensor
            with shape [N, C, H, W].
        group(int): The number of subgroups. It should divide the number of
            channels.
        name(str|None): Optional name; it is forwarded to the layer helper
            together with the other arguments.

    Returns:
        out(Variable): the channels shuffling result is a tensor variable with
        the same shape and same type as the input.

    Raises:
        TypeError: If group is not an int.

    Examples:
        .. code-block:: python

            input = fluid.layers.data(name='input', shape=[4,2,2], dtype='float32')
            out = fluid.layers.shuffle_channel(x=input, group=2)
    """
    # Reject an invalid ``group`` first, before any helper or output variable
    # is created for this call.
    if not isinstance(group, int):
        raise TypeError("group must be int type")

    # NOTE: keep this call free of extra local variables; ``locals()`` is
    # expected to hold only the function arguments (x, group, name).
    helper = LayerHelper("shuffle_channel", **locals())

    # The output variable takes the dtype of the input.
    out = helper.create_variable_for_type_inference(dtype=x.dtype)

    helper.append_op(
        type="shuffle_channel",
        inputs={"X": x},
        outputs={"Out": out},
        attrs={"group": group})
    return out
class
PyFuncRegistry
(
object
):
_register_funcs
=
[]
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
a59b7ac7
...
...
@@ -1023,6 +1023,14 @@ class TestBook(unittest.TestCase):
print
(
str
(
program
))
def test_shuffle_channel(self):
    """Build a program containing ``layers.shuffle_channel`` and print it.

    The test only asserts that the layer call returns a non-None output
    for a float32 input declared with shape [16, 4, 4] and group=4.
    """
    prog = Program()
    with program_guard(prog):
        data_var = layers.data(name="X", shape=[16, 4, 4], dtype="float32")
        shuffled = layers.shuffle_channel(data_var, group=4)
        self.assertIsNotNone(shuffled)
    # Dump the resulting program description.
    print(str(prog))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_shuffle_channel_op.py
0 → 100644
浏览文件 @
a59b7ac7
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
sys
import
math
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
class TestShuffleChannelOp(OpTest):
    """Operator test for ``shuffle_channel`` checked against a NumPy reference."""

    def setUp(self):
        """Create a random float32 input and the expected shuffled output."""
        self.op_type = "shuffle_channel"
        self.batch_size, self.input_channels = 10, 16
        self.layer_h = self.layer_w = 4
        self.group = 4

        input_shape = (self.batch_size, self.input_channels, self.layer_h,
                       self.layer_w)
        self.x = np.random.random(input_shape).astype('float32')
        self.inputs = {'X': self.x}
        self.attrs = {'group': self.group}

        # Reference result: view the channel axis as (group, C // group),
        # swap those two axes, then collapse them back into one channel axis.
        _, channels, height, width = self.x.shape
        grouped = self.x.reshape(
            (-1, self.group, channels // self.group, height, width))
        swapped = grouped.transpose((0, 2, 1, 3, 4))
        expected = swapped.reshape((-1, channels, height, width))
        self.outputs = {'Out': expected}

    def test_check_output(self):
        """Compare the operator output with ``self.outputs``."""
        self.check_output()

    def test_check_grad(self):
        """Run the gradient check of 'Out' with respect to 'X'."""
        self.check_grad(['X'], 'Out')
if
__name__
==
'__main__'
:
unittest
.
main
()
python/setup.py.in
浏览文件 @
a59b7ac7
...
...
@@ -109,6 +109,7 @@ packages=['paddle',
'paddle.fluid.contrib',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.reader',
'paddle.fluid.contrib.slim',
'paddle.fluid.contrib.slim.core',
'paddle.fluid.contrib.slim.graph',
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录