Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
ec9e12a6
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ec9e12a6
编写于
7月 03, 2017
作者:
L
liaogang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'paddlepaddle/develop' into cpu_mem
上级
5ff172de
bad85f94
变更
15
显示空白变更内容
内联
并排
Showing
15 changed file
with
639 addition
and
49 deletion
+639
-49
CMakeLists.txt
CMakeLists.txt
+5
-0
cmake/generic.cmake
cmake/generic.cmake
+6
-13
doc/design/cluster_train/save_model.md
doc/design/cluster_train/save_model.md
+110
-0
doc/getstarted/concepts/src/train.py
doc/getstarted/concepts/src/train.py
+1
-1
paddle/framework/tensor.h
paddle/framework/tensor.h
+78
-0
paddle/function/CMakeLists.txt
paddle/function/CMakeLists.txt
+8
-0
paddle/function/nnpack/NNPACKConvOp.cpp
paddle/function/nnpack/NNPACKConvOp.cpp
+238
-0
paddle/function/nnpack/NNPACKConvOpTest.cpp
paddle/function/nnpack/NNPACKConvOpTest.cpp
+99
-0
paddle/function/nnpack/nnpack.cmake
paddle/function/nnpack/nnpack.cmake
+16
-0
paddle/gserver/layers/ExpandConvLayer.cpp
paddle/gserver/layers/ExpandConvLayer.cpp
+36
-20
python/CMakeLists.txt
python/CMakeLists.txt
+5
-2
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+2
-2
python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr
...fig_helpers/tests/configs/protostr/test_row_conv.protostr
+1
-1
python/paddle/v2/dataset/flowers.py
python/paddle/v2/dataset/flowers.py
+12
-6
python/paddle/v2/image.py
python/paddle/v2/image.py
+22
-4
未找到文件。
CMakeLists.txt
浏览文件 @
ec9e12a6
...
@@ -49,6 +49,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
...
@@ -49,6 +49,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option
(
ON_TRAVIS
"Exclude special unit test on Travis CI"
OFF
)
option
(
ON_TRAVIS
"Exclude special unit test on Travis CI"
OFF
)
option
(
WITH_C_API
"Compile PaddlePaddle with C-API(Prediction)"
OFF
)
option
(
WITH_C_API
"Compile PaddlePaddle with C-API(Prediction)"
OFF
)
option
(
WITH_GOLANG
"Compile PaddlePaddle with GOLANG"
OFF
)
option
(
WITH_GOLANG
"Compile PaddlePaddle with GOLANG"
OFF
)
option
(
USE_NNPACK
"Compile PaddlePaddle with NNPACK library"
OFF
)
# CMAKE_BUILD_TYPE
# CMAKE_BUILD_TYPE
if
(
NOT CMAKE_BUILD_TYPE
)
if
(
NOT CMAKE_BUILD_TYPE
)
...
@@ -129,6 +130,10 @@ if(WITH_GPU)
...
@@ -129,6 +130,10 @@ if(WITH_GPU)
endif
(
NOT WITH_DSO
)
endif
(
NOT WITH_DSO
)
endif
(
WITH_GPU
)
endif
(
WITH_GPU
)
if
(
USE_NNPACK
)
list
(
APPEND EXTERNAL_LIBS
${
NNPACK_LIB
}
${
PTHREADPOOL_LIB
}
"rt"
)
endif
(
USE_NNPACK
)
add_subdirectory
(
proto
)
add_subdirectory
(
proto
)
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
...
...
cmake/generic.cmake
浏览文件 @
ec9e12a6
...
@@ -101,23 +101,16 @@ function(merge_static_libs TARGET_NAME)
...
@@ -101,23 +101,16 @@ function(merge_static_libs TARGET_NAME)
# First get the file names of the libraries to be merged
# First get the file names of the libraries to be merged
foreach
(
lib
${
libs
}
)
foreach
(
lib
${
libs
}
)
get_target_property
(
libtype
${
lib
}
TYPE
)
if
(
NOT libtype STREQUAL
"STATIC_LIBRARY"
)
message
(
FATAL_ERROR
"merge_static_libs can only process static libraries"
)
endif
()
set
(
libfiles
${
libfiles
}
$<TARGET_FILE:
${
lib
}
>
)
set
(
libfiles
${
libfiles
}
$<TARGET_FILE:
${
lib
}
>
)
endforeach
()
endforeach
()
if
(
APPLE
)
# Use OSX's libtool to merge archives
if
(
APPLE
)
# Use OSX's libtool to merge archives
add_custom_target
(
${
TARGET_NAME
}
_archive
set
(
dummyfile
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
TARGET_NAME
}
_dummy.c
)
COMMAND libtool -static -o
"
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
${
TARGET_NAME
}
.a"
${
libfiles
}
file
(
WRITE
${
dummyfile
}
"const char * dummy =
\"
${
dummyfile
}
\"
;"
)
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
add_library
(
${
TARGET_NAME
}
STATIC
${
dummyfile
}
)
DEPENDS
${
libs
}
add_custom_command
(
TARGET
${
TARGET_NAME
}
POST_BUILD
)
COMMAND rm
"
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
${
TARGET_NAME
}
.a"
add_library
(
${
TARGET_NAME
}
STATIC IMPORTED GLOBAL
)
COMMAND /usr/bin/libtool -static -o
"
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
${
TARGET_NAME
}
.a"
${
libfiles
}
)
set_property
(
TARGET
${
TARGET_NAME
}
PROPERTY
IMPORTED_LOCATION
"
${
CMAKE_CURRENT_BINARY_DIR
}
/lib
${
TARGET_NAME
}
.a"
)
add_dependencies
(
${
TARGET_NAME
}
${
TARGET_NAME
}
_archive
)
else
()
# general UNIX: use "ar" to extract objects and re-add to a common lib
else
()
# general UNIX: use "ar" to extract objects and re-add to a common lib
foreach
(
lib
${
libs
}
)
foreach
(
lib
${
libs
}
)
set
(
objlistfile
${
lib
}
.objlist
)
# list of objects in the input library
set
(
objlistfile
${
lib
}
.objlist
)
# list of objects in the input library
...
...
doc/design/cluster_train/save_model.md
0 → 100644
浏览文件 @
ec9e12a6
# Design Doc: Save Model
## Overview
The model is the output of the training process. There are two
ways in which a user can obtain a model:
-
Save model triggered by user code: user code asks PaddlePaddle to
save a model.
-
Convert model from the checkpoint: model being converted from
pservers' periodic checkpoint. In this way, the user can cancel a
job at any time, and still have a relatively fresh model (we
checkpoint around every 5 minutes).
### Trainer Saving Model vs. Pservers Saving Model
Both trainers and pservers have access to the model. So the model can
be saved from a trainer or pservers. We need to decide where the model
is saved from.
#### Dense Update vs. Sparse Update
There are two types of model update methods: dense update and sparse
update (when the model parameter is configured to be sparse).
-
Dense update
Every trainer has its own full copy of the model. Every model
update will update the entire model.
-
Sparse update
The training input is sparse, and the trainer does not have the
entire model. It only downloads the sub-model related to the
input. When updating the model, only the sub-model related to
the training input is updated.
#### Pservers Saving Model
The benefit of letting pservers save the model is that they have the
entire model all the time. However, since pservers are on different
nodes, a merging process is required to merge the model shards into a
single model. This in turn requires the pservers to write their shards
to a distributed filesystem, so that the checkpoint shards are visible
to the merge program.
#### Trainer Saving Model
The benefit of letting one trainer save the model is that it does not
require a distributed filesystem. It also reuses the same model-saving
logic as local training, except that when doing sparse update the
trainer needs to download the entire model during the saving process.
#### Conclusion
Given that having a trainer save the model does not require a
distributed filesystem, and that it is an intuitive extension of how
the trainer saves the model when training locally, we decide to let
the trainer save the model when doing distributed training.
### Convert Model from Checkpoint
TODO
## Timeline
We will first implement saving the model from the trainer. Converting
the latest snapshot to a model is a TODO for the future.
## Trainer Save Model
### Trainer Election
One trainer will be elected as the one to save the model. When using
etcd, the trainer ID is a randomly generated UUID, and we will use etcd
to elect one trainer. When not using etcd, unique trainer IDs will be
given by the administrator, and the trainer whose ID is "0" is elected
to save the model.
### Model Save Path
Each trainer will be given the directory to save the model. The
elected trainer will save the model to `given-directory/trainerID`.
Since the trainer ID is unique, this prevents concurrent saves to the
same file if multiple trainers are elected to save the model when a
split-brain problem happens.
### What Happens When Model Is Saving
Saving a model takes some time, so we need to define what happens
while the model is being saved.
When doing dense update, the trainer uses the local model. Pservers
do not need to pause model updates.
When doing sparse update, the trainer needs to download the entire
model while saving. To get the most accurate model, the model update
needs to be paused before the download starts and resumed after the
download finishes. Otherwise, the trainer gets a model that is
"polluted": some parts of the model are old and some parts are
new.
It is unclear whether the "polluted" model will be inferior, due to the
stochastic nature of deep learning, and pausing the model update will
add more complexity to the system. Since supporting sparse update is
itself a TODO item, we defer to the future the decision on whether to
pause the model update while the model is being saved.
doc/getstarted/concepts/src/train.py
浏览文件 @
ec9e12a6
...
@@ -31,7 +31,7 @@ def event_handler(event):
...
@@ -31,7 +31,7 @@ def event_handler(event):
# define training dataset reader
# define training dataset reader
def
train_reader
():
def
train_reader
():
train_x
=
np
.
array
([[
1
,
1
],
[
1
,
2
],
[
3
,
4
],
[
5
,
2
]])
train_x
=
np
.
array
([[
1
,
1
],
[
1
,
2
],
[
3
,
4
],
[
5
,
2
]])
train_y
=
np
.
array
([
-
2
,
-
3
,
-
7
,
-
7
])
train_y
=
np
.
array
([
[
-
2
],
[
-
3
],
[
-
7
],
[
-
7
]
])
def
reader
():
def
reader
():
for
i
in
xrange
(
train_y
.
shape
[
0
]):
for
i
in
xrange
(
train_y
.
shape
[
0
]):
...
...
paddle/framework/tensor.h
0 → 100644
浏览文件 @
ec9e12a6
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace
paddle
{
namespace
framework
{
class
Tensor
{
using
paddle
::
platform
::
Place
;
using
paddle
::
platform
::
get_place
;
public:
template
<
typename
T
>
const
T
*
data
()
const
{
PADDLE_ASSERT
(
holder_
!=
nullptr
,
"Tensor::data must be called after Tensor::mutable_data"
);
return
static_cast
<
const
T
*>
(
holder
->
Ptr
());
}
template
<
typename
T
,
// must be POD types
typename
=
std
::
enable_if
<
std
::
is_pod
<
T
>
::
value
>::
type
>
T
*
mutable_data
(
DDim
dims
,
Place
place
)
{
if
(
holder_
==
nullptr
||
holder_
->
Place
()
!=
place
||
holder_
->
Size
()
<
dims
.
product
()
*
sizeof
(
T
))
{
holder_
.
reset
(
new
PlaceholderImpl
(
place
,
dims
.
product
()
*
sizeof
(
T
)));
}
return
static_cast
<
T
*>
(
holder_
->
Ptr
());
}
template
<
typename
T
,
// must be POD types
typename
=
std
::
enable_if
<
std
::
is_pod
<
T
>
::
value
>::
type
>
T
*
mutable_data
(
DDim
dims
)
{
return
mutable_data
<
T
>
(
dims
,
paddle
::
platform
::
get_place
());
}
private:
// Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable.
struct
Placeholder
{
virtual
~
Placeholder
()
{}
virtual
void
*
Ptr
()
const
=
0
;
virtual
Place
Place
()
const
=
0
;
virtual
size_t
Size
()
const
=
0
;
};
template
<
typename
T
>
struct
PlaceholderImpl
:
public
Placeholder
{
PlaceholderImpl
(
Place
pl
,
size_t
size
)
:
ptr_
(
paddle
::
memory
::
Alloc
(
pl
,
size
),
paddle
::
memory
::
Deleter
(
pl
)),
place_
(
pl
),
size_
(
size
)
{}
virtual
void
*
Ptr
()
const
{
return
static_cast
<
void
*>
(
ptr_
.
get
());
}
virtual
size_t
Size
()
const
{
return
size_
;
}
virtual
Place
Place
()
const
{
return
place_
;
}
std
::
unique_ptr
<
T
,
memory
::
Deleter
>
ptr_
;
Place
place_
;
// record the place of ptr_.
size_t
size_
;
// size of the memory block.
};
std
::
unique_ptr
<
Placeholder
>
holder_
;
// holds the memory block if allocated.
};
}
// namespace framework
}
// namespace paddle
paddle/function/CMakeLists.txt
浏览文件 @
ec9e12a6
...
@@ -10,6 +10,14 @@ if(WITH_GPU)
...
@@ -10,6 +10,14 @@ if(WITH_GPU)
cuda_compile
(
cu_objs
${
cu_files
}
)
cuda_compile
(
cu_objs
${
cu_files
}
)
endif
()
endif
()
if
(
USE_NNPACK
)
include
(
nnpack/nnpack.cmake
)
list
(
APPEND cpp_files nnpack/NNPACKConvOp.cpp
)
if
(
WITH_TESTING
)
add_unittest
(
NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp
)
endif
()
endif
()
add_library
(
paddle_function STATIC
${
cpp_files
}
${
cu_objs
}
)
add_library
(
paddle_function STATIC
${
cpp_files
}
${
cu_objs
}
)
add_dependencies
(
paddle_function
${
external_project_dependencies
}
)
add_dependencies
(
paddle_function
${
external_project_dependencies
}
)
add_dependencies
(
paddle_function paddle_proto
)
add_dependencies
(
paddle_function paddle_proto
)
...
...
paddle/function/nnpack/NNPACKConvOp.cpp
0 → 100644
浏览文件 @
ec9e12a6
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "nnpack.h"
#include "paddle/function/ConvOp.h"
// Command-line switches for the NNPACK convolution function defined below.

// When true, calc() first queries NNPACK for the workspace size it needs and
// passes in a buffer owned by the function object.
DEFINE_bool(nnpack_allocate_outside,
            false,
            "Allocate and free workspace memory outside the NNPACK interface.");

// Thread count passed to pthreadpool_create() in init(); with 0 no pool is
// created and a null threadpool is passed to NNPACK.
// The two literals below are concatenated by the compiler, so the first one
// carries the separator.
DEFINE_int32(nnpack_num_threads,
             0,
             "The number of nnpack threads; "
             "default: 0; 0 to disable threadpool.");
namespace
paddle
{
/**
 * Translate the textual algorithm name into NNPACK's algorithm enum.
 *
 * Recognized names are "auto", "ft8x8", "ft16x16", "wt8x8", "implicit-gemm"
 * and "direct" (exact, case-sensitive match). Any other string falls back to
 * nnp_convolution_algorithm_auto.
 */
nnp_convolution_algorithm get_nnp_convolution_algorithm(
    const std::string& algorithm) {
  struct NamedAlgorithm {
    const char* name;
    nnp_convolution_algorithm value;
  };
  static const NamedAlgorithm kKnownAlgorithms[] = {
      {"auto", nnp_convolution_algorithm_auto},
      {"ft8x8", nnp_convolution_algorithm_ft8x8},
      {"ft16x16", nnp_convolution_algorithm_ft16x16},
      {"wt8x8", nnp_convolution_algorithm_wt8x8},
      {"implicit-gemm", nnp_convolution_algorithm_implicit_gemm},
      {"direct", nnp_convolution_algorithm_direct},
  };

  for (const NamedAlgorithm& candidate : kKnownAlgorithms) {
    if (algorithm == candidate.name) {
      return candidate.value;
    }
  }
  // Unknown name: let NNPACK pick.
  return nnp_convolution_algorithm_auto;
}
/**
 * Forward convolution implemented on top of NNPACK.
 *
 * Only groups == 1 is accepted. A batch of one sample is computed with
 * nnp_convolution_inference(); larger batches use nnp_convolution_output(),
 * for which this function requires a stride of 1 in both directions.
 *
 * When FLAGS_nnpack_allocate_outside is set, the workspace NNPACK needs is
 * owned by this object and grown on demand; otherwise null workspace
 * arguments are passed to NNPACK.
 */
template <DeviceType Device>
class NNPACKConvFunction : public ConvFunctionBase {
public:
  /**
   * Read the convolution configuration, select the NNPACK algorithm from the
   * "algo" config entry, initialize NNPACK and, if
   * FLAGS_nnpack_num_threads is non-zero, create the thread pool.
   */
  void init(const FuncConfig& config) override {
    ConvFunctionBase::init(config);
    CHECK_EQ(groups_, (size_t)1);
    algorithm_ = get_nnp_convolution_algorithm(config.get<std::string>("algo"));
    transform_strategy_ = nnp_convolution_transform_strategy_compute;
    nnp_status status = nnp_initialize();
    CHECK_EQ(status, nnp_status_success);
    workspaceBuffer_ = nullptr;
    workspaceSize_ = 0;

    threadpool_ = nullptr;
    if (FLAGS_nnpack_num_threads) {
      threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
      VLOG(3) << "Number of threads "
              << pthreadpool_get_threads_count(threadpool_);
    }
  }

  /** Release the thread pool and the workspace buffer, if any. */
  ~NNPACKConvFunction() {
    if (threadpool_) {
      pthreadpool_destroy(threadpool_);
    }
    if (workspaceBuffer_) {
      free(workspaceBuffer_);
    }
  }

  /** Validate the input, filter and output shapes via checkShape(). */
  virtual void check(const BufferArgs& inputs,
                     const BufferArgs& outputs) override {
    const TensorShape& input = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
    const TensorShape& output = outputs[0].shape();
    checkShape(input, filter, output);
  }

  /**
   * Compute the convolution of inputs[0] with the filter inputs[1] and
   * write the result to outputs[0] (which must be an ASSIGN_TO argument).
   * No bias is applied and the activation is the identity.
   */
  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(numInputs_, inputs.size());
    CHECK_EQ(numOutputs_, outputs.size());
    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
    check(inputs, outputs);
    const TensorShape& input = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
    const TensorShape& output = outputs[0].shape();

    size_t batchSize = input[0];
    size_t inputChannels = input[1];
    size_t inputHeight = input[2];
    size_t inputWidth = input[3];
    size_t filterHeight = getFilterHeight(filter);
    size_t filterWidth = getFilterWidth(filter);
    size_t outputChannels = output[1];

    nnp_size inputSize = {.width = inputWidth, .height = inputHeight};
    nnp_padding padding = {.top = (size_t)paddingH(),
                           .right = (size_t)paddingW(),
                           .bottom = (size_t)paddingH(),
                           .left = (size_t)paddingW()};
    nnp_size kernelSize = {.width = filterWidth, .height = filterHeight};
    nnp_size outputSubsampling = {.width = (size_t)strideW(),
                                  .height = (size_t)strideH()};

    float* inputData = inputs[0].data<float>();
    float* filterData = inputs[1].data<float>();
    float* outputData = outputs[0].data<float>();

    void* bufferPtr = nullptr;
    size_t* sizePtr = nullptr;
    size_t needSize = 0;
    if (FLAGS_nnpack_allocate_outside) {
      // Size query: same call as the real computation below, but with null
      // data pointers and a null workspace so that NNPACK only reports the
      // required workspace size through needSize.
      if (batchSize == 1) {
        nnp_status status = nnp_convolution_inference(algorithm_,
                                                      transform_strategy_,
                                                      inputChannels,
                                                      outputChannels,
                                                      inputSize,
                                                      padding,
                                                      kernelSize,
                                                      outputSubsampling,
                                                      nullptr,
                                                      nullptr,
                                                      nullptr,
                                                      nullptr,
                                                      nullptr,
                                                      &needSize,
                                                      nnp_activation_identity,
                                                      nullptr,
                                                      nullptr,
                                                      nullptr);
        CHECK_EQ(status, nnp_status_success);
      } else {
        // only supports stride = 1
        CHECK_EQ(strideH(), 1);
        CHECK_EQ(strideW(), 1);
        nnp_status status = nnp_convolution_output(algorithm_,
                                                   batchSize,
                                                   inputChannels,
                                                   outputChannels,
                                                   inputSize,
                                                   padding,
                                                   kernelSize,
                                                   nullptr,
                                                   nullptr,
                                                   nullptr,
                                                   nullptr,
                                                   nullptr,
                                                   &needSize,
                                                   nnp_activation_identity,
                                                   nullptr,
                                                   nullptr,
                                                   nullptr);
        CHECK_EQ(status, nnp_status_success);
      }

      VLOG(3) << "workspace size is " << needSize;
      if (needSize > workspaceSize_) {
        // Grow the workspace. The old buffer must be released AND replaced:
        // freeing without reallocating would leave workspaceBuffer_ dangling
        // and hand NNPACK freed memory below.
        if (workspaceBuffer_) {
          free(workspaceBuffer_);
          workspaceBuffer_ = nullptr;
          workspaceSize_ = 0;
        }
        int rc = posix_memalign(&workspaceBuffer_, 64, needSize);
        CHECK_EQ(rc, 0) << "posix_memalign failed to allocate " << needSize
                        << " bytes for the NNPACK workspace";
        workspaceSize_ = needSize;
      }

      if (needSize) {
        bufferPtr = workspaceBuffer_;
        sizePtr = &needSize;
      }
    }

    if (batchSize == 1) {
      nnp_status status =
          nnp_convolution_inference(algorithm_,
                                    transform_strategy_,
                                    inputChannels,
                                    outputChannels,
                                    inputSize,
                                    padding,
                                    kernelSize,
                                    outputSubsampling,
                                    inputData,
                                    filterData,
                                    nullptr, /* bias */
                                    outputData,
                                    bufferPtr,
                                    sizePtr,
                                    nnp_activation_identity,
                                    nullptr,
                                    threadpool_, /* threadpool */
                                    nullptr);
      CHECK_EQ(status, nnp_status_success);
    } else {
      // only supports stride = 1
      CHECK_EQ(strideH(), 1);
      CHECK_EQ(strideW(), 1);
      nnp_status status = nnp_convolution_output(algorithm_,
                                                 batchSize,
                                                 inputChannels,
                                                 outputChannels,
                                                 inputSize,
                                                 padding,
                                                 kernelSize,
                                                 inputData,
                                                 filterData,
                                                 nullptr, /* bias */
                                                 outputData,
                                                 bufferPtr,
                                                 sizePtr,
                                                 nnp_activation_identity,
                                                 nullptr,
                                                 threadpool_, /* threadpool */
                                                 nullptr);
      CHECK_EQ(status, nnp_status_success);
    }
  }

private:
  nnp_convolution_algorithm algorithm_;
  nnp_convolution_transform_strategy transform_strategy_;
  // Default-initialized so the destructor is safe even if init() never ran.
  void* workspaceBuffer_ = nullptr;  // workspace owned by this object
  size_t workspaceSize_ = 0;         // capacity of workspaceBuffer_ in bytes
  pthreadpool_t threadpool_ = nullptr;
};
REGISTER_TYPED_FUNC
(
NNPACKConv
,
CPU
,
NNPACKConvFunction
);
}
// namespace paddle
paddle/function/nnpack/NNPACKConvOpTest.cpp
0 → 100644
浏览文件 @
ec9e12a6
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/function/Function.h"
#include "paddle/function/FunctionTest.h"
DEFINE_string
(
algo
,
"auto"
,
"The algorithm (auto, ft8x8, ft16x16, wt8x8, "
"implicit-gemm, or direct) for computing convolution of NNPACK."
);
namespace
paddle
{
#define IS_NNPACK_SUPPORT(algo, filterSize, stride) \
if (algo == "direct" && filterSize != 1) continue; \
if (algo == "direct" && batchSize != 1) continue; \
if (algo == "wt8x8" && filterSize != 3) continue; \
if (algo == "implicit-gemm" && batchSize != 1) continue; \
if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue;
class
ConvolutionTest
{
public:
ConvolutionTest
(
const
std
::
string
&
conv1
,
const
std
::
string
&
conv2
,
std
::
string
algo
=
"auto"
)
{
for
(
size_t
batchSize
:
{
1
,
32
})
{
for
(
size_t
inputSize
:
{
7
,
14
,
54
})
{
for
(
size_t
filterSize
:
{
1
,
3
,
5
})
{
for
(
size_t
inputChannels
:
{
3
,
64
})
{
for
(
size_t
outputChannels
:
{
3
,
64
,
128
})
{
if
(
inputChannels
<
outputChannels
)
break
;
for
(
size_t
stride
:
{
1
,
2
})
{
// if batchSize > 1 NNPACKConv only supports stride = 1
if
(
batchSize
>
1
&&
stride
>
1
)
break
;
for
(
size_t
padding
:
{
0
,
1
})
{
if
(
padding
>=
filterSize
)
break
;
size_t
outputSize
=
(
inputSize
-
filterSize
+
2
*
padding
+
stride
)
/
stride
;
IS_NNPACK_SUPPORT
(
algo
,
filterSize
,
stride
);
LOG
(
INFO
)
<<
" batchSize="
<<
batchSize
<<
" inputChannels="
<<
inputChannels
<<
" inputHeight="
<<
inputSize
<<
" inputWidth="
<<
inputSize
<<
" outputChannels="
<<
outputChannels
<<
" filterHeight="
<<
filterSize
<<
" filterWidth="
<<
filterSize
<<
" outputHeight="
<<
outputSize
<<
" outputWidth="
<<
outputSize
<<
" stride="
<<
stride
<<
" padding="
<<
padding
;
std
::
vector
<
size_t
>
paddings
=
{
padding
,
padding
};
std
::
vector
<
size_t
>
strides
=
{
stride
,
stride
};
Compare2Function
<
DEVICE_TYPE_CPU
,
DEVICE_TYPE_CPU
>
test
(
conv1
,
conv2
,
FuncConfig
()
.
set
(
"paddings"
,
paddings
)
.
set
(
"strides"
,
strides
)
.
set
(
"groups"
,
(
size_t
)
1
)
.
set
(
"algo"
,
algo
));
TensorShape
shape0
{
batchSize
,
inputChannels
,
inputSize
,
inputSize
};
TensorShape
shape1
{
outputChannels
,
inputChannels
,
filterSize
,
filterSize
};
TensorShape
shape2
{
batchSize
,
outputChannels
,
outputSize
,
outputSize
};
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
shape0
));
test
.
addInputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
shape1
));
test
.
addOutputs
(
BufferArg
(
VALUE_TYPE_FLOAT
,
shape2
));
test
.
run
();
}
}
}
}
}
}
}
}
};
TEST
(
Convolution
,
NNPACK
)
{
// NNPACK only supports stride = 1
ConvolutionTest
test
(
"GemmConv-CPU"
,
"NNPACKConv-CPU"
,
FLAGS_algo
);
}
}
// namespace paddle
paddle/function/nnpack/nnpack.cmake
0 → 100644
浏览文件 @
ec9e12a6
# Find the NNPACK library
# NNPACK_ROOT - where to find NNPACK include and library.
#
# Locate a pre-built NNPACK installation.
#
# Input:
#   NNPACK_ROOT      cache path of the NNPACK install prefix; its initial
#                    value is taken from the NNPACK_ROOT environment variable.
# Results:
#   NNPACK_INC_DIR   directory containing nnpack.h
#   NNPACK_LIB       the nnpack library
#   PTHREADPOOL_LIB  the pthreadpool library
#   NNPACK_FOUND     ON once all three of the above were located
#
# Configuration stops with a fatal error when any of them is missing.
set(NNPACK_FOUND OFF)
set(NNPACK_ROOT "$ENV{NNPACK_ROOT}" CACHE PATH "Folder contains NNPACK")

find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)

if(NOT (NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB))
  message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
else()
  set(NNPACK_FOUND ON)
  # Directory-scoped on purpose: every target defined after this include
  # sees the NNPACK headers.
  include_directories(${NNPACK_INC_DIR})
endif()
paddle/gserver/layers/ExpandConvLayer.cpp
浏览文件 @
ec9e12a6
...
@@ -16,6 +16,10 @@ limitations under the License. */
...
@@ -16,6 +16,10 @@ limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Stat.h"
DEFINE_bool
(
use_nnpack
,
false
,
"Whether to use nnpack for convolution calculation."
);
namespace
paddle
{
namespace
paddle
{
/*
/*
...
@@ -37,6 +41,17 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
...
@@ -37,6 +41,17 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
for
(
int
i
=
0
;
i
<
config_
.
inputs_size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
config_
.
inputs_size
();
i
++
)
{
std
::
vector
<
size_t
>
paddings
=
{(
size_t
)
paddingY_
[
i
],
(
size_t
)
padding_
[
i
]};
std
::
vector
<
size_t
>
paddings
=
{(
size_t
)
paddingY_
[
i
],
(
size_t
)
padding_
[
i
]};
std
::
vector
<
size_t
>
strides
=
{(
size_t
)
strideY_
[
i
],
(
size_t
)
stride_
[
i
]};
std
::
vector
<
size_t
>
strides
=
{(
size_t
)
strideY_
[
i
],
(
size_t
)
stride_
[
i
]};
if
(
FLAGS_use_nnpack
)
{
CHECK_EQ
(
isDeconv_
,
false
);
createFunction
(
forward_
,
"NNPACKConv"
,
FuncConfig
()
.
set
(
"paddings"
,
paddings
)
.
set
(
"strides"
,
strides
)
.
set
(
"groups"
,
(
size_t
)
groups_
[
i
])
.
set
(
"algo"
,
std
::
string
(
"auto"
)));
}
else
{
createFunction
(
forward_
,
createFunction
(
forward_
,
!
isDeconv_
?
"GemmConv"
:
"GemmConvGradInput"
,
!
isDeconv_
?
"GemmConv"
:
"GemmConvGradInput"
,
FuncConfig
()
FuncConfig
()
...
@@ -58,6 +73,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
...
@@ -58,6 +73,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
.
set
(
"strides"
,
strides
)
.
set
(
"strides"
,
strides
)
.
set
(
"groups"
,
(
size_t
)
groups_
[
i
]));
.
set
(
"groups"
,
(
size_t
)
groups_
[
i
]));
}
}
}
return
true
;
return
true
;
}
}
...
...
python/CMakeLists.txt
浏览文件 @
ec9e12a6
...
@@ -13,8 +13,11 @@ set(PY_FILES paddle/__init__.py
...
@@ -13,8 +13,11 @@ set(PY_FILES paddle/__init__.py
${
V2_PY_FILES
}
)
${
V2_PY_FILES
}
)
add_custom_target
(
copy_paddle_master
)
add_custom_target
(
copy_paddle_master
)
SET
(
COPY_PADDLE_MASTER
""
)
if
(
WITH_GOLANG
)
if
(
WITH_GOLANG
)
add_custom_command
(
TARGET copy_paddle_master
SET
(
COPY_PADDLE_MASTER
"copy_paddle_master"
)
add_custom_command
(
TARGET
${
COPY_PADDLE_MASTER
}
COMMAND cp
${
paddle_master_LIB_PATH
}
${
PROJ_ROOT
}
/python/paddle/v2/master/
COMMAND cp
${
paddle_master_LIB_PATH
}
${
PROJ_ROOT
}
/python/paddle/v2/master/
)
)
add_dependencies
(
copy_paddle_master paddle_master
)
add_dependencies
(
copy_paddle_master paddle_master
)
...
@@ -26,7 +29,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
...
@@ -26,7 +29,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
add_custom_command
(
OUTPUT
${
OUTPUT_DIR
}
/.timestamp
add_custom_command
(
OUTPUT
${
OUTPUT_DIR
}
/.timestamp
COMMAND env
${
py_env
}
${
PYTHON_EXECUTABLE
}
setup.py bdist_wheel
COMMAND env
${
py_env
}
${
PYTHON_EXECUTABLE
}
setup.py bdist_wheel
COMMAND
${
CMAKE_COMMAND
}
-E touch
${
OUTPUT_DIR
}
/.timestamp
COMMAND
${
CMAKE_COMMAND
}
-E touch
${
OUTPUT_DIR
}
/.timestamp
DEPENDS gen_proto_py
${
PY_FILES
}
${
external_project_dependencies
}
copy_paddle_master
)
DEPENDS gen_proto_py
${
PY_FILES
}
${
external_project_dependencies
}
${
COPY_PADDLE_MASTER
}
)
add_custom_target
(
paddle_python ALL DEPENDS
add_custom_target
(
paddle_python ALL DEPENDS
${
OUTPUT_DIR
}
/.timestamp
)
${
OUTPUT_DIR
}
/.timestamp
)
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
ec9e12a6
...
@@ -2082,10 +2082,10 @@ class MaxOutLayer(LayerBase):
...
@@ -2082,10 +2082,10 @@ class MaxOutLayer(LayerBase):
class
RowConvLayer
(
LayerBase
):
class
RowConvLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
context_length
,
**
xargs
):
def
__init__
(
self
,
name
,
inputs
,
context_length
,
**
xargs
):
super
(
RowConvLayer
,
self
).
__init__
(
super
(
RowConvLayer
,
self
).
__init__
(
name
,
'
maxout
'
,
0
,
inputs
=
inputs
,
**
xargs
)
name
,
'
row_conv
'
,
0
,
inputs
=
inputs
,
**
xargs
)
config_assert
(
config_assert
(
len
(
self
.
inputs
)
==
1
,
len
(
self
.
inputs
)
==
1
,
'
TransLayer must have one and only one input
'
)
'
row convolution layer must have one and only one input.
'
)
input_layer
=
self
.
get_input_layer
(
0
)
input_layer
=
self
.
get_input_layer
(
0
)
row_conv_conf
=
self
.
config
.
inputs
[
0
].
row_conv_conf
row_conv_conf
=
self
.
config
.
inputs
[
0
].
row_conv_conf
row_conv_conf
.
context_length
=
context_length
row_conv_conf
.
context_length
=
context_length
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_conv.protostr
浏览文件 @
ec9e12a6
...
@@ -7,7 +7,7 @@ layers {
...
@@ -7,7 +7,7 @@ layers {
}
}
layers {
layers {
name: "__row_conv_layer_0__"
name: "__row_conv_layer_0__"
type: "
maxout
"
type: "
row_conv
"
size: 2560
size: 2560
active_type: "relu"
active_type: "relu"
inputs {
inputs {
...
...
python/paddle/v2/dataset/flowers.py
浏览文件 @
ec9e12a6
...
@@ -30,6 +30,7 @@ http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
...
@@ -30,6 +30,7 @@ http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}.
"""
"""
import
cPickle
import
cPickle
import
itertools
import
itertools
import
functools
from
common
import
download
from
common
import
download
import
tarfile
import
tarfile
import
scipy.io
as
scio
import
scipy.io
as
scio
...
@@ -54,21 +55,26 @@ TEST_FLAG = 'trnid'
...
@@ -54,21 +55,26 @@ TEST_FLAG = 'trnid'
VALID_FLAG
=
'valid'
VALID_FLAG
=
'valid'
def
default_mapper
(
sample
):
def
default_mapper
(
is_train
,
sample
):
'''
'''
map image bytes data to type needed by model input layer
map image bytes data to type needed by model input layer
'''
'''
img
,
label
=
sample
img
,
label
=
sample
img
=
load_image_bytes
(
img
)
img
=
load_image_bytes
(
img
)
img
=
simple_transform
(
img
,
256
,
224
,
True
)
img
=
simple_transform
(
img
,
256
,
224
,
is_train
,
mean
=
[
103.94
,
116.78
,
123.68
])
return
img
.
flatten
().
astype
(
'float32'
),
label
return
img
.
flatten
().
astype
(
'float32'
),
label
train_mapper
=
functools
.
partial
(
default_mapper
,
True
)
test_mapper
=
functools
.
partial
(
default_mapper
,
False
)
def
reader_creator
(
data_file
,
def
reader_creator
(
data_file
,
label_file
,
label_file
,
setid_file
,
setid_file
,
dataset_name
,
dataset_name
,
mapper
=
default_mapper
,
mapper
,
buffered_size
=
1024
,
buffered_size
=
1024
,
use_xmap
=
True
):
use_xmap
=
True
):
'''
'''
...
@@ -118,7 +124,7 @@ def reader_creator(data_file,
...
@@ -118,7 +124,7 @@ def reader_creator(data_file,
return
map_readers
(
mapper
,
reader
)
return
map_readers
(
mapper
,
reader
)
def
train
(
mapper
=
default
_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
def
train
(
mapper
=
train
_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
'''
'''
Create flowers training set reader.
Create flowers training set reader.
It returns a reader, each sample in the reader is
It returns a reader, each sample in the reader is
...
@@ -141,7 +147,7 @@ def train(mapper=default_mapper, buffered_size=1024, use_xmap=True):
...
@@ -141,7 +147,7 @@ def train(mapper=default_mapper, buffered_size=1024, use_xmap=True):
buffered_size
,
use_xmap
)
buffered_size
,
use_xmap
)
def
test
(
mapper
=
defaul
t_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
def
test
(
mapper
=
tes
t_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
'''
'''
Create flowers test set reader.
Create flowers test set reader.
It returns a reader, each sample in the reader is
It returns a reader, each sample in the reader is
...
@@ -164,7 +170,7 @@ def test(mapper=default_mapper, buffered_size=1024, use_xmap=True):
...
@@ -164,7 +170,7 @@ def test(mapper=default_mapper, buffered_size=1024, use_xmap=True):
buffered_size
,
use_xmap
)
buffered_size
,
use_xmap
)
def
valid
(
mapper
=
defaul
t_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
def
valid
(
mapper
=
tes
t_mapper
,
buffered_size
=
1024
,
use_xmap
=
True
):
'''
'''
Create flowers validation set reader.
Create flowers validation set reader.
It returns a reader, each sample in the reader is
It returns a reader, each sample in the reader is
...
...
python/paddle/v2/image.py
浏览文件 @
ec9e12a6
...
@@ -262,7 +262,12 @@ def left_right_flip(im):
...
@@ -262,7 +262,12 @@ def left_right_flip(im):
return
im
[:,
::
-
1
,
:]
return
im
[:,
::
-
1
,
:]
def
simple_transform
(
im
,
resize_size
,
crop_size
,
is_train
,
is_color
=
True
):
def
simple_transform
(
im
,
resize_size
,
crop_size
,
is_train
,
is_color
=
True
,
mean
=
None
):
"""
"""
Simply data argumentation for training. These operations include
Simply data argumentation for training. These operations include
resizing, croping and flipping.
resizing, croping and flipping.
...
@@ -288,8 +293,20 @@ def simple_transform(im, resize_size, crop_size, is_train, is_color=True):
...
@@ -288,8 +293,20 @@ def simple_transform(im, resize_size, crop_size, is_train, is_color=True):
im
=
left_right_flip
(
im
)
im
=
left_right_flip
(
im
)
else
:
else
:
im
=
center_crop
(
im
,
crop_size
)
im
=
center_crop
(
im
,
crop_size
)
if
len
(
im
.
shape
)
==
3
:
im
=
to_chw
(
im
)
im
=
to_chw
(
im
)
im
=
im
.
astype
(
'float32'
)
if
mean
is
not
None
:
mean
=
np
.
array
(
mean
,
dtype
=
np
.
float32
)
# mean value, may be one value per channel
if
mean
.
ndim
==
1
:
mean
=
mean
[:,
np
.
newaxis
,
np
.
newaxis
]
else
:
# elementwise mean
assert
len
(
mean
.
shape
)
==
len
(
im
)
im
-=
mean
return
im
return
im
...
@@ -297,7 +314,8 @@ def load_and_transform(filename,
...
@@ -297,7 +314,8 @@ def load_and_transform(filename,
resize_size
,
resize_size
,
crop_size
,
crop_size
,
is_train
,
is_train
,
is_color
=
True
):
is_color
=
True
,
mean
=
None
):
"""
"""
Load image from the input file `filename` and transform image for
Load image from the input file `filename` and transform image for
data argumentation. Please refer to the `simple_transform` interface
data argumentation. Please refer to the `simple_transform` interface
...
@@ -318,5 +336,5 @@ def load_and_transform(filename,
...
@@ -318,5 +336,5 @@ def load_and_transform(filename,
:type is_train: bool
:type is_train: bool
"""
"""
im
=
load_image
(
filename
)
im
=
load_image
(
filename
)
im
=
simple_transform
(
im
,
resize_size
,
crop_size
,
is_train
,
is_color
)
im
=
simple_transform
(
im
,
resize_size
,
crop_size
,
is_train
,
is_color
,
mean
)
return
im
return
im
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录