Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
fbcadb75
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fbcadb75
编写于
8月 03, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:baidu/Paddle into gradient_check_utils
上级
a404d9ab
0478780c
变更
51
显示空白变更内容
内联
并排
Showing
51 changed file
with
742 addition
and
156 deletion
+742
-156
Dockerfile
Dockerfile
+1
-1
cmake/flags.cmake
cmake/flags.cmake
+5
-0
doc/api/v2/config/layer.rst
doc/api/v2/config/layer.rst
+10
-0
paddle/cuda/src/hl_cuda_cudnn.cc
paddle/cuda/src/hl_cuda_cudnn.cc
+9
-0
paddle/framework/detail/tensor-inl.h
paddle/framework/detail/tensor-inl.h
+4
-3
paddle/framework/op_registry.h
paddle/framework/op_registry.h
+8
-0
paddle/framework/operator.cc
paddle/framework/operator.cc
+4
-4
paddle/framework/operator.h
paddle/framework/operator.h
+1
-1
paddle/function/ConvOp.h
paddle/function/ConvOp.h
+7
-0
paddle/function/GemmConvOp.cpp
paddle/function/GemmConvOp.cpp
+83
-48
paddle/gserver/layers/ClipLayer.cpp
paddle/gserver/layers/ClipLayer.cpp
+79
-0
paddle/gserver/layers/RowL2NormLayer.cpp
paddle/gserver/layers/RowL2NormLayer.cpp
+98
-0
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+30
-0
paddle/math/BaseMatrix.cu
paddle/math/BaseMatrix.cu
+6
-0
paddle/math/BaseMatrix.h
paddle/math/BaseMatrix.h
+7
-0
paddle/operators/add_op.cc
paddle/operators/add_op.cc
+0
-4
paddle/operators/add_op.cu
paddle/operators/add_op.cu
+1
-0
paddle/operators/add_op.h
paddle/operators/add_op.h
+7
-4
paddle/operators/cross_entropy_op.cu
paddle/operators/cross_entropy_op.cu
+1
-0
paddle/operators/mean_op.h
paddle/operators/mean_op.h
+5
-2
paddle/operators/mul_op.cu
paddle/operators/mul_op.cu
+1
-0
paddle/operators/mul_op.h
paddle/operators/mul_op.h
+9
-4
paddle/operators/rowwise_add_op.cu
paddle/operators/rowwise_add_op.cu
+1
-0
paddle/operators/rowwise_add_op.h
paddle/operators/rowwise_add_op.h
+1
-1
paddle/operators/sgd_op.cu
paddle/operators/sgd_op.cu
+1
-0
paddle/operators/sgd_op.h
paddle/operators/sgd_op.h
+6
-2
paddle/operators/sigmoid_op.cu
paddle/operators/sigmoid_op.cu
+1
-0
paddle/operators/sigmoid_op.h
paddle/operators/sigmoid_op.h
+5
-3
paddle/operators/softmax_op.cu
paddle/operators/softmax_op.cu
+1
-0
paddle/operators/softmax_op.h
paddle/operators/softmax_op.h
+2
-2
paddle/platform/enforce.h
paddle/platform/enforce.h
+6
-6
paddle/pybind/CMakeLists.txt
paddle/pybind/CMakeLists.txt
+1
-1
paddle/pybind/pybind.cc
paddle/pybind/pybind.cc
+62
-10
paddle/pybind/tensor_bind.h
paddle/pybind/tensor_bind.h
+36
-12
paddle/scripts/docker/build.sh
paddle/scripts/docker/build.sh
+1
-1
proto/ModelConfig.proto
proto/ModelConfig.proto
+6
-0
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+24
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+73
-0
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
.../paddle/trainer_config_helpers/tests/configs/file_list.sh
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr
...g_helpers/tests/configs/protostr/test_clip_layer.protostr
+31
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr
...rs/tests/configs/protostr/test_row_l2_norm_layer.protostr
+27
-0
python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
...e/trainer_config_helpers/tests/configs/test_clip_layer.py
+6
-0
python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
...er_config_helpers/tests/configs/test_row_l2_norm_layer.py
+6
-0
python/paddle/v2/framework/tests/CMakeLists.txt
python/paddle/v2/framework/tests/CMakeLists.txt
+0
-1
python/paddle/v2/framework/tests/op_test_util.py
python/paddle/v2/framework/tests/op_test_util.py
+32
-27
python/paddle/v2/framework/tests/test_add_two_op.py
python/paddle/v2/framework/tests/test_add_two_op.py
+16
-3
python/paddle/v2/framework/tests/test_fc_op.py
python/paddle/v2/framework/tests/test_fc_op.py
+6
-4
python/paddle/v2/framework/tests/test_mul_op.py
python/paddle/v2/framework/tests/test_mul_op.py
+2
-2
python/paddle/v2/framework/tests/test_rowwise_add_op.py
python/paddle/v2/framework/tests/test_rowwise_add_op.py
+2
-2
python/paddle/v2/framework/tests/test_sgd_op.py
python/paddle/v2/framework/tests/test_sgd_op.py
+2
-2
python/paddle/v2/framework/tests/test_tensor.py
python/paddle/v2/framework/tests/test_tensor.py
+8
-5
未找到文件。
Dockerfile
浏览文件 @
fbcadb75
...
...
@@ -27,7 +27,7 @@ RUN apt-get update && \
git python-pip python-dev openssh-server bison
\
wget unzip unrar
tar
xz-utils bzip2
gzip
coreutils ntp
\
curl
sed grep
graphviz libjpeg-dev zlib1g-dev
\
python-numpy python-matplotlib gcc
g++
\
python-numpy python-matplotlib gcc
-4.8 g++-4.8
\
automake locales clang-format-3.8 swig doxygen cmake
\
liblapack-dev liblapacke-dev libboost-dev
\
clang-3.8 llvm-3.8 libclang-3.8-dev
\
...
...
cmake/flags.cmake
浏览文件 @
fbcadb75
...
...
@@ -9,6 +9,11 @@ function(CheckCompilerCXX11Flag)
if
(
${
CMAKE_CXX_COMPILER_VERSION
}
VERSION_LESS 4.8
)
message
(
FATAL_ERROR
"Unsupported GCC version. GCC >= 4.8 required."
)
endif
()
# TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use Debug mode instead for now.
if
(
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9
)
set
(
CMAKE_BUILD_TYPE
"Debug"
CACHE STRING
""
FORCE
)
endif
()
elseif
(
CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
)
# cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
# Apple Clang is a different compiler from upstream Clang and has different version numbers.
...
...
doc/api/v2/config/layer.rst
浏览文件 @
fbcadb75
...
...
@@ -105,6 +105,11 @@ cross_channel_norm
.. autoclass:: paddle.v2.layer.cross_channel_norm
:noindex:
row_l2_norm
-----------
.. autoclass:: paddle.v2.layer.row_l2_norm
:noindex:
Recurrent Layers
================
...
...
@@ -320,6 +325,11 @@ scaling
.. autoclass:: paddle.v2.layer.scaling
:noindex:
clip
----
.. autoclass:: paddle.v2.layer.clip
:noindex:
slope_intercept
---------------
.. autoclass:: paddle.v2.layer.slope_intercept
...
...
paddle/cuda/src/hl_cuda_cudnn.cc
浏览文件 @
fbcadb75
...
...
@@ -1022,6 +1022,15 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real
alpha
=
1.0
f
;
real
beta
=
1.0
f
;
cudnnBatchNormMode_t
mode
=
CUDNN_BATCHNORM_SPATIAL
;
int
batch_size
=
((
cudnn_tensor_descriptor
)
inputDesc
)
->
batch_size
;
if
(
batch_size
>
1024
&&
g_cudnn_lib_version
<
6000
)
{
LOG
(
INFO
)
<<
" To process current batch data with size "
<<
batch_size
<<
" (>1024), cudnnBatchNorm requires cuDNN version >= 6000."
<<
" If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED,"
<<
" just recompile PaddlePaddle with cuDNN >= 6000, replacing"
<<
" current version "
<<
g_cudnn_lib_version
;
}
CHECK_CUDNN
(
dynload
::
cudnnBatchNormalizationForwardInference
(
t_resource
.
cudnn_handle
,
mode
,
...
...
paddle/framework/detail/tensor-inl.h
浏览文件 @
fbcadb75
...
...
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/memory/memcpy.h"
namespace
paddle
{
...
...
@@ -62,9 +61,11 @@ inline T* Tensor::mutable_data(platform::Place place) {
if
(
platform
::
is_cpu_place
(
place
))
{
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
CPUPlace
>
(
boost
::
get
<
platform
::
CPUPlace
>
(
place
),
size
));
}
else
if
(
platform
::
is_gpu_place
(
place
))
{
#ifdef PADDLE_ONLY_CPU
PADDLE_THROW
(
"'GPUPlace' is not supported in CPU only device."
);
}
#ifndef PADDLE_ONLY_CPU
else
if
(
platform
::
is_gpu_place
(
place
))
{
#else
holder_
.
reset
(
new
PlaceholderImpl
<
T
,
platform
::
GPUPlace
>
(
boost
::
get
<
platform
::
GPUPlace
>
(
place
),
size
));
}
...
...
paddle/framework/op_registry.h
浏览文件 @
fbcadb75
...
...
@@ -400,6 +400,14 @@ class GradOpRegisterHelper {
return 0; \
}
/**
 * Macro to forbid users from registering a gradient operator for __op_type.
 *
 * As written here it expands only to STATIC_ASSERT_GLOBAL_NAMESPACE with the
 * identifier __reg_gradient_op__<op_type><op_type>_grad, so it has to be used
 * at global namespace scope.
 *
 * NOTE(review): presumably this claims the same identifier that the gradient
 * registration macro would emit, so that a later registration of the gradient
 * operator fails to compile -- confirm against the registration macros.
 */
#define NO_GRADIENT(__op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_gradient_op__##__op_type##__op_type##_grad, \
"NO_GRADIENT must be in global namespace")
/**
* Macro to Register OperatorKernel.
*/
...
...
paddle/framework/operator.cc
浏览文件 @
fbcadb75
...
...
@@ -20,16 +20,16 @@ namespace paddle {
namespace
framework
{
template
<
>
Eigen
::
DefaultDevice
*
ExecutionContext
::
GetEigenDevice
<
Eigen
::
DefaultDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
CPUPlace
,
Eigen
::
DefaultDevice
>
()
const
{
return
device_context_
.
get_eigen_device
<
Eigen
::
DefaultDevice
>
();
return
*
device_context_
.
get_eigen_device
<
Eigen
::
DefaultDevice
>
();
}
#ifndef PADDLE_ONLY_CPU
template
<
>
Eigen
::
GpuDevice
*
Eigen
::
GpuDevice
&
ExecutionContext
::
GetEigenDevice
<
platform
::
GPUPlace
,
Eigen
::
GpuDevice
>
()
const
{
return
device_context_
.
get_eigen_device
<
Eigen
::
GpuDevice
>
();
return
*
device_context_
.
get_eigen_device
<
Eigen
::
GpuDevice
>
();
}
#endif
...
...
paddle/framework/operator.h
浏览文件 @
fbcadb75
...
...
@@ -253,7 +253,7 @@ class ExecutionContext : public OperatorContext {
template
<
typename
PlaceType
,
typename
DeviceType
=
typename
EigenDeviceConverter
<
PlaceType
>::
EigenDeviceType
>
DeviceType
*
GetEigenDevice
()
const
;
DeviceType
&
GetEigenDevice
()
const
;
platform
::
Place
GetPlace
()
const
{
return
device_context_
.
GetPlace
();
}
...
...
paddle/function/ConvOp.h
浏览文件 @
fbcadb75
...
...
@@ -109,6 +109,13 @@ protected:
return
filter
[
filter
.
ndims
()
-
1
];
}
// Decide whether the im2col expansion has to be performed for this filter.
// It is needed unless the convolution is a pure pointwise one: a 1x1 filter
// with unit strides and zero padding in both directions.
inline bool isNeedIm2col(const TensorShape& filter) const {
  return getFilterHeight(filter) != 1 || getFilterWidth(filter) != 1 ||
         strideH() != 1 || strideW() != 1 ||
         paddingH() != 0 || paddingW() != 0;
}
std
::
vector
<
size_t
>
strides_
;
std
::
vector
<
size_t
>
paddings_
;
...
...
paddle/function/GemmConvOp.cpp
浏览文件 @
fbcadb75
...
...
@@ -66,16 +66,23 @@ public:
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
=
TensorShape
({
inputChannels
/
groups_
,
TensorShape
colShape
;
real
*
colData
=
NULL
;
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
Im2ColFunctor
<
kCFO
,
Device
,
real
>
im2col
;
GemmFunctor
<
Device
,
real
>
gemm
;
...
...
@@ -86,6 +93,7 @@ public:
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
if
(
needIm2col
)
{
im2col
(
inputData
+
g
*
inputOffset
,
imShape
,
colData
,
...
...
@@ -94,7 +102,9 @@ public:
strideW
(),
paddingH
(),
paddingW
());
}
else
{
colData
=
inputData
+
g
*
inputOffset
;
}
int
M
=
outputChannels
/
groups_
;
int
N
=
outputHeight
*
outputWidth
;
int
K
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
...
...
@@ -159,19 +169,27 @@ public:
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
inputGrad
=
outputs
[
0
].
data
<
real
>
();
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
=
TensorShape
({
inputChannels
/
groups_
,
TensorShape
colShape
;
real
*
colData
=
NULL
;
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
Col2ImFunctor
<
kCFO
,
Device
,
real
>
col2im
;
GemmFunctor
<
Device
,
real
>
gemm
;
size_t
inputOffset
=
imShape
.
getElements
();
size_t
outputOffset
=
(
outputChannels
/
groups_
)
*
outputHeight
*
outputWidth
;
...
...
@@ -182,6 +200,11 @@ public:
int
K
=
outputChannels
/
groups_
;
int
N
=
outputHeight
*
outputWidth
;
int
M
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
real
scale
=
0.0
f
;
if
(
!
needIm2col
)
{
colData
=
inputGrad
+
g
*
inputOffset
;
scale
=
1.0
f
;
}
gemm
(
CblasTrans
,
CblasNoTrans
,
M
,
...
...
@@ -192,9 +215,10 @@ public:
M
,
outputGrad
+
g
*
outputOffset
,
N
,
0.0
f
,
scale
,
colData
,
N
);
if
(
needIm2col
)
{
col2im
(
inputGrad
+
g
*
inputOffset
,
imShape
,
colData
,
...
...
@@ -204,6 +228,7 @@ public:
paddingH
(),
paddingW
());
}
}
inputGrad
+=
inputChannels
*
inputHeight
*
inputWidth
;
outputGrad
+=
outputChannels
*
outputHeight
*
outputWidth
;
}
...
...
@@ -255,16 +280,23 @@ public:
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
inputData
=
inputs
[
1
].
data
<
real
>
();
real
*
filterGrad
=
outputs
[
0
].
data
<
real
>
();
bool
needIm2col
=
isNeedIm2col
(
filter
);
TensorShape
imShape
=
TensorShape
({
inputChannels
/
groups_
,
inputHeight
,
inputWidth
});
TensorShape
colShape
=
TensorShape
({
inputChannels
/
groups_
,
TensorShape
colShape
;
real
*
colData
=
NULL
;
if
(
needIm2col
)
{
colShape
=
TensorShape
({
inputChannels
/
groups_
,
filterHeight
,
filterWidth
,
outputHeight
,
outputWidth
});
resizeBuffer
<
Device
>
(
colShape
.
getElements
());
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
}
Im2ColFunctor
<
kCFO
,
Device
,
real
>
im2col
;
GemmFunctor
<
Device
,
real
>
gemm
;
...
...
@@ -274,6 +306,7 @@ public:
size_t
filterOffset
=
filter
.
getElements
()
/
groups_
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
for
(
size_t
g
=
0
;
g
<
groups_
;
g
++
)
{
if
(
needIm2col
)
{
im2col
(
inputData
+
g
*
inputOffset
,
imShape
,
colData
,
...
...
@@ -282,7 +315,9 @@ public:
strideW
(),
paddingH
(),
paddingW
());
}
else
{
colData
=
inputData
+
g
*
inputOffset
;
}
int
M
=
outputChannels
/
groups_
;
int
K
=
outputHeight
*
outputWidth
;
int
N
=
inputChannels
/
groups_
*
filterHeight
*
filterWidth
;
...
...
paddle/gserver/layers/ClipLayer.cpp
0 → 100644
浏览文件 @
fbcadb75
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace
paddle
{
/**
 * Layer that clamps every element of its single input to the range
 * given by the configured thresholds:
 * \f[
 *   out[i] = \min\left(\max\left(in[i], min\right), max\right)
 * \f]
 * The thresholds are read from the clip_conf of the first input in init().
 */
class ClipLayer : public Layer {
public:
  explicit ClipLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

protected:
  // Lower threshold, filled from clip_conf().min() in init().
  double min_;
  // Upper threshold, filled from clip_conf().max() in init().
  double max_;
};

// Register ClipLayer under the layer identifier `clip`.
REGISTER_LAYER(clip, ClipLayer);
/**
 * Initialise the layer and read the clipping thresholds from its config.
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false if the base-class initialisation fails, true otherwise.
 *
 * Aborts through CHECK_EQ / CHECK_LT when the layer does not have exactly
 * one input or when min is not strictly smaller than max.
 */
bool ClipLayer::init(const LayerMap& layerMap,
                     const ParameterMap& parameterMap) {
  // Propagate a failed base-class initialisation instead of ignoring it.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  CHECK_EQ(inputLayers_.size(), 1U);

  auto layerConf = config_.inputs(0).clip_conf();
  min_ = layerConf.min();
  max_ = layerConf.max();
  // An empty or inverted range is rejected up front.
  CHECK_LT(min_, max_);
  return true;
}
/**
 * Forward pass: the output is a copy of the input with every element
 * clipped by (min_, max_).
 */
void ClipLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr input = getInputValue(0);
  // The output has exactly the shape of the input.
  resetOutput(input->getHeight(), input->getWidth());

  // Copy first, then clip the copy in place, so the input is not modified.
  MatrixPtr output = getOutputValue();
  output->copyFrom(*input);
  output->clip(min_, max_);
}
/**
 * Backward pass: accumulate the output gradient into the input gradient,
 * masked by the clip derivative evaluated at the input value.
 *
 * @param callback  not used by this layer.
 *
 * Does nothing when the input has no gradient matrix.
 */
void ClipLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV = getInputValue(0);
  MatrixPtr inG = getInputGrad(0);
  if (!inG) {
    return;  // nothing to propagate to
  }

  MatrixPtr outG = getOutputGrad();

  // Scratch matrix with the shape of the output gradient; it receives the
  // 0/1 mask produced by clipDerivative for the input values.
  MatrixPtr tmpMtx;
  Matrix::resizeOrCreate(
      tmpMtx, outG->getHeight(), outG->getWidth(), false, useGpu_);
  tmpMtx->clipDerivative(*inV, min_, max_);

  // NOTE(review): presumably inG = 1 * inG + 1 * (outG .* tmpMtx), i.e. an
  // accumulation rather than an overwrite -- confirm against addDotMul.
  inG->addDotMul(*outG, *tmpMtx, 1, 1);
}
}
// namespace paddle
paddle/gserver/layers/RowL2NormLayer.cpp
0 → 100644
浏览文件 @
fbcadb75
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace
paddle
{
/**
 * Layer that L2-normalizes every row of its input:
 * \f[
 *   out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}}
 * \f]
 * Both \f$in\f$ and \f$out\f$ have size (batchSize x dataDim).
 */
class RowL2NormLayer : public Layer {
public:
  explicit RowL2NormLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;

  void backward(const UpdateCallback& callback = nullptr) override;

protected:
  // (batchSize x dataDim) buffer: receives the squared input in forward()
  // and is reused as a scratch buffer in backward().
  MatrixPtr inSquare_;
  // (batchSize x 1) buffer filled in forward() and read again in backward().
  MatrixPtr l2NormReciprocal_;
  // (batchSize x 1) scratch buffer used only in backward().
  MatrixPtr dotSum_;
};

// Register RowL2NormLayer under the layer identifier `row_l2_norm`.
REGISTER_LAYER(row_l2_norm, RowL2NormLayer);
/**
 * Initialise the layer; it has no configuration of its own.
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false if the base-class initialisation fails, true otherwise.
 *
 * Aborts through CHECK_EQ when the layer does not have exactly one input.
 */
bool RowL2NormLayer::init(const LayerMap& layerMap,
                          const ParameterMap& parameterMap) {
  // Propagate a failed base-class initialisation instead of ignoring it.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  CHECK_EQ(inputLayers_.size(), 1U);
  return true;
}
/**
 * Forward pass: scale every input row by the value kept in
 * l2NormReciprocal_, which is derived from the row sums of the squared
 * input.
 */
void RowL2NormLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr input = getInputValue(0);
  size_t rows = input->getHeight();
  size_t cols = getSize();
  // The configured layer size must match the input width.
  CHECK_EQ(cols, input->getWidth());

  /* (re)allocate output_ for this batch if necessary */
  resetOutput(rows, cols);
  MatrixPtr output = getOutputValue();

  // inSquare_ <- input squared
  Matrix::resizeOrCreate(inSquare_, rows, cols, false, useGpu_);
  input->square2(*inSquare_);

  // l2NormReciprocal_ <- rowSum(inSquare_), then sqrt2, then scalarDiv with
  // 1.0, all applied in place on the (rows x 1) buffer.
  // NOTE(review): scalarDiv(b, 1.0) presumably computes 1.0 / b; an all-zero
  // row would then divide by zero -- confirm how callers avoid that.
  Matrix::resizeOrCreate(l2NormReciprocal_, rows, 1, false, useGpu_);
  inSquare_->rowSum(*l2NormReciprocal_);
  l2NormReciprocal_->sqrt2(*l2NormReciprocal_);
  l2NormReciprocal_->scalarDiv(*l2NormReciprocal_, 1.0);

  // output <- input with each row scaled by its l2NormReciprocal_ entry
  output->rowScale(0, *input, *l2NormReciprocal_);
}
/**
 * Backward pass of the row-wise L2 normalization.
 *
 * With r[i] = l2NormReciprocal_[i] as left behind by forward(), the update
 * applied to the input gradient is
 *
 *   inG[i][j] += outG[i][j] * r[i]
 *   inG[i][j] -= inV[i][j] * r[i] * r[i] * dot(outG[i], outV[i])
 *
 * @param callback  not used by this layer.
 *
 * Does nothing when the input has no gradient matrix.
 */
void RowL2NormLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV = getInputValue(0);
  MatrixPtr inG = getInputGrad(0);
  if (!inG) {
    return;  // nothing to propagate to
  }

  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();
  size_t batchSize = inV->getHeight();

  // dotSum_[i] = dot(outG[i], outV[i]) * r[i] * r[i]
  Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_);
  dotSum_->zeroMem();
  dotSum_->rowDotMul(0, *outG, *outV);
  dotSum_->dotMul(*dotSum_, *l2NormReciprocal_);
  dotSum_->dotMul(*dotSum_, *l2NormReciprocal_);

  // inSquare_ is reused as scratch here: inSquare_[i] = inV[i] * dotSum_[i]
  inSquare_->rowScale(0, *inV, *dotSum_);
  inG->sub(*inSquare_);

  // inG[i] += outG[i] * r[i]
  inG->addRowScale(0, *outG, *l2NormReciprocal_);
}
}
// namespace paddle
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
fbcadb75
...
...
@@ -1899,6 +1899,36 @@ TEST(Layer, CropLayer) {
}
}
// Gradient check for the "clip" layer with two random thresholds, run once
// with useGpu = false and once with useGpu = true.
TEST(Layer, ClipLayer) {
  const size_t kBatchSize = 128;
  const size_t kWidth = 512;

  TestConfig testConf;
  testConf.layerConfig.set_type("clip");
  testConf.inputDefs.push_back({INPUT_DATA, "input", kWidth, 0});
  ClipConfig* clipConf =
      testConf.layerConfig.add_inputs()->mutable_clip_conf();

  // Two draws from [0, 1]; the smaller becomes min, the larger max.
  const double first = std::rand() / static_cast<double>(RAND_MAX);
  const double second = std::rand() / static_cast<double>(RAND_MAX);
  clipConf->set_min(std::min(first, second));
  clipConf->set_max(std::max(first, second));

  for (bool onGpu : {false, true}) {
    testLayerGrad(testConf, "clip", kBatchSize, false, onGpu, false);
  }
}
// Gradient check for the "row_l2_norm" layer, run once with useGpu = false
// and once with useGpu = true.
TEST(Layer, RowL2NormLayer) {
  const size_t kBatchSize = 128;
  const size_t kWidth = 512;

  TestConfig testConf;
  testConf.layerConfig.set_type("row_l2_norm");
  testConf.layerConfig.set_size(kWidth);
  testConf.inputDefs.push_back({INPUT_DATA, "input", kWidth, 0});
  // The layer takes one input and needs no per-input configuration.
  testConf.layerConfig.add_inputs();

  for (bool onGpu : {false, true}) {
    testLayerGrad(testConf, "row_l2_norm", kBatchSize, false, onGpu, false);
  }
}
int
main
(
int
argc
,
char
**
argv
)
{
testing
::
InitGoogleTest
(
&
argc
,
argv
);
initMain
(
argc
,
argv
);
...
...
paddle/math/BaseMatrix.cu
浏览文件 @
fbcadb75
...
...
@@ -442,6 +442,12 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER,
// Apply the unary Clip functor, built from (p1, p2), to this matrix.
template <class T>
void BaseMatrixT<T>::clip(T p1, T p2) {
  unary::Clip<T> clipOp(p1, p2);
  applyUnary(clipOp);
}
// Binary functor ClipDerivative with parameters (p1, p2): the expression
// yields 0 when b < p1 or b > p2, and 1 otherwise.
DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER,
                                  a = b < p1 ? 0 : (b > p2 ? 0 : 1));

// Apply the ClipDerivative functor, built from (p1, p2), to this matrix
// with b as the second operand.
template <class T>
void BaseMatrixT<T>::clipDerivative(BaseMatrixT& b, T p1, T p2) {
  binary::ClipDerivative<T> maskOp(p1, p2);
  applyBinary(maskOp, b);
}
DEFINE_MATRIX_UNARY_PARAMETER_OP
(
BiggerThanScalar
,
ONE_PARAMETER
,
a
=
a
>
p
?
1.0
f
:
0.0
f
);
template
<
class
T
>
...
...
paddle/math/BaseMatrix.h
浏览文件 @
fbcadb75
...
...
@@ -488,6 +488,13 @@ public:
*/
void
clip
(
T
p1
,
T
p2
);
/**
 * Element-wise derivative mask of clipping with thresholds p1 and p2,
 * evaluated at b:
 *
 * @code
 * this = (b < p1 || b > p2) ? 0 : 1
 * @endcode
 */
void
clipDerivative
(
BaseMatrixT
&
b
,
T
p1
,
T
p2
);
/**
* @code
* a = a > p ? 1.0f : 0.0f
...
...
paddle/operators/add_op.cc
浏览文件 @
fbcadb75
...
...
@@ -50,10 +50,6 @@ The equation is: Out = X + Y
class
AddOpGrad
:
public
OperatorWithKernel
{
protected:
void
InferShape
(
const
InferShapeContext
&
ctx
)
const
override
{}
std
::
string
DebugString
()
const
override
{
LOG
(
INFO
)
<<
"AddOpGrad"
;
return
""
;
}
};
}
// namespace operators
...
...
paddle/operators/add_op.cu
浏览文件 @
fbcadb75
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/operators/add_op.h"
...
...
paddle/operators/add_op.h
浏览文件 @
fbcadb75
...
...
@@ -28,10 +28,13 @@ public:
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenVector
<
T
>::
Flatten
(
*
output
).
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()))
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
input0
)
+
framework
::
EigenVector
<
T
>::
Flatten
(
*
input1
);
auto
X
=
EigenVector
<
T
>::
Flatten
(
*
input0
);
auto
Y
=
EigenVector
<
T
>::
Flatten
(
*
input1
);
auto
Z
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
Z
.
device
(
place
)
=
X
+
Y
;
}
};
...
...
paddle/operators/cross_entropy_op.cu
浏览文件 @
fbcadb75
#define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL
(
onehot_cross_entropy
,
...
...
paddle/operators/mean_op.h
浏览文件 @
fbcadb75
...
...
@@ -27,8 +27,11 @@ public:
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenScalar
<
T
>::
From
(
*
output
).
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()))
=
EigenVector
<
T
>::
Flatten
(
*
input
).
mean
();
auto
X
=
EigenVector
<
T
>::
Flatten
(
*
input
);
auto
y
=
EigenScalar
<
T
>::
From
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
y
.
device
(
place
)
=
X
.
mean
();
}
};
...
...
paddle/operators/mul_op.cu
浏览文件 @
fbcadb75
...
...
@@ -12,6 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/mul_op.h"
REGISTER_OP_GPU_KERNEL
(
mul
,
ops
::
MulKernel
<
ops
::
GPUPlace
,
float
>
);
\ No newline at end of file
paddle/operators/mul_op.h
浏览文件 @
fbcadb75
...
...
@@ -26,13 +26,18 @@ public:
Eigen
::
array
<
Eigen
::
IndexPair
<
Eigen
::
DenseIndex
>
,
1
>
dim_pair
=
{
{
Eigen
::
IndexPair
<
Eigen
::
DenseIndex
>
(
1
,
0
)}};
auto
input0
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
input1
=
context
.
Input
<
Tensor
>
(
"Y"
);
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenMatrix
<
T
>::
From
(
*
output
).
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()))
=
EigenMatrix
<
T
>::
From
(
*
context
.
Input
<
Tensor
>
(
"X"
))
.
contract
(
EigenMatrix
<
T
>::
From
(
*
context
.
Input
<
Tensor
>
(
"Y"
)),
dim_pair
);
auto
X
=
EigenMatrix
<
T
>::
From
(
*
input0
);
auto
Y
=
EigenMatrix
<
T
>::
From
(
*
input1
);
auto
Z
=
EigenMatrix
<
T
>::
From
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
Z
.
device
(
place
)
=
X
.
contract
(
Y
,
dim_pair
);
}
};
}
// namespace operators
...
...
paddle/operators/rowwise_add_op.cu
浏览文件 @
fbcadb75
#define EIGEN_USE_GPU
#include "paddle/operators/rowwise_add_op.h"
REGISTER_OP_GPU_KERNEL
(
rowwise_add
,
...
...
paddle/operators/rowwise_add_op.h
浏览文件 @
fbcadb75
...
...
@@ -33,7 +33,7 @@ public:
const
int
rest_size
=
input
.
size
()
/
bias_size
;
Eigen
::
DSizes
<
int
,
1
>
one_d
(
input
.
size
());
Eigen
::
DSizes
<
int
,
1
>
bcast
(
rest_size
);
output
.
reshape
(
one_d
).
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()
))
=
output
.
reshape
(
one_d
).
device
(
context
.
GetEigenDevice
<
Place
>
(
))
=
input
.
reshape
(
one_d
)
+
bias
.
broadcast
(
bcast
).
reshape
(
one_d
);
}
};
...
...
paddle/operators/sgd_op.cu
浏览文件 @
fbcadb75
#define EIGEN_USE_GPU
#include "paddle/operators/sgd_op.h"
REGISTER_OP_GPU_KERNEL
(
sgd
,
ops
::
SGDOpKernel
<
ops
::
GPUPlace
,
float
>
);
\ No newline at end of file
paddle/operators/sgd_op.h
浏览文件 @
fbcadb75
...
...
@@ -29,8 +29,12 @@ public:
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
EigenVector
<
T
>::
Flatten
(
*
param_out
).
device
(
*
(
ctx
.
GetEigenDevice
<
Place
>
()))
=
EigenVector
<
T
>::
Flatten
(
*
param
)
-
lr
*
EigenVector
<
T
>::
Flatten
(
*
grad
);
auto
p
=
EigenVector
<
T
>::
Flatten
(
*
param
);
auto
g
=
EigenVector
<
T
>::
Flatten
(
*
grad
);
auto
o
=
EigenVector
<
T
>::
Flatten
(
*
param_out
);
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
o
.
device
(
place
)
=
p
-
lr
*
g
;
}
};
...
...
paddle/operators/sigmoid_op.cu
浏览文件 @
fbcadb75
#define EIGEN_USE_GPU
#include "paddle/operators/sigmoid_op.h"
REGISTER_OP_GPU_KERNEL
(
sigmoid
,
ops
::
SigmoidKernel
<
ops
::
GPUPlace
,
float
>
);
paddle/operators/sigmoid_op.h
浏览文件 @
fbcadb75
...
...
@@ -27,9 +27,11 @@ public:
auto
output
=
context
.
Output
<
Tensor
>
(
0
);
output
->
mutable_data
<
T
>
(
context
.
GetPlace
());
EigenVector
<
T
>::
Flatten
(
*
output
).
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()))
=
1.0
/
(
1.0
+
(
-
1.0
*
EigenVector
<
T
>::
Flatten
(
*
input
)).
exp
());
auto
X
=
EigenVector
<
T
>::
Flatten
(
*
input
);
auto
Y
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
Y
.
device
(
place
)
=
1.0
/
(
1.0
+
(
-
1.0
*
X
).
exp
());
}
};
}
// namespace operators
...
...
paddle/operators/softmax_op.cu
浏览文件 @
fbcadb75
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/operators/softmax_op.h"
...
...
paddle/operators/softmax_op.h
浏览文件 @
fbcadb75
...
...
@@ -46,9 +46,9 @@ public:
.
reshape
(
batch_by_one
)
.
broadcast
(
one_by_class
));
softmax
.
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()
))
=
shifted_logits
.
exp
();
softmax
.
device
(
context
.
GetEigenDevice
<
Place
>
(
))
=
shifted_logits
.
exp
();
softmax
.
device
(
*
(
context
.
GetEigenDevice
<
Place
>
()
))
=
softmax
.
device
(
context
.
GetEigenDevice
<
Place
>
(
))
=
(
softmax
*
softmax
.
sum
(
along_class
)
.
inverse
()
...
...
paddle/platform/enforce.h
浏览文件 @
fbcadb75
...
...
@@ -148,7 +148,7 @@ inline void throw_on_error(T e) {
do { \
throw ::paddle::platform::EnforceNotMet( \
std::make_exception_ptr( \
std::runtime_error(string::Sprintf(__VA_ARGS__))), \
std::runtime_error(
paddle::
string::Sprintf(__VA_ARGS__))), \
__FILE__, __LINE__); \
} while (0)
...
...
paddle/pybind/CMakeLists.txt
浏览文件 @
fbcadb75
cc_library
(
paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python
DEPS pybind python
backward
fc_op
sgd_op
add_op
...
...
paddle/pybind/pybind.cc
浏览文件 @
fbcadb75
...
...
@@ -16,10 +16,13 @@ limitations under the License. */
#include <fstream>
#include <vector>
#include "paddle/framework/backward.h"
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#include "paddle/pybind/tensor_bind.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
...
...
@@ -43,6 +46,10 @@ template <typename ClassType>
void
ExposeOperator
(
ClassType
&
m
)
{
m
.
def
(
"infer_shape"
,
&
ClassType
::
type
::
InferShape
)
.
def
(
"run"
,
&
ClassType
::
type
::
Run
)
.
def
(
"type"
,
[](
const
typename
ClassType
::
type
&
op
)
->
std
::
string
{
return
op
.
type_
;
})
.
def
(
"outputs"
,
[](
const
typename
ClassType
::
type
&
op
)
->
std
::
vector
<
std
::
string
>
{
return
op
.
outputs_
;
...
...
@@ -55,6 +62,14 @@ static size_t UniqueIntegerGenerator() {
return
generator
.
fetch_add
(
1
);
}
// Report whether this build includes GPU support: true unless the
// PADDLE_ONLY_CPU macro was defined at compile time.
bool IsCompileGPU() {
#ifndef PADDLE_ONLY_CPU
  return true;
#else
  return false;
#endif
}
PYBIND11_PLUGIN
(
core
)
{
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
...
...
@@ -69,15 +84,27 @@ PYBIND11_PLUGIN(core) {
self
.
Resize
(
pd
::
make_ddim
(
dim
));
})
.
def
(
"alloc_float"
,
[](
pd
::
Tensor
&
self
)
{
self
.
mutable_data
<
float
>
(
paddle
::
platform
::
CPUPlace
());
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
GPUPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
})
.
def
(
"alloc_float"
,
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
CPUPlace
&
place
)
{
self
.
mutable_data
<
float
>
(
place
);
})
.
def
(
"alloc_int"
,
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
CPUPlace
&
place
)
{
self
.
mutable_data
<
int
>
(
place
);
})
.
def
(
"alloc_int"
,
[](
pd
::
Tensor
&
self
)
{
self
.
mutable_data
<
int
>
(
p
addle
::
platform
::
CPUPlace
()
);
[](
pd
::
Tensor
&
self
,
paddle
::
platform
::
GPUPlace
&
place
)
{
self
.
mutable_data
<
int
>
(
p
lace
);
})
.
def
(
"set"
,
paddle
::
pybind
::
PyTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
paddle
::
pybind
::
PyTensorSetFromArray
<
int
>
)
.
def
(
"set"
,
paddle
::
pybind
::
PyCPUTensorSetFromArray
<
float
>
)
.
def
(
"set"
,
paddle
::
pybind
::
PyCPUTensorSetFromArray
<
int
>
)
#ifndef PADDLE_ONLY_CPU
.
def
(
"set"
,
paddle
::
pybind
::
PyCUDATensorSetFromArray
<
float
>
)
.
def
(
"set"
,
paddle
::
pybind
::
PyCUDATensorSetFromArray
<
int
>
)
#endif
.
def
(
"shape"
,
[](
pd
::
Tensor
&
self
)
{
return
pd
::
vectorize
(
self
.
dims
());
})
.
def
(
"set_float_element"
,
[](
pd
::
Tensor
&
self
,
size_t
offset
,
float
f
)
{
...
...
@@ -144,11 +171,27 @@ All parameter, weight, gradient are variables in Paddle.
"The module will return special predefined variable name in Paddle"
)
.
def
(
"empty"
,
pd
::
OperatorBase
::
EMPTY_VAR_NAME
)
.
def
(
"temp"
,
pd
::
OperatorBase
::
TMP_VAR_NAME
);
// clang-format off
py
::
class_
<
paddle
::
platform
::
DeviceContext
>
(
m
,
"DeviceContext"
)
.
def_static
(
"cpu_context"
,
[]()
->
paddle
::
platform
::
DeviceContext
*
{
.
def_static
(
"create"
,
[](
paddle
::
platform
::
CPUPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
return
new
paddle
::
platform
::
CPUDeviceContext
();
})
.
def_static
(
"create"
,
[](
paddle
::
platform
::
GPUPlace
&
place
)
->
paddle
::
platform
::
DeviceContext
*
{
#ifdef PADDLE_ONLY_CPU
PADDLE_THROW
(
"GPUPlace is not supported in CPU device."
);
#else
return
new
paddle
::
platform
::
CUDADeviceContext
(
place
);
#endif
});
// clang-format on
py
::
class_
<
paddle
::
platform
::
GPUPlace
>
(
m
,
"GPUPlace"
).
def
(
py
::
init
<
int
>
());
py
::
class_
<
paddle
::
platform
::
CPUPlace
>
(
m
,
"CPUPlace"
).
def
(
py
::
init
<>
());
py
::
class_
<
pd
::
OperatorBase
,
std
::
shared_ptr
<
pd
::
OperatorBase
>>
operator_base
(
m
,
"Operator"
);
...
...
@@ -162,6 +205,13 @@ All parameter, weight, gradient are variables in Paddle.
desc
.
InitializationErrorString
());
return
pd
::
OpRegistry
::
CreateOp
(
desc
);
});
operator_base
.
def
(
"backward"
,
[](
const
pd
::
OperatorBase
&
forwardOp
,
const
std
::
unordered_set
<
std
::
string
>&
no_grad_vars
)
{
return
pd
::
Backward
(
forwardOp
,
no_grad_vars
);
});
ExposeOperator
(
operator_base
);
py
::
class_
<
pd
::
NetOp
,
std
::
shared_ptr
<
pd
::
NetOp
>>
net
(
m
,
"Net"
);
...
...
@@ -184,5 +234,7 @@ All parameter, weight, gradient are variables in Paddle.
m
.
def
(
"unique_integer"
,
UniqueIntegerGenerator
);
m
.
def
(
"is_compile_gpu"
,
IsCompileGPU
);
return
m
.
ptr
();
}
paddle/pybind/tensor_bind.h
浏览文件 @
fbcadb75
...
...
@@ -13,9 +13,11 @@
limitations under the License. */
#pragma once
#include <paddle/framework/tensor.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <string>
#include "paddle/framework/tensor.h"
#include "paddle/memory/memcpy.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
namespace
py
=
pybind11
;
...
...
@@ -40,9 +42,6 @@ template <size_t I, typename... ARGS>
struct
CastToPyBufferImpl
<
true
,
I
,
ARGS
...
>
{
using
CUR_TYPE
=
typename
std
::
tuple_element
<
I
,
std
::
tuple
<
ARGS
...
>>::
type
;
py
::
buffer_info
operator
()(
framework
::
Tensor
&
tensor
)
{
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
tensor
.
holder_
->
place
()),
"Only CPU tensor can cast to numpy array"
);
if
(
std
::
type_index
(
typeid
(
CUR_TYPE
))
==
tensor
.
holder_
->
type
())
{
auto
dim_vec
=
framework
::
vectorize
(
tensor
.
dims
());
std
::
vector
<
size_t
>
dims_outside
;
...
...
@@ -56,12 +55,17 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
strides
[
i
-
1
]
=
sizeof
(
CUR_TYPE
)
*
prod
;
prod
*=
dims_outside
[
i
-
1
];
}
framework
::
Tensor
dst_tensor
;
if
(
paddle
::
platform
::
is_gpu_place
(
tensor
.
holder_
->
place
()))
{
dst_tensor
.
CopyFrom
<
CUR_TYPE
>
(
tensor
,
platform
::
CPUPlace
());
}
else
if
(
paddle
::
platform
::
is_cpu_place
(
tensor
.
holder_
->
place
()))
{
dst_tensor
=
tensor
;
}
return
py
::
buffer_info
(
tensor
.
mutable_data
<
CUR_TYPE
>
(
tensor
.
holder_
->
place
()),
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
dst_
tensor
.
holder_
->
place
()),
sizeof
(
CUR_TYPE
),
py
::
format_descriptor
<
CUR_TYPE
>::
format
(),
(
size_t
)
framework
::
arity
(
tensor
.
dims
()),
(
size_t
)
framework
::
arity
(
dst_
tensor
.
dims
()),
dims_outside
,
strides
);
}
else
{
...
...
@@ -77,9 +81,10 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
}
template
<
typename
T
>
void
PyTensorSetFromArray
(
void
Py
CPU
TensorSetFromArray
(
framework
::
Tensor
&
self
,
py
::
array_t
<
T
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
)
{
py
::
array_t
<
T
,
py
::
array
::
c_style
|
py
::
array
::
forcecast
>
array
,
paddle
::
platform
::
CPUPlace
&
place
)
{
std
::
vector
<
int
>
dims
;
dims
.
reserve
(
array
.
ndim
());
for
(
size_t
i
=
0
;
i
<
array
.
ndim
();
++
i
)
{
...
...
@@ -87,9 +92,28 @@ void PyTensorSetFromArray(
}
self
.
Resize
(
framework
::
make_ddim
(
dims
));
auto
*
dst
=
self
.
mutable_data
<
T
>
(
p
addle
::
platform
::
CPUPlace
()
);
auto
*
dst
=
self
.
mutable_data
<
T
>
(
p
lace
);
std
::
memcpy
(
dst
,
array
.
data
(),
sizeof
(
T
)
*
array
.
size
());
}
#ifndef PADDLE_ONLY_CPU
// Fills `self` from a host-side numpy array, placing the data on `place`.
//
// The tensor is resized to the array's shape, storage of element type T is
// obtained for `place`, and the array's bytes are copied over with a
// host-to-device memcpy.
template <typename T>
void PyCUDATensorSetFromArray(
    framework::Tensor &self,
    py::array_t<T, py::array::c_style | py::array::forcecast> array,
    paddle::platform::GPUPlace &place) {
  // Mirror the numpy shape, one int per axis, as the tensor's dims.
  std::vector<int> shape;
  shape.reserve(array.ndim());
  for (size_t axis = 0; axis < array.ndim(); ++axis) {
    shape.push_back(static_cast<int>(array.shape()[axis]));
  }
  self.Resize(framework::make_ddim(shape));

  // Destination storage on the requested place, then copy host -> device.
  auto *device_ptr = self.mutable_data<T>(place);
  const auto num_bytes = sizeof(T) * array.size();
  paddle::platform::GpuMemcpySync(
      device_ptr, array.data(), num_bytes, cudaMemcpyHostToDevice);
}
#endif
}
// namespace pybind
}
// namespace paddle
paddle/scripts/docker/build.sh
浏览文件 @
fbcadb75
...
...
@@ -148,7 +148,7 @@ cat >> /paddle/build/Dockerfile <<EOF
ADD *.deb /
# run paddle version to install python packages first
RUN apt-get update &&
\
apt-get install -y python-pip && pip install -U pip &&
\
apt-get install -y
wget
python-pip && pip install -U pip &&
\
dpkg -i /*.deb ; apt-get install -f -y &&
\
apt-get clean -y &&
\
rm -f /*.deb &&
\
...
...
proto/ModelConfig.proto
浏览文件 @
fbcadb75
...
...
@@ -298,6 +298,11 @@ message DetectionOutputConfig {
optional
uint32
width
=
9
[
default
=
1
];
}
// Per-input configuration of the clip layer. The config parser fills both
// fields from the layer's `min` / `max` arguments.
message
ClipConfig
{
// Lower threshold for clipping.
required
double
min
=
1
;
// Upper threshold for clipping.
required
double
max
=
2
;
}
message
LayerInputConfig
{
required
string
input_layer_name
=
1
;
optional
string
input_parameter_name
=
2
;
...
...
@@ -318,6 +323,7 @@ message LayerInputConfig {
optional
RowConvConfig
row_conv_conf
=
15
;
optional
MultiBoxLossConfig
multibox_loss_conf
=
16
;
optional
DetectionOutputConfig
detection_output_conf
=
17
;
optional
ClipConfig
clip_conf
=
18
;
}
message
LayerConfig
{
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
fbcadb75
...
...
@@ -2198,6 +2198,20 @@ class RowConvLayer(LayerBase):
self
.
create_input_parameter
(
0
,
psize
,
dims
)
@config_layer('clip')
class ClipLayer(LayerBase):
    """Config-parser entry for the 'clip' layer type.

    Requires exactly one input and ``min < max``. The layer size is taken
    from the input layer, and the two thresholds are written into the
    ``clip_conf`` of the first input config.
    """

    def __init__(self, name, inputs, min, max, **xargs):
        super(ClipLayer, self).__init__(
            name, 'clip', 0, inputs=inputs, **xargs)

        # Validate the arguments before touching the generated config.
        config_assert(
            len(self.inputs) == 1,
            'ClipLayer must have one and only one input.')
        config_assert(min < max, 'min must be less than max.')

        # The output keeps the size of the single input.
        source_layer = self.get_input_layer(0)
        self.set_layer_size(source_layer.size)

        first_input = self.config.inputs[0]
        first_input.clip_conf.min = min
        first_input.clip_conf.max = max
# key: cost type
# value: cost class
g_cost_map
=
{}
...
...
@@ -2754,6 +2768,16 @@ class SumToOneNormLayer(LayerBase):
self
.
set_layer_size
(
input_layer0
.
size
)
@config_layer('row_l2_norm')
class RowL2NormLayer(LayerBase):
    """Config-parser entry for the 'row_l2_norm' layer type.

    Requires exactly one input; the layer size is taken from that input.
    """

    def __init__(self, name, inputs, **xargs):
        super(RowL2NormLayer, self).__init__(
            name, 'row_l2_norm', 0, inputs=inputs, **xargs)

        input_count = len(self.inputs)
        config_assert(input_count == 1, 'RowL2NormLayer must have 1 input')

        # The output keeps the size of the single input.
        self.set_layer_size(self.get_input_layer(0).size)
@
config_layer
(
'cos_vm'
)
class
CosSimVecMatLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
inputs
,
cos_scale
=
1.0
,
device
=
None
):
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
fbcadb75
...
...
@@ -76,6 +76,7 @@ __all__ = [
'trans_layer'
,
'rotate_layer'
,
'sum_to_one_norm_layer'
,
'row_l2_norm_layer'
,
'get_output_layer'
,
'LayerType'
,
'context_projection'
,
...
...
@@ -128,6 +129,7 @@ __all__ = [
'prelu_layer'
,
'gated_unit_layer'
,
'crop_layer'
,
'clip_layer'
,
'slice_projection'
,
]
...
...
@@ -160,6 +162,7 @@ class LayerType(object):
BATCH_NORM_LAYER
=
'batch_norm'
NORM_LAYER
=
'norm'
SUM_TO_ONE_NORM_LAYER
=
'sum_to_one_norm'
ROW_L2_NORM_LAYER
=
'row_l2_norm'
ADDTO_LAYER
=
'addto'
CONCAT_LAYER
=
'concat'
...
...
@@ -221,6 +224,7 @@ class LayerType(object):
PRELU
=
'prelu'
CROP_LAYER
=
'crop'
CLIP_LAYER
=
'clip'
@
staticmethod
def
is_layer_type
(
type_name
):
...
...
@@ -2889,6 +2893,42 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
name
,
LayerType
.
SUM_TO_ONE_NORM_LAYER
,
parents
=
[
input
],
size
=
input
.
size
)
@wrap_name_default()
@layer_support()
def row_l2_norm_layer(input, name=None, layer_attr=None):
    r"""
    A layer for L2-normalization in each row.

    .. math::
       out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}}

    where the size of :math:`in` is (batchSize x dataDim),
    and the size of :math:`out` is also (batchSize x dataDim).

    The example usage is:

    .. code-block:: python

       row_l2_norm_layer = row_l2_norm_layer(input=layer)

    :param input: Input layer.
    :type input: LayerOutput
    :param name: Layer name.
    :type name: basestring
    :param layer_attr: extra layer attributes.
    :type layer_attr: ExtraLayerAttribute.
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    # NOTE: the docstring must be a raw string. In a normal string literal the
    # "\f" of "\frac" is compiled into a form-feed character, which corrupts
    # the formula in the generated documentation.

    # Layer(...) is called for its side effect only; its result is not used.
    Layer(
        name=name,
        type=LayerType.ROW_L2_NORM_LAYER,
        inputs=[input.name],
        **ExtraAttr.to_kwargs(layer_attr))
    # The returned handle reports the same size as the input layer.
    return LayerOutput(
        name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size)
@
wrap_name_default
(
"addto"
)
@
wrap_act_default
(
act
=
LinearActivation
())
@
wrap_bias_attr_default
(
has_bias
=
False
)
...
...
@@ -6046,3 +6086,36 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
layer_type
=
LayerType
.
CROP_LAYER
,
parents
=
input
,
size
=
l
.
config
.
size
)
@wrap_name_default("clip")
def clip_layer(input, min, max, name=None):
    r"""
    A layer for clipping the input value by the threshold.

    .. math::

        out[i] = \min\left(\max\left(in[i],p_{1}\right),p_{2}\right)

    where :math:`p_{1}` is the lower threshold ``min`` and :math:`p_{2}` is
    the upper threshold ``max``.

    .. code-block:: python

        clip = clip_layer(input=input_layer, min=-10, max=10)

    :param name: The Layer Name.
    :type name: basestring
    :param input: The input layer.
    :type input: LayerOutput.
    :param min: The lower threshold for clipping.
    :type min: double
    :param max: The upper threshold for clipping.
    :type max: double
    :return: LayerOutput
    """
    # NOTE: the docstring must be a raw string. In a normal string literal the
    # "\r" of "\right" is compiled into a carriage return, and "\m" / "\l" are
    # invalid escape sequences, so the formula would be corrupted.

    # Layer(...) is called for its side effect only; its result is not used.
    Layer(
        name=name,
        type=LayerType.CLIP_LAYER,
        inputs=[input.name],
        min=min,
        max=max)
    # The returned handle reports the same size as the input layer.
    return LayerOutput(
        name, LayerType.CLIP_LAYER, parents=[input], size=input.size)
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
浏览文件 @
fbcadb75
...
...
@@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer
)
test_recursive_topology test_gated_unit_layer
test_clip_layer test_row_l2_norm_layer
)
export
whole_configs
=(
test_split_datasource
)
python/paddle/trainer_config_helpers/tests/configs/protostr/test_clip_layer.protostr
0 → 100644
浏览文件 @
fbcadb75
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__clip_0__"
type: "clip"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
clip_conf {
min: -10
max: 10
}
}
}
input_layer_names: "input"
output_layer_names: "__clip_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__clip_0__"
input_layer_names: "input"
output_layer_names: "__clip_0__"
is_recurrent_layer_group: false
}
python/paddle/trainer_config_helpers/tests/configs/protostr/test_row_l2_norm_layer.protostr
0 → 100644
浏览文件 @
fbcadb75
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__row_l2_norm_layer_0__"
type: "row_l2_norm"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
}
}
input_layer_names: "input"
output_layer_names: "__row_l2_norm_layer_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__row_l2_norm_layer_0__"
input_layer_names: "input"
output_layer_names: "__row_l2_norm_layer_0__"
is_recurrent_layer_group: false
}
python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
0 → 100644
浏览文件 @
fbcadb75
from paddle.trainer_config_helpers import *

# Minimal network: a data layer of size 300 named 'input', clipped to the
# range [-10, 10]; the clip layer is the network output.
input_data = data_layer(name='input', size=300)
clipped = clip_layer(input=input_data, min=-10, max=10)

outputs(clipped)
python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
0 → 100644
浏览文件 @
fbcadb75
from paddle.trainer_config_helpers import *

# Minimal network: a data layer of size 300 named 'input', followed by a
# row-wise L2 normalization layer that is the network output.
input_data = data_layer(name='input', size=300)
normalized = row_l2_norm_layer(input=input_data)

outputs(normalized)
python/paddle/v2/framework/tests/CMakeLists.txt
浏览文件 @
fbcadb75
...
...
@@ -8,7 +8,6 @@ add_python_test(test_framework
test_fc_op.py
test_add_two_op.py
test_sgd_op.py
test_cross_entropy_op.py
test_mul_op.py
test_mean_op.py
test_sigmoid_op.py
...
...
python/paddle/v2/framework/tests/op_test_util.py
浏览文件 @
fbcadb75
...
...
@@ -26,14 +26,19 @@ class OpTestMeta(type):
scope
=
core
.
Scope
()
kwargs
=
dict
()
places
=
[]
places
.
append
(
core
.
CPUPlace
())
if
core
.
is_compile_gpu
():
places
.
append
(
core
.
GPUPlace
(
0
))
for
place
in
places
:
for
in_name
in
func
.
all_input_args
:
if
hasattr
(
self
,
in_name
):
kwargs
[
in_name
]
=
in_name
var
=
scope
.
new_var
(
in_name
).
get_tensor
()
arr
=
getattr
(
self
,
in_name
)
var
.
set_dims
(
arr
.
shape
)
var
.
set
(
arr
)
var
.
set
(
arr
,
place
)
else
:
kwargs
[
in_name
]
=
"@EMPTY@"
...
...
@@ -50,7 +55,7 @@ class OpTestMeta(type):
op
.
infer_shape
(
scope
)
ctx
=
core
.
DeviceContext
.
cpu_context
(
)
ctx
=
core
.
DeviceContext
.
create
(
place
)
op
.
run
(
scope
,
ctx
)
for
out_name
in
func
.
all_output_args
:
...
...
python/paddle/v2/framework/tests/test_add_two_op.py
浏览文件 @
fbcadb75
import
unittest
from
op_test_util
import
OpTestMeta
import
numpy
import
paddle.v2.framework.core
as
core
import
paddle.v2.framework.create_op_creation_methods
as
creation
from
op_test_util
import
OpTestMeta
class
TestAddOp
(
unittest
.
TestCase
):
...
...
@@ -8,10 +12,19 @@ class TestAddOp(unittest.TestCase):
def
setUp
(
self
):
self
.
type
=
"add_two"
self
.
X
=
numpy
.
random
.
random
((
342
,
34
5
)).
astype
(
"float32"
)
self
.
Y
=
numpy
.
random
.
random
((
342
,
34
5
)).
astype
(
"float32"
)
self
.
X
=
numpy
.
random
.
random
((
102
,
10
5
)).
astype
(
"float32"
)
self
.
Y
=
numpy
.
random
.
random
((
102
,
10
5
)).
astype
(
"float32"
)
self
.
Out
=
self
.
X
+
self
.
Y
class TestAddGradOp(unittest.TestCase):
    """Checks the backward operator built from an ``add_two`` operator."""

    def test_add_grad(self):
        # Build the forward op, then ask for its backward op with an empty
        # set of no-gradient variables.
        forward_op = creation.op_creations.add_two(X="X", Y="Y", Out="Out")
        grad_op = core.Operator.backward(forward_op, set())

        self.assertEqual(grad_op.type(), "add_two_grad")

        expected_repr = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).'''
        self.assertEqual(expected_repr, str(grad_op))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/v2/framework/tests/test_fc_op.py
浏览文件 @
fbcadb75
...
...
@@ -7,17 +7,19 @@ import paddle.v2.framework.create_op_creation_methods as creation
class
TestFc
(
unittest
.
TestCase
):
def
test_fc
(
self
):
scope
=
core
.
Scope
()
place
=
core
.
CPUPlace
()
x
=
scope
.
new_var
(
"X"
)
x_tensor
=
x
.
get_tensor
()
x_tensor
.
set_dims
([
1000
,
784
])
x_tensor
.
alloc_float
()
x_tensor
.
alloc_float
(
place
)
w
=
scope
.
new_var
(
"W"
)
w_tensor
=
w
.
get_tensor
()
w_tensor
.
set_dims
([
784
,
100
])
w_tensor
.
alloc_float
()
w_tensor
.
alloc_float
(
place
)
w_tensor
.
set
(
numpy
.
random
.
random
((
784
,
100
)).
astype
(
"float32"
))
w_tensor
.
set
(
numpy
.
random
.
random
((
784
,
100
)).
astype
(
"float32"
)
,
place
)
# Set a real numpy array here.
# x_tensor.set(numpy.array([]))
...
...
@@ -32,7 +34,7 @@ class TestFc(unittest.TestCase):
op
.
infer_shape
(
scope
)
self
.
assertEqual
([
1000
,
100
],
tensor
.
shape
())
ctx
=
core
.
DeviceContext
.
c
pu_context
(
)
ctx
=
core
.
DeviceContext
.
c
reate
(
place
)
op
.
run
(
scope
,
ctx
)
...
...
python/paddle/v2/framework/tests/test_mul_op.py
浏览文件 @
fbcadb75
...
...
@@ -8,8 +8,8 @@ class TestMulOp(unittest.TestCase):
def
setUp
(
self
):
self
.
type
=
"mul"
self
.
X
=
np
.
random
.
random
((
32
,
7
84
)).
astype
(
"float32"
)
self
.
Y
=
np
.
random
.
random
((
7
84
,
100
)).
astype
(
"float32"
)
self
.
X
=
np
.
random
.
random
((
32
,
84
)).
astype
(
"float32"
)
self
.
Y
=
np
.
random
.
random
((
84
,
100
)).
astype
(
"float32"
)
self
.
Out
=
np
.
dot
(
self
.
X
,
self
.
Y
)
...
...
python/paddle/v2/framework/tests/test_rowwise_add_op.py
浏览文件 @
fbcadb75
...
...
@@ -8,8 +8,8 @@ class TestRowwiseAddOp(unittest.TestCase):
def
setUp
(
self
):
self
.
type
=
"rowwise_add"
self
.
X
=
np
.
random
.
random
((
32
,
7
84
)).
astype
(
"float32"
)
self
.
b
=
np
.
random
.
random
(
7
84
).
astype
(
"float32"
)
self
.
X
=
np
.
random
.
random
((
32
,
84
)).
astype
(
"float32"
)
self
.
b
=
np
.
random
.
random
(
84
).
astype
(
"float32"
)
self
.
Out
=
np
.
add
(
self
.
X
,
self
.
b
)
...
...
python/paddle/v2/framework/tests/test_sgd_op.py
浏览文件 @
fbcadb75
...
...
@@ -8,8 +8,8 @@ class TestSGD(unittest.TestCase):
def
setUp
(
self
):
self
.
type
=
"sgd"
self
.
param
=
numpy
.
random
.
random
((
342
,
34
5
)).
astype
(
"float32"
)
self
.
grad
=
numpy
.
random
.
random
((
342
,
34
5
)).
astype
(
"float32"
)
self
.
param
=
numpy
.
random
.
random
((
102
,
10
5
)).
astype
(
"float32"
)
self
.
grad
=
numpy
.
random
.
random
((
102
,
10
5
)).
astype
(
"float32"
)
self
.
learning_rate
=
0.1
self
.
param_out
=
self
.
param
-
self
.
learning_rate
*
self
.
grad
...
...
python/paddle/v2/framework/tests/test_tensor.py
浏览文件 @
fbcadb75
...
...
@@ -7,16 +7,17 @@ class TestScope(unittest.TestCase):
def
test_int_tensor
(
self
):
scope
=
core
.
Scope
()
var
=
scope
.
new_var
(
"test_tensor"
)
place
=
core
.
CPUPlace
()
tensor
=
var
.
get_tensor
()
tensor
.
set_dims
([
1000
,
784
])
tensor
.
alloc_int
()
tensor
.
alloc_int
(
place
)
tensor_array
=
numpy
.
array
(
tensor
)
self
.
assertEqual
((
1000
,
784
),
tensor_array
.
shape
)
tensor_array
[
3
,
9
]
=
1
tensor_array
[
19
,
11
]
=
2
tensor
.
set
(
tensor_array
)
tensor
.
set
(
tensor_array
,
place
)
tensor_array_2
=
numpy
.
array
(
tensor
)
self
.
assertEqual
(
1.0
,
tensor_array_2
[
3
,
9
])
...
...
@@ -25,16 +26,18 @@ class TestScope(unittest.TestCase):
def
test_float_tensor
(
self
):
scope
=
core
.
Scope
()
var
=
scope
.
new_var
(
"test_tensor"
)
place
=
core
.
CPUPlace
()
tensor
=
var
.
get_tensor
()
tensor
.
set_dims
([
1000
,
784
])
tensor
.
alloc_float
()
tensor
.
alloc_float
(
place
)
tensor_array
=
numpy
.
array
(
tensor
)
self
.
assertEqual
((
1000
,
784
),
tensor_array
.
shape
)
tensor_array
[
3
,
9
]
=
1.0
tensor_array
[
19
,
11
]
=
2.0
tensor
.
set
(
tensor_array
)
tensor
.
set
(
tensor_array
,
place
)
tensor_array_2
=
numpy
.
array
(
tensor
)
self
.
assertAlmostEqual
(
1.0
,
tensor_array_2
[
3
,
9
])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录