提交 d851dafe 编写于 作者: D dangqingqing

Update and resolve conflicts.

...@@ -79,9 +79,8 @@ if(NOT DEFINED IOS_ARCH) ...@@ -79,9 +79,8 @@ if(NOT DEFINED IOS_ARCH)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future # FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set(IOS_ARCH "arm64") set(IOS_ARCH "arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR") elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
set(IOS_ARCH "i386;x86_64") # FIXME(liuyiqun): support "i386;x86_64" future
elseif(IOS_PLATFORM STREQUAL "WATCHOS") set(IOS_ARCH "x86_64")
set(IOS_ARCH armv7k)
endif() endif()
endif() endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_GPU)
return()
endif()
include(ExternalProject) include(ExternalProject)
set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl) set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
......
INCLUDE(ExternalProject) # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
SET(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind) if(NOT WITH_PYTHON)
return()
endif()
include(ExternalProject)
INCLUDE_DIRECTORIES(${PYBIND_SOURCE_DIR}/src/extern_pybind/include) set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
include_directories(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
ExternalProject_Add( ExternalProject_Add(
extern_pybind extern_pybind
...@@ -17,14 +35,12 @@ ExternalProject_Add( ...@@ -17,14 +35,12 @@ ExternalProject_Add(
TEST_COMMAND "" TEST_COMMAND ""
) )
if (${CMAKE_VERSION} VERSION_LESS "3.3.0") if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c) set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") file(WRITE ${dummyfile} "const char * dummy_pybind = \"${dummyfile}\";")
add_library(pybind STATIC ${dummyfile}) add_library(pybind STATIC ${dummyfile})
else() else()
add_library(pybind INTERFACE) add_library(pybind INTERFACE)
endif() endif()
add_dependencies(pybind extern_pybind) add_dependencies(pybind extern_pybind)
LIST(APPEND external_project_dependencies pybind)
# This file is use to check all support level of AVX on your machine # This file is use to check all support level of AVX on your machine
# so that PaddlePaddle can unleash the vectorization power of muticore. # so that PaddlePaddle can unleash the vectorization power of muticore.
INCLUDE(CheckCXXSourceRuns) include(CheckCXXSourceRuns)
INCLUDE(CheckCXXSourceCompiles) include(CheckCXXSourceCompiles)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(MMX_FLAG "-mmmx") set(MMX_FLAG "-mmmx")
set(SSE2_FLAG "-msse2") set(SSE2_FLAG "-msse2")
set(SSE3_FLAG "-msse3") set(SSE3_FLAG "-msse3")
SET(AVX_FLAG "-mavx") set(AVX_FLAG "-mavx")
SET(AVX2_FLAG "-mavx2") set(AVX2_FLAG "-mavx2")
ELSEIF(MSVC) elseif(MSVC)
set(MMX_FLAG "/arch:MMX") set(MMX_FLAG "/arch:MMX")
set(SSE2_FLAG "/arch:SSE2") set(SSE2_FLAG "/arch:SSE2")
set(SSE3_FLAG "/arch:SSE3") set(SSE3_FLAG "/arch:SSE3")
SET(AVX_FLAG "/arch:AVX") SET(AVX_FLAG "/arch:AVX")
SET(AVX2_FLAG "/arch:AVX2") SET(AVX2_FLAG "/arch:AVX2")
ENDIF() endif()
set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})
# Check MMX # Check MMX
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG}) set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
set(MMX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <mmintrin.h> #include <mmintrin.h>
int main() int main()
...@@ -32,6 +33,7 @@ int main() ...@@ -32,6 +33,7 @@ int main()
# Check SSE2 # Check SSE2
set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG}) set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
set(SSE2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <emmintrin.h> #include <emmintrin.h>
int main() int main()
...@@ -42,6 +44,7 @@ int main() ...@@ -42,6 +44,7 @@ int main()
# Check SSE3 # Check SSE3
set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG}) set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
set(SSE3_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <pmmintrin.h> #include <pmmintrin.h>
int main() int main()
...@@ -55,6 +58,7 @@ int main() ...@@ -55,6 +58,7 @@ int main()
# Check AVX # Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <immintrin.h>
int main() int main()
...@@ -67,6 +71,7 @@ int main() ...@@ -67,6 +71,7 @@ int main()
# Check AVX 2 # Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <immintrin.h>
int main() int main()
......
...@@ -145,7 +145,7 @@ PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以 ...@@ -145,7 +145,7 @@ PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以
Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。 Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。
PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Nodebook。 PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Notebook。
如果您想要更深入了解deep learning,PaddlePaddle Book一定是您最好的选择。 如果您想要更深入了解deep learning,PaddlePaddle Book一定是您最好的选择。
我们提供可以直接运行PaddlePaddle Book的Docker镜像,直接运行: 我们提供可以直接运行PaddlePaddle Book的Docker镜像,直接运行:
......
...@@ -63,7 +63,7 @@ ...@@ -63,7 +63,7 @@
</tr> </tr>
<tr> <tr>
<td class="left" rowspan="15">训练</td><td class="left">dot_period</td> <td class="left" rowspan="14">训练</td><td class="left">dot_period</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td> <td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr> </tr>
......
...@@ -8,3 +8,4 @@ PaddlePaddle 文档 ...@@ -8,3 +8,4 @@ PaddlePaddle 文档
howto/index_cn.rst howto/index_cn.rst
api/index_cn.rst api/index_cn.rst
faq/index_cn.rst faq/index_cn.rst
mobile/index_cn.rst
...@@ -7,3 +7,4 @@ PaddlePaddle Documentation ...@@ -7,3 +7,4 @@ PaddlePaddle Documentation
getstarted/index_en.rst getstarted/index_en.rst
howto/index_en.rst howto/index_en.rst
api/index_en.rst api/index_en.rst
mobile/index_en.rst
...@@ -20,10 +20,32 @@ $ docker build -t username/paddle-android:dev . -f Dockerfile.android ...@@ -20,10 +20,32 @@ $ docker build -t username/paddle-android:dev . -f Dockerfile.android
构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。 构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。
Android的Docker开发镜像向用户提供两个可配置的参数: Android的Docker开发镜像向用户提供两个可配置的参数:
| Argument | Optional Values | Default | <table class="docutils">
|-----------------|-------------------------|---------| <colgroup>
|`ANDROID_ABI` |`armeabi-v7a, arm64-v8a` | `armeabi-v7a` | <col width="25%" />
|`ANDROID_API` |`>= 21` | `21` | <col width="50%" />
<col width="25%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd">
<th class="head">Argument</th>
<th class="head">Optional Values</th>
<th class="head">Default</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even">
<td>ANDROID_ABI</td>
<td>armeabi-v7a, arm64-v8a</td>
<td>armeabi-v7a</td>
</tr>
<tr class="row-odd">
<td>ANDROID_API</td>
<td>>= 21</td>
<td>21</td>
</tr>
</tbody>
</table>
- 编译`armeabi-v7a``Android API 21`的PaddlePaddle库 - 编译`armeabi-v7a``Android API 21`的PaddlePaddle库
```bash ```bash
......
...@@ -26,10 +26,32 @@ $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_A ...@@ -26,10 +26,32 @@ $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_A
The Docker image accepts two arguments `ANDROID_ABI` and `ANDROID_API`: The Docker image accepts two arguments `ANDROID_ABI` and `ANDROID_API`:
| Argument | Optional Values | Default | <table class="docutils">
|-----------------|-------------------------|---------| <colgroup>
|`ANDROID_ABI` |`armeabi-v7a, arm64-v8a` | `armeabi-v7a` | <col width="25%" />
|`ANDROID_API` |`>= 21` | `21` | <col width="50%" />
<col width="25%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd">
<th class="head">Argument</th>
<th class="head">Optional Values</th>
<th class="head">Default</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even">
<td>ANDROID_ABI</td>
<td>armeabi-v7a, arm64-v8a</td>
<td>armeabi-v7a</td>
</tr>
<tr class="row-odd">
<td>ANDROID_API</td>
<td>>= 21</td>
<td>21</td>
</tr>
</tbody>
</table>
The ARM-64 architecture (`arm64-v8a`) requires at least level 21 of Android API. The ARM-64 architecture (`arm64-v8a`) requires at least level 21 of Android API.
......
...@@ -27,10 +27,28 @@ iOS平台可选配置参数: ...@@ -27,10 +27,28 @@ iOS平台可选配置参数:
- `SIMULATOR`,构建目标为`x86`架构的模拟器平台。 - `SIMULATOR`,构建目标为`x86`架构的模拟器平台。
- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示: - `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示:
| IOS_PLATFORM | IOS_ARCH | <table class="docutils">
|--------------|----------------------| <colgroup>
| OS | armv7, armv7s, arm64 (默认) | <col width="35%" />
| SIMULATOR | i386, x86_64 (默认) | <col width="65%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd">
<th class="head">IOS_PLATFORM</th>
<th class="head">IOS_ARCH</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64 (默认)</td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64 (默认)</td>
</tr>
</tbody>
</table>
- `IOS_DEPLOYMENT_TARGET`,最小的iOS部署版本,默认值为`7.0` - `IOS_DEPLOYMENT_TARGET`,最小的iOS部署版本,默认值为`7.0`
- `IOS_ENABLE_BITCODE`,是否使能[Bitcode](https://developer.apple.com/library/content/documentation/IDEs/Conceptual/AppDistributionGuide/AppThinning/AppThinning.html#//apple_ref/doc/uid/TP40012582-CH35-SW3),可设置`ON/OFF`,默认值为`ON` - `IOS_ENABLE_BITCODE`,是否使能[Bitcode](https://developer.apple.com/library/content/documentation/IDEs/Conceptual/AppDistributionGuide/AppThinning/AppThinning.html#//apple_ref/doc/uid/TP40012582-CH35-SW3),可设置`ON/OFF`,默认值为`ON`
......
MOBILE
======
.. toctree::
:maxdepth: 1
cross_compiling_for_android_cn.md
cross_compiling_for_ios_cn.md
cross_compiling_for_raspberry_cn.md
MOBILE
======
.. toctree::
:maxdepth: 1
cross_compiling_for_android_en.md
cross_compiling_for_raspberry_en.md
...@@ -29,32 +29,32 @@ add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER} ...@@ -29,32 +29,32 @@ add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER}
add_dependencies(paddle_capi paddle_proto) add_dependencies(paddle_capi paddle_proto)
# TODO: paddle_capi_whole will be removed. # TODO: paddle_capi_whole will be removed.
set(PADDLE_CAPI_LAYERS_LIBS
paddle_function
paddle_gserver)
if(MOBILE_INFERENCE) if(MOBILE_INFERENCE)
set(PADDLE_CAPI_INFER_LIBS set(PADDLE_CAPI_ENGINE_LIBS
paddle_utils paddle_utils
paddle_parameter paddle_parameter
paddle_math paddle_math
paddle_cuda paddle_cuda
paddle_function
paddle_gserver
paddle_proto) paddle_proto)
else() else()
set(PADDLE_CAPI_INFER_LIBS set(PADDLE_CAPI_ENGINE_LIBS
paddle_utils paddle_utils
paddle_parameter paddle_parameter
paddle_math paddle_math
paddle_cuda paddle_cuda
paddle_function
paddle_gserver
paddle_proto paddle_proto
paddle_pserver paddle_pserver
paddle_network) paddle_network)
endif() endif()
set(PADDLE_CAPI_INFER_LIBS ${PADDLE_CAPI_LAYERS_LIBS} ${PADDLE_CAPI_ENGINE_LIBS})
cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS}) cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS})
# Link the static library for inference # Link the static library for inference
cc_library(paddle_capi_engine DEPS paddle_capi paddle_utils paddle_parameter paddle_math paddle_cuda paddle_proto) cc_library(paddle_capi_engine DEPS paddle_capi ${PADDLE_CAPI_ENGINE_LIBS})
cc_library(paddle_capi_layers DEPS paddle_function paddle_gserver) cc_library(paddle_capi_layers DEPS ${PADDLE_CAPI_LAYERS_LIBS})
# Link the shared library for inference # Link the shared library for inference
if(NOT IOS) if(NOT IOS)
......
...@@ -45,8 +45,9 @@ add_custom_command(TARGET framework_py_proto POST_BUILD ...@@ -45,8 +45,9 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog) cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
cc_library(prune SRCS prune.cc DEPS framework_proto) cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
......
...@@ -21,7 +21,9 @@ limitations under the License. */ ...@@ -21,7 +21,9 @@ limitations under the License. */
#include <vector> #include <vector>
#include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
...@@ -70,10 +72,14 @@ static void CreateTensor(Variable* var, VarDesc::VarType var_type) { ...@@ -70,10 +72,14 @@ static void CreateTensor(Variable* var, VarDesc::VarType var_type) {
var->GetMutable<FeedFetchList>(); var->GetMutable<FeedFetchList>();
} else if (var_type == VarDesc::STEP_SCOPES) { } else if (var_type == VarDesc::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>(); var->GetMutable<std::vector<framework::Scope>>();
} else if (var_type == VarDesc::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>();
} else if (var_type == VarDesc::LOD_TENSOR_ARRAY) {
var->GetMutable<LoDTensorArray>();
} else { } else {
PADDLE_THROW( PADDLE_THROW(
"Variable type %d is not in " "Variable type %d is not in "
"[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST]", "[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE]",
var_type); var_type);
} }
} }
......
...@@ -109,6 +109,11 @@ message LoDTensorDesc { ...@@ -109,6 +109,11 @@ message LoDTensorDesc {
optional int32 lod_level = 2 [ default = 0 ]; optional int32 lod_level = 2 [ default = 0 ];
} }
message LoDTensorArrayDesc {
required TensorDesc tensor = 1;
optional int32 lod_level = 2 [ default = 0 ];
}
message VarDesc { message VarDesc {
enum VarType { enum VarType {
LOD_TENSOR = 1; LOD_TENSOR = 1;
...@@ -116,11 +121,14 @@ message VarDesc { ...@@ -116,11 +121,14 @@ message VarDesc {
FEED_MINIBATCH = 3; FEED_MINIBATCH = 3;
FETCH_LIST = 4; FETCH_LIST = 4;
STEP_SCOPES = 5; STEP_SCOPES = 5;
LOD_RANK_TABLE = 6;
LOD_TENSOR_ARRAY = 7;
} }
required string name = 1; required string name = 1;
required VarType type = 2; required VarType type = 2;
optional LoDTensorDesc lod_tensor = 3; optional LoDTensorDesc lod_tensor = 3;
optional TensorDesc selected_rows = 4; optional TensorDesc selected_rows = 4;
optional LoDTensorArrayDesc tensor_array = 6;
optional bool persistable = 5 [ default = false ]; optional bool persistable = 5 [ default = false ];
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
namespace paddle {
namespace framework {
void LoDRankTable::Reset(const LoD& lod, size_t level) {
this->coarse_lod_.clear();
this->items_.clear();
PADDLE_ENFORCE(level < lod.size(),
"Cannot rank lod since the level %d is less than lod size %d",
level, lod.size());
coarse_lod_.reserve(level);
for (size_t i = 0; i < level; ++i) {
coarse_lod_.push_back(lod[i]);
}
auto& vec = lod[level];
for (size_t i = 0; i < vec.size() - 1; ++i) {
TableItem item;
item.index = i;
item.length = vec[i + 1] - vec[i];
items_.emplace_back(item);
}
// NOTE(yuyang18):
//
// The time complexity of stable_sort is O(N*log(N)) if additional memory is
// available. It is easy to debug and unit test when using `stable_sort`
// instead of `sort`. Also, the items of a rank table will not be too large.
std::stable_sort(items_.begin(), items_.end(),
[](const TableItem& a, const TableItem& b) {
return a.length > b.length;
});
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
namespace paddle {
namespace framework {
// LoD Rank Table stores the `level` of `lod` which is ordered by sequence
// length in descending order. It is useful when implement dynamic RNN and is
// shared by dynamic RNN memory, dynamic RNN slice input and dynamic RNN slice
// output operators.
//
// The table item contains two element. The length of sequence and the index of
// sequence in that level.
//
// LoDRankTable also stores the coarse_lod, which is the lod information whose
// level is less than input level, in order to restore the output LoD
// information.
class LoDRankTable {
public:
struct TableItem {
size_t index;
size_t length;
};
LoDRankTable() {}
void Reset(const LoD& lod, size_t level);
const std::vector<TableItem>& items() const { return this->items_; }
const LoD& coarse_lod() const { return this->coarse_lod_; }
size_t level() const { return coarse_lod_.size(); }
private:
LoD coarse_lod_;
std::vector<TableItem> items_;
};
} // namespace framework
} // namespace paddle
...@@ -135,5 +135,43 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, ...@@ -135,5 +135,43 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty."); PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty.");
ShareDataWith(Slice(begin, end)); ShareDataWith(Slice(begin, end));
} }
void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
std::vector<std::vector<size_t>>* lod_length,
size_t* start_offset) {
lod_length->clear();
PADDLE_ENFORCE(start_idx < lod.size() - 1,
"start_idx should be >= 0 and < lod.size() - 1.");
PADDLE_ENFORCE(end_idx < lod.size(),
"end_idx should be >= 0 and < lod.size().");
PADDLE_ENFORCE_LE(start_idx, end_idx,
"start_idx should be less than end_idx.");
for (size_t level_idx = 0; level_idx < lod.size(); ++level_idx) {
std::vector<size_t> level_lens;
for (size_t i = start_idx; i < end_idx; ++i) {
level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
}
lod_length->emplace_back(level_lens);
start_idx = lod[level_idx][start_idx];
end_idx = lod[level_idx][end_idx];
}
*start_offset = start_idx;
}
void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length) {
PADDLE_ENFORCE_EQ(
lod->size(), lod_length.size(),
"The lod_length should has the same size with the appended lod.");
for (size_t i = 0; i < lod->size(); ++i) {
auto& level = (*lod)[i];
if (level.empty()) {
level.push_back(0);
}
for (size_t len : lod_length[i]) {
level.push_back(level.back() + len);
}
}
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -181,5 +181,11 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level, ...@@ -181,5 +181,11 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
return tensor; return tensor;
} }
void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
std::vector<std::vector<size_t>>* lod_length,
size_t* start_offset);
void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/framework/lod_tensor.h"
namespace paddle {
namespace framework {
using LoDTensorArray = std::vector<LoDTensor>;
}
} // namespace paddle
...@@ -144,5 +144,47 @@ TEST(LodExpand, test) { ...@@ -144,5 +144,47 @@ TEST(LodExpand, test) {
} }
} }
TEST(LoD, GetFineGrainedLoDLength) {
LoD lod;
lod.push_back(std::vector<size_t>{0, 2, 4, 5});
lod.push_back(std::vector<size_t>{0, 1, 6, 8, 10, 11});
lod.push_back(
std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26, 29});
std::vector<std::vector<size_t>> lod_length;
size_t start_offset;
paddle::framework::GetFineGrainedLoDLength(lod, 1, 2, &lod_length,
&start_offset);
std::vector<std::vector<size_t>> expected;
expected.push_back(std::vector<size_t>{2});
expected.push_back(std::vector<size_t>{2, 2});
expected.push_back(std::vector<size_t>{2, 3, 4, 2});
EXPECT_EQ(lod_length, expected);
EXPECT_EQ(start_offset, 15UL);
}
TEST(LoD, AppendLoD) {
std::vector<std::vector<size_t>> lod_lens;
lod_lens.push_back(std::vector<size_t>{2});
lod_lens.push_back(std::vector<size_t>{2, 2});
lod_lens.push_back(std::vector<size_t>{2, 3, 4, 2});
LoD origin;
origin.push_back(std::vector<size_t>{0, 2});
origin.push_back(std::vector<size_t>{0, 1, 6});
origin.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15});
paddle::framework::AppendLoD(&origin, lod_lens);
LoD expected;
expected.push_back(std::vector<size_t>{0, 2, 4});
expected.push_back(std::vector<size_t>{0, 1, 6, 8, 10});
expected.push_back(
std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26});
EXPECT_EQ(origin, expected);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase { ...@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
// indicate kernel DataType by input data. Defaultly all input data must be // indicate kernel DataType by input data. Defaultly all input data must be
// same. // same.
virtual DataType IndicateDataType(const ExecutionContext& ctx) const { virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
VLOG(3) << "Default IndicateDataType " << this->Type();
auto& scope = ctx.scope(); auto& scope = ctx.scope();
int data_type = -1; int data_type = -1;
for (auto& input : this->inputs_) { for (auto& input : this->inputs_) {
...@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase { ...@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
} }
if (t != nullptr) { if (t != nullptr) {
int tmp = static_cast<int>(ToDataType(t->type())); int tmp = static_cast<int>(ToDataType(t->type()));
VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
PADDLE_ENFORCE(tmp == data_type || data_type == -1, PADDLE_ENFORCE(tmp == data_type || data_type == -1,
"DataType of Paddle Op %s must be the same.", "DataType of Paddle Op %s must be the same.",
Type()); Type());
......
...@@ -37,13 +37,27 @@ std::vector<int64_t> VarDescBind::Shape() const { ...@@ -37,13 +37,27 @@ std::vector<int64_t> VarDescBind::Shape() const {
DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); } DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); }
void VarDescBind::SetLoDLevel(int32_t lod_level) { void VarDescBind::SetLoDLevel(int32_t lod_level) {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR); switch (desc_.type()) {
case VarDesc::LOD_TENSOR:
desc_.mutable_lod_tensor()->set_lod_level(lod_level); desc_.mutable_lod_tensor()->set_lod_level(lod_level);
break;
case VarDesc::LOD_TENSOR_ARRAY:
desc_.mutable_tensor_array()->set_lod_level(lod_level);
break;
default:
PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type());
}
} }
int32_t VarDescBind::GetLodLevel() const { int32_t VarDescBind::GetLodLevel() const {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR); switch (desc_.type()) {
case VarDesc::LOD_TENSOR:
return desc_.lod_tensor().lod_level(); return desc_.lod_tensor().lod_level();
case VarDesc::LOD_TENSOR_ARRAY:
return desc_.tensor_array().lod_level();
default:
PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type());
}
} }
const TensorDesc &VarDescBind::tensor_desc() const { const TensorDesc &VarDescBind::tensor_desc() const {
...@@ -53,6 +67,8 @@ const TensorDesc &VarDescBind::tensor_desc() const { ...@@ -53,6 +67,8 @@ const TensorDesc &VarDescBind::tensor_desc() const {
return desc_.selected_rows(); return desc_.selected_rows();
case VarDesc::LOD_TENSOR: case VarDesc::LOD_TENSOR:
return desc_.lod_tensor().tensor(); return desc_.lod_tensor().tensor();
case VarDesc::LOD_TENSOR_ARRAY:
return desc_.tensor_array().tensor();
default: default:
PADDLE_THROW("Unexpected branch."); PADDLE_THROW("Unexpected branch.");
} }
...@@ -66,6 +82,8 @@ TensorDesc *VarDescBind::mutable_tensor_desc() { ...@@ -66,6 +82,8 @@ TensorDesc *VarDescBind::mutable_tensor_desc() {
return desc_.mutable_selected_rows(); return desc_.mutable_selected_rows();
case VarDesc::LOD_TENSOR: case VarDesc::LOD_TENSOR:
return desc_.mutable_lod_tensor()->mutable_tensor(); return desc_.mutable_lod_tensor()->mutable_tensor();
case VarDesc::LOD_TENSOR_ARRAY:
return desc_.mutable_tensor_array()->mutable_tensor();
default: default:
PADDLE_THROW("Unexpected branch."); PADDLE_THROW("Unexpected branch.");
} }
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include <vector>
#include "glog/logging.h"
#include "paddle/framework/framework.pb.h" #include "paddle/framework/framework.pb.h"
namespace paddle { namespace paddle {
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNAddtoLayer.h"
using namespace mkldnn; // NOLINT
namespace paddle {
REGISTER_LAYER(mkldnn_addto, MKLDNNAddtoLayer);
bool MKLDNNAddtoLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
if (!MKLDNNLayer::init(layerMap, parameterMap)) {
return false;
}
layerSize_ = getSize();
for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(layerSize_, inputLayers_[i]->getSize()) << "input size must equal";
}
if (biasParameter_.get() != NULL) {
biases_ =
std::unique_ptr<Weight>(new Weight(1, layerSize_, biasParameter_, 0));
}
return true;
}
void MKLDNNAddtoLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed";
reshapeInput(bs, ih, iw);
ic = inputLayers_[0]->getSize() / ih / iw;
CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize());
CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw);
for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize());
CHECK_EQ(layerSize_, inputLayers_[i]->getSize());
}
oc = ic;
oh = ih;
ow = iw;
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
if (biases_) {
LOG(FATAL) << "not implemented yet";
}
resetFwdBuffers(inVals_, out);
in = inVals_[0];
std::shared_ptr<sum::primitive_desc> fwdPD;
resetFwdPD(fwdPD, inVals_, out);
resetFwdPipeline(pipeline, fwdPD, inVals_, out);
}
void MKLDNNAddtoLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
resetBwdBuffers(inGrads_, out);
in = inGrads_[0];
// backward only need share output grad to input grad
for (size_t i = 0; i < inGrads_.size(); i++) {
if (inGrads_[i] != nullptr) {
inGrads_[i] = out;
inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData());
}
}
}
void MKLDNNAddtoLayer::updateWeights(const UpdateCallback& callback) {
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
}
void MKLDNNAddtoLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
inputs.resize(inputLayers_.size());
for (size_t i = 0; i < inputs.size(); i++) {
resetInValue(inputs[i], nullptr, i);
CHECK(inputs[i]);
inputs[i]->downSpatial();
}
for (size_t i = 1; i < inputs.size(); i++) {
CHECK_PRIMITIVE_DESC_EQ(inputs[i], inputs[0]->getPrimitiveDesc());
}
resetOutValue(out, inputs[0]->getPrimitiveDesc());
}
void MKLDNNAddtoLayer::resetFwdPD(std::shared_ptr<sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr out) {
std::vector<double> scales(inputs.size(), 1.0);
std::vector<memory::primitive_desc> srcPDs;
for (size_t i = 0; i < inputs.size(); i++) {
srcPDs.push_back(inputs[i]->getPrimitiveDesc());
}
CHECK(out);
pd.reset(new sum::primitive_desc(out->getMemoryDesc(), scales, srcPDs));
CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc());
}
void MKLDNNAddtoLayer::resetFwdPipeline(
std::vector<primitive>& pipeline,
std::shared_ptr<sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
std::vector<primitive::at> srcs;
for (size_t i = 0; i < inputs.size(); i++) {
srcs.push_back(*(inputs[i]));
}
fwd_.reset(new sum(*pd, srcs, *out));
pipeline.push_back(*fwd_);
}
void MKLDNNAddtoLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
CHECK(outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
CHECK(out);
inputs.resize(inputLayers_.size());
for (size_t i = 0; i < inputs.size(); i++) {
resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i);
CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc());
}
}
} // namespace paddle
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "MKLDNNLayer.h"
#include "mkldnn.hpp"
namespace paddle {
/**
* @brief A subclass of MKLDNNLayer Addto layer.
*
* The config file api is mkldnn_addto
*/
class MKLDNNAddtoLayer : public MKLDNNLayer {
protected:
std::vector<MKLDNNMatrixPtr> inVals_;
std::vector<MKLDNNMatrixPtr> inGrads_;
// layer size == ic * ih * iw == oc * oh *ow, and can not be changed
size_t layerSize_;
// TODO(TJ): this part has not been optimized by MKL-DNN
std::unique_ptr<Weight> biases_;
public:
explicit MKLDNNAddtoLayer(const LayerConfig& config) : MKLDNNLayer(config) {}
~MKLDNNAddtoLayer() {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateWeights(const UpdateCallback& callback) override;
void printValueFormat() override {
for (size_t i = 0; i < inVals_.size(); ++i) {
VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>";
}
if (outVal_) {
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
}
if (extOutVal_) {
VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
}
}
void printGradFormat() override {
if (extOutGrad_) {
VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
}
if (outGrad_) {
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
}
for (size_t i = 0; i < inGrads_.size(); ++i) {
VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<";
}
}
protected:
/**
* Forward functions: reset buffers(inputs, output, bias),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<mkldnn::sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr out);
void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
std::shared_ptr<mkldnn::sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(inputs, output, bias)
*/
void resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
};
} // namespace paddle
...@@ -77,7 +77,7 @@ void MKLDNNLayer::forward(PassType passType) { ...@@ -77,7 +77,7 @@ void MKLDNNLayer::forward(PassType passType) {
needResetBwd_ = true; needResetBwd_ = true;
} }
if (inputLayers_[0]->getType() == "data") { if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) {
// Update input value data when input layer is "data" type, // Update input value data when input layer is "data" type,
// since the input value data address might be changed. // since the input value data address might be changed.
CHECK(extInVal_); CHECK(extInVal_);
...@@ -171,14 +171,16 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn, ...@@ -171,14 +171,16 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn,
} }
void MKLDNNLayer::resetInValue( void MKLDNNLayer::resetInValue(
MKLDNNMatrixPtr& in, const std::shared_ptr<memory::primitive_desc>& intPD) { MKLDNNMatrixPtr& in,
const std::shared_ptr<memory::primitive_desc>& intPD,
size_t inputIdx) {
cvtInVal_ = nullptr; cvtInVal_ = nullptr;
extInVal_ = nullptr; extInVal_ = nullptr;
in = nullptr; in = nullptr;
CHECK_GT(bs_ * ic_ * ih_ * iw_, 0); CHECK_GT(bs_ * ic_ * ih_ * iw_, 0);
auto extPD = MKLDNNMatrix::createPrimitiveDesc( auto extPD = MKLDNNMatrix::createPrimitiveDesc(
{bs_, ic_, ih_, iw_}, format::nchw, engine_); {bs_, ic_, ih_, iw_}, format::nchw, engine_);
const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue();
in = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat); in = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
CHECK_EQ(inputIsOnlyMKLDNN(), in != nullptr); CHECK_EQ(inputIsOnlyMKLDNN(), in != nullptr);
if (in == nullptr || in->getFormat() == format::nc) { if (in == nullptr || in->getFormat() == format::nc) {
...@@ -216,11 +218,12 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, ...@@ -216,11 +218,12 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
} }
void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
memory::primitive_desc intPD) { memory::primitive_desc intPD,
size_t inputIdx) {
cvtInGrad_ = nullptr; cvtInGrad_ = nullptr;
extInGrad_ = nullptr; extInGrad_ = nullptr;
in = nullptr; in = nullptr;
LayerPtr& input = inputLayers_[0]; LayerPtr& input = inputLayers_[inputIdx];
if (input->getOutputGrad() == nullptr) { if (input->getOutputGrad() == nullptr) {
// no need input grad // no need input grad
return; return;
...@@ -245,7 +248,6 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, ...@@ -245,7 +248,6 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
return; return;
} }
// need create reorder // need create reorder
// TODO(TJ): add macro definition to simplify it
CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat())) CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
<< "should have external input value and the format must be nchw(nc)"; << "should have external input value and the format must be nchw(nc)";
extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat); extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat);
......
...@@ -199,7 +199,8 @@ protected: ...@@ -199,7 +199,8 @@ protected:
*/ */
void resetInValue( void resetInValue(
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr); const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr,
size_t inputIdx = 0);
/** /**
* reset output value from internal primitive desc. * reset output value from internal primitive desc.
...@@ -212,7 +213,9 @@ protected: ...@@ -212,7 +213,9 @@ protected:
* reset input grad from internal primitive desc. * reset input grad from internal primitive desc.
* reset both internal and external buffer and create reorder if necessary. * reset both internal and external buffer and create reorder if necessary.
*/ */
void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD); void resetInGrad(MKLDNNMatrixPtr& in,
mkldnn::memory::primitive_desc intPD,
size_t inputIdx = 0);
/** /**
* reset output grad from internal primitive desc. * reset output grad from internal primitive desc.
......
...@@ -98,8 +98,19 @@ void SubSequenceLayer::forward(PassType passType) { ...@@ -98,8 +98,19 @@ void SubSequenceLayer::forward(PassType passType) {
CHECK_EQ(numSequences2, numSequences3); CHECK_EQ(numSequences2, numSequences3);
MatrixPtr inputValue = input.value; MatrixPtr inputValue = input.value;
IVectorPtr offsetValue = offsetSeq.ids; IVectorPtr offsetValue;
IVectorPtr sizeValue = sizeSeq.ids; IVectorPtr sizeValue;
if (useGpu_) {
// copy to cpu
IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false);
IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false);
offsetValue->copyFrom(*offsetSeq.ids);
sizeValue->copyFrom(*sizeSeq.ids);
} else {
offsetValue = offsetSeq.ids;
sizeValue = sizeSeq.ids;
}
CHECK_EQ(offsetValue->getSize(), numSequences1); CHECK_EQ(offsetValue->getSize(), numSequences1);
CHECK_EQ(sizeValue->getSize(), numSequences1); CHECK_EQ(sizeValue->getSize(), numSequences1);
...@@ -176,8 +187,21 @@ void SubSequenceLayer::backward(const UpdateCallback& callback) { ...@@ -176,8 +187,21 @@ void SubSequenceLayer::backward(const UpdateCallback& callback) {
size_t numSequences1 = startPositions1->getSize() - 1; size_t numSequences1 = startPositions1->getSize() - 1;
const int* starts1 = startPositions1->getData(); const int* starts1 = startPositions1->getData();
IVectorPtr offsetValue = getInput(1).ids; const Argument& offsetSeq = getInput(1);
IVectorPtr sizeValue = getInput(2).ids; const Argument& sizeSeq = getInput(2);
IVectorPtr offsetValue;
IVectorPtr sizeValue;
if (useGpu_) {
// copy to cpu
IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false);
IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false);
offsetValue->copyFrom(*offsetSeq.ids);
sizeValue->copyFrom(*sizeSeq.ids);
} else {
offsetValue = offsetSeq.ids;
sizeValue = sizeSeq.ids;
}
int* offsets = offsetValue->getData(); int* offsets = offsetValue->getData();
int* sizes = sizeValue->getData(); int* sizes = sizeValue->getData();
......
...@@ -132,7 +132,7 @@ void MKLDNNTester::checkForward() { ...@@ -132,7 +132,7 @@ void MKLDNNTester::checkForward() {
VLOG(MKLDNN_TESTS) << "Check Forward"; VLOG(MKLDNN_TESTS) << "Check Forward";
printTopDatas(); printTopDatas();
double delta = double delta =
compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue()); compareMatrix(refLayer_->getOutputValue(), dnnLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_); EXPECT_LE(fabs(delta), eps_);
} }
...@@ -147,7 +147,7 @@ void MKLDNNTester::checkBackwardData() { ...@@ -147,7 +147,7 @@ void MKLDNNTester::checkBackwardData() {
VLOG(MKLDNN_ALL) << "Reference Backward Result: InputGrad " << i; VLOG(MKLDNN_ALL) << "Reference Backward Result: InputGrad " << i;
printMatrix(refDiff); printMatrix(refDiff);
double delta = compareMatrix(dnnDiff, refDiff); double delta = compareMatrix(refDiff, dnnDiff);
EXPECT_LE(fabs(delta), eps_); EXPECT_LE(fabs(delta), eps_);
if (isBN) { if (isBN) {
// the other two inputs in batch norm are for moving mean and var // the other two inputs in batch norm are for moving mean and var
...@@ -177,7 +177,7 @@ void MKLDNNTester::checkBackwardWgts() { ...@@ -177,7 +177,7 @@ void MKLDNNTester::checkBackwardWgts() {
<< parameters_[REF][i]->getName(); << parameters_[REF][i]->getName();
printVector(ref); printVector(ref);
double delta = compareVector(dnn, ref); double delta = compareVector(ref, dnn);
EXPECT_LE(fabs(delta), eps_); EXPECT_LE(fabs(delta), eps_);
} }
......
...@@ -271,20 +271,53 @@ TEST(MKLDNNLayer, BatchNormLayer) { ...@@ -271,20 +271,53 @@ TEST(MKLDNNLayer, BatchNormLayer) {
testBatchNormLayer({16, 32, 16, 16}); testBatchNormLayer({16, 32, 16, 16});
} }
struct testActDesc { struct testImageDesc {
int bs, ic, ih, iw; int bs, ic, ih, iw;
}; };
static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) { static void getAddtoConfig(TestConfig& cfg,
const testImageDesc& pm,
const size_t nInputs = 1) {
cfg.biasSize = 0; cfg.biasSize = 0;
cfg.layerConfig.set_type("addto"); cfg.layerConfig.set_type("addto");
size_t layerSize = pm.ic * pm.ih * pm.iw; size_t layerSize = pm.ic * pm.ih * pm.iw;
cfg.layerConfig.set_size(layerSize); cfg.layerConfig.set_size(layerSize);
cfg.inputDefs.push_back({INPUT_DATA, "layer_0", layerSize, 0}); cfg.layerConfig.set_active_type("relu");
cfg.layerConfig.add_inputs(); for (size_t i = 0; i < nInputs; ++i) {
std::stringstream ss;
ss << "layer_" << i;
cfg.inputDefs.push_back({INPUT_DATA, ss.str(), layerSize, 0});
LayerInputConfig* input = cfg.layerConfig.add_inputs();
ImageConfig* img_conf = input->mutable_image_conf();
img_conf->set_channels(pm.ic);
img_conf->set_img_size_y(pm.ih);
img_conf->set_img_size(pm.iw);
}
}
void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) {
CHECK_GE(nInputs, 1);
TestConfig dnnConfig;
getAddtoConfig(dnnConfig, pm, nInputs);
dnnConfig.layerConfig.set_type("mkldnn_addto");
// TODO(TJ): test with bias
for (auto withBias : {false}) {
if (withBias) {
dnnConfig.biasSize = pm.ic * pm.ih * pm.iw;
} else {
dnnConfig.biasSize = 0;
}
RUN_MKLDNN_TEST_LAYER(dnnConfig, "addto", pm)
}
}
TEST(MKLDNNLayer, AddtoLayer) {
testAddtoLayer({16, 5, 14, 14}, 1);
testAddtoLayer({8, 10, 8, 8}, 2);
testAddtoLayer({4, 12, 1, 1}, 3);
} }
void testActivation(std::string actType, const testActDesc& pm) { void testActivation(std::string actType, const testImageDesc& pm) {
// TODO(TJ): remove me when paddle support elu activation // TODO(TJ): remove me when paddle support elu activation
if (actType == "mkldnn_elu") { if (actType == "mkldnn_elu") {
return; return;
......
...@@ -69,6 +69,20 @@ function(op_library TARGET) ...@@ -69,6 +69,20 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n")
endif() endif()
# conv_op contains several operators
if ("${TARGET}" STREQUAL "conv_op")
set(pybind_flag 1)
# It's enough to just adding one operator to pybind
file(APPEND ${pybind_file} "USE_OP(conv2d);\n")
endif()
# conv_transpose_op contains several operators
if ("${TARGET}" STREQUAL "conv_transpose_op")
set(pybind_flag 1)
# It's enough to just adding one operator to pybind
file(APPEND ${pybind_file} "USE_OP(conv2d_transpose);\n")
endif()
# pool_cudnn_op contains several operators # pool_cudnn_op contains several operators
if ("${TARGET}" STREQUAL "pool_cudnn_op") if ("${TARGET}" STREQUAL "pool_cudnn_op")
set(pybind_flag 1) set(pybind_flag 1)
...@@ -139,24 +153,36 @@ set(DEPS_OPS ...@@ -139,24 +153,36 @@ set(DEPS_OPS
sum_op sum_op
pool_op pool_op
pool_with_index_op pool_with_index_op
conv_op
lstm_op
conv_transpose_op
nccl_op nccl_op
sequence_conv_op sequence_conv_op
lstm_op) sequence_pool_op
lod_rank_table_op
lstm_op
gru_op)
op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library(cross_entropy_op DEPS cross_entropy) op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(conv_op DEPS vol2col)
op_library(sum_op DEPS net_op selected_rows_functor) op_library(sum_op DEPS net_op selected_rows_functor)
op_library(pool_op DEPS pooling) op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling) op_library(pool_with_index_op DEPS pooling)
op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
if(WITH_GPU) if(WITH_GPU)
op_library(nccl_op DEPS nccl_common) op_library(nccl_op DEPS nccl_common)
endif() endif()
op_library(sequence_conv_op DEPS context_project) op_library(sequence_conv_op DEPS context_project)
op_library(sequence_pool_op DEPS sequence_pooling)
op_library(lstm_op DEPS sequence2batch lstm_compute) op_library(lstm_op DEPS sequence2batch lstm_compute)
op_library(conv_transpose_op DEPS vol2col)
op_library(gru_op DEPS sequence2batch gru_compute)
op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS net_op tensor_array) DEPS net_op tensor_array)
op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) op_library(recurrent_op SRCS recurrent_op.cc DEPS executor)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS}) foreach(src ${GENERAL_OPS})
op_library(${src}) op_library(${src})
......
...@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel { ...@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
auto inference_dim = ctx->GetInputDim("Out"); auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label"); auto label_dim = ctx->GetInputDim("Label");
// Assume indices has same shape with infernece, because // Assume indices has same shape as inference, because
// it's the output of topk. // it's the output of topk.
PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2."); PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");
...@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
// TODO(typhoonzero): support both inference value and indices. // TODO(typhoonzero): support both inference value and indices.
AddInput("Out", "topk (inferences) the network output"); AddInput("Out", "The network output of topk (inferences)");
AddInput("Indices", "topk (indices) the network output"); AddInput("Indices", "The the network output of topk (indices)");
AddInput("Label", "Label of the training data"); AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ... // TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch"); AddOutput("Accuracy", "The accuracy of current batch");
AddComment(R"DOC( AddComment(R"DOC(
Accuracy. It will print accuracy rate for classification. Accuracy Operator.
The accuracy is:
.. math:: It will print accuracy rate for classification.
accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples}) The accuracy is calculated as follows:
$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
Both the input Out and Label can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD information
with the input Out(Inference).
Both the input `Out` and `Label` can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD with input `Inference`.
)DOC"); )DOC");
} }
}; };
......
...@@ -44,7 +44,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -44,7 +44,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Sigmoid operator"); AddInput("X", "Input of Sigmoid operator");
AddOutput("Y", "Output of Sigmoid operator"); AddOutput("Y", "Output of Sigmoid operator");
AddComment(R"DOC( AddComment(R"DOC(
Sigmoid activation operator. Sigmoid Activation Operator.
$y = 1 / (1 + e^{-x})$ $y = 1 / (1 + e^{-x})$
...@@ -60,7 +60,7 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -60,7 +60,7 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of LogSigmoid operator"); AddInput("X", "Input of LogSigmoid operator");
AddOutput("Y", "Output of LogSigmoid operator"); AddOutput("Y", "Output of LogSigmoid operator");
AddComment(R"DOC( AddComment(R"DOC(
Logsigmoid activation operator. Logsigmoid Activation Operator.
$y = \log(1 / (1 + e^{-x}))$ $y = \log(1 / (1 + e^{-x}))$
...@@ -75,7 +75,7 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -75,7 +75,7 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Exp operator"); AddInput("X", "Input of Exp operator");
AddOutput("Y", "Output of Exp operator"); AddOutput("Y", "Output of Exp operator");
AddComment(R"DOC( AddComment(R"DOC(
Exp activation operator. Exp Activation Operator.
$y = e^x$ $y = e^x$
...@@ -90,7 +90,7 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -90,7 +90,7 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Relu operator"); AddInput("X", "Input of Relu operator");
AddOutput("Y", "Output of Relu operator"); AddOutput("Y", "Output of Relu operator");
AddComment(R"DOC( AddComment(R"DOC(
Relu activation operator. Relu Activation Operator.
$y = \max(x, 0)$ $y = \max(x, 0)$
...@@ -109,7 +109,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -109,7 +109,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("alpha", "The small negative slope") AddAttr<AttrType>("alpha", "The small negative slope")
.SetDefault(static_cast<AttrType>(0.02f)); .SetDefault(static_cast<AttrType>(0.02f));
AddComment(R"DOC( AddComment(R"DOC(
LeakyRelu activation operator. LeakyRelu Activation Operator.
$y = \max(x, \alpha * x)$ $y = \max(x, \alpha * x)$
...@@ -128,7 +128,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -128,7 +128,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("lambda", "non-negative offset") AddAttr<AttrType>("lambda", "non-negative offset")
.SetDefault(static_cast<AttrType>(0.5f)); .SetDefault(static_cast<AttrType>(0.5f));
AddComment(R"DOC( AddComment(R"DOC(
Softshrink activation operator. Softshrink Activation Operator.
$$ $$
y = \begin{cases} y = \begin{cases}
...@@ -149,7 +149,7 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -149,7 +149,7 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Tanh operator"); AddInput("X", "Input of Tanh operator");
AddOutput("Y", "Output of Tanh operator"); AddOutput("Y", "Output of Tanh operator");
AddComment(R"DOC( AddComment(R"DOC(
Tanh activation operator. Tanh Activation Operator.
$$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ $$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
...@@ -165,7 +165,7 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -165,7 +165,7 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of TanhShrink operator"); AddInput("X", "Input of TanhShrink operator");
AddOutput("Y", "Output of TanhShrink operator"); AddOutput("Y", "Output of TanhShrink operator");
AddComment(R"DOC( AddComment(R"DOC(
TanhShrink activation operator. TanhShrink Activation Operator.
$$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ $$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
...@@ -184,7 +184,7 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -184,7 +184,7 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The value of threshold for HardShrink") AddAttr<AttrType>("threshold", "The value of threshold for HardShrink")
.SetDefault(static_cast<AttrType>(0.5)); .SetDefault(static_cast<AttrType>(0.5));
AddComment(R"DOC( AddComment(R"DOC(
HardShrink activation operator. HardShrink Activation Operator.
$$ $$
y = \begin{cases} y = \begin{cases}
...@@ -205,7 +205,7 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -205,7 +205,7 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Sqrt operator"); AddInput("X", "Input of Sqrt operator");
AddOutput("Y", "Output of Sqrt operator"); AddOutput("Y", "Output of Sqrt operator");
AddComment(R"DOC( AddComment(R"DOC(
Sqrt activation operator. Sqrt Activation Operator.
$y = \sqrt{x}$ $y = \sqrt{x}$
...@@ -220,7 +220,7 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -220,7 +220,7 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Abs operator"); AddInput("X", "Input of Abs operator");
AddOutput("Y", "Output of Abs operator"); AddOutput("Y", "Output of Abs operator");
AddComment(R"DOC( AddComment(R"DOC(
Abs activation operator. Abs Activation Operator.
$y = |x|$ $y = |x|$
...@@ -236,7 +236,7 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -236,7 +236,7 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Reciprocal operator"); AddInput("X", "Input of Reciprocal operator");
AddOutput("Y", "Output of Reciprocal operator"); AddOutput("Y", "Output of Reciprocal operator");
AddComment(R"DOC( AddComment(R"DOC(
Reciprocal activation operator. Reciprocal Activation Operator.
$$y = \frac{1}{x}$$ $$y = \frac{1}{x}$$
...@@ -251,7 +251,7 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -251,7 +251,7 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Log operator"); AddInput("X", "Input of Log operator");
AddOutput("Y", "Output of Log operator"); AddOutput("Y", "Output of Log operator");
AddComment(R"DOC( AddComment(R"DOC(
Log activation operator. Log Activation Operator.
$y = \ln(x)$ $y = \ln(x)$
...@@ -268,7 +268,7 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -268,7 +268,7 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Square operator"); AddInput("X", "Input of Square operator");
AddOutput("Y", "Output of Square operator"); AddOutput("Y", "Output of Square operator");
AddComment(R"DOC( AddComment(R"DOC(
Square activation operator. Square Activation Operator.
$y = x^2$ $y = x^2$
...@@ -284,7 +284,7 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -284,7 +284,7 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Softplus operator"); AddInput("X", "Input of Softplus operator");
AddOutput("Y", "Output of Softplus operator"); AddOutput("Y", "Output of Softplus operator");
AddComment(R"DOC( AddComment(R"DOC(
Softplus activation operator. Softplus Activation Operator.
$y = \ln(1 + e^{x})$ $y = \ln(1 + e^{x})$
...@@ -300,7 +300,7 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -300,7 +300,7 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of Softsign operator"); AddInput("X", "Input of Softsign operator");
AddOutput("Y", "Output of Softsign operator"); AddOutput("Y", "Output of Softsign operator");
AddComment(R"DOC( AddComment(R"DOC(
Softsign activation operator. Softsign Activation Operator.
$$y = \frac{x}{1 + |x|}$$ $$y = \frac{x}{1 + |x|}$$
...@@ -320,7 +320,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -320,7 +320,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("t_max", "The max marginal value of BRelu") AddAttr<AttrType>("t_max", "The max marginal value of BRelu")
.SetDefault(static_cast<AttrType>(24)); .SetDefault(static_cast<AttrType>(24));
AddComment(R"DOC( AddComment(R"DOC(
BRelu activation operator. BRelu Activation Operator.
$y = \max(\min(x, t_{min}), t_{max})$ $y = \max(\min(x, t_{min}), t_{max})$
...@@ -339,7 +339,7 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -339,7 +339,7 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The threshold value of SoftRelu") AddAttr<AttrType>("threshold", "The threshold value of SoftRelu")
.SetDefault(static_cast<AttrType>(40)); .SetDefault(static_cast<AttrType>(40));
AddComment(R"DOC( AddComment(R"DOC(
SoftRelu activation operator. SoftRelu Activation Operator.
$y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ $y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$
...@@ -357,7 +357,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -357,7 +357,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("alpha", "The alpha value of ELU") AddAttr<AttrType>("alpha", "The alpha value of ELU")
.SetDefault(static_cast<AttrType>(1.0f)); .SetDefault(static_cast<AttrType>(1.0f));
AddComment(R"DOC( AddComment(R"DOC(
ELU activation operator. ELU Activation Operator.
Applies the following element-wise computation on the input according to Applies the following element-wise computation on the input according to
https://arxiv.org/abs/1511.07289. https://arxiv.org/abs/1511.07289.
...@@ -378,7 +378,7 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -378,7 +378,7 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The threshold value of Relu6") AddAttr<AttrType>("threshold", "The threshold value of Relu6")
.SetDefault(static_cast<AttrType>(6)); .SetDefault(static_cast<AttrType>(6));
AddComment(R"DOC( AddComment(R"DOC(
Relu6 activation operator. Relu6 Activation Operator.
$y = \min(\max(0, x), 6)$ $y = \min(\max(0, x), 6)$
...@@ -396,7 +396,7 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -396,7 +396,7 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("factor", "The exponential factor of Pow") AddAttr<AttrType>("factor", "The exponential factor of Pow")
.SetDefault(static_cast<AttrType>(1)); .SetDefault(static_cast<AttrType>(1));
AddComment(R"DOC( AddComment(R"DOC(
Pow activation operator. Pow Activation Operator.
$y = x^{factor}$ $y = x^{factor}$
...@@ -416,7 +416,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -416,7 +416,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("scale_b", "The scale parameter of b for the input") AddAttr<AttrType>("scale_b", "The scale parameter of b for the input")
.SetDefault(static_cast<AttrType>(1.7159)); .SetDefault(static_cast<AttrType>(1.7159));
AddComment(R"DOC( AddComment(R"DOC(
STanh activation operator. STanh Activation Operator.
$$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ $$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
...@@ -435,7 +435,7 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -435,7 +435,7 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("threshold", "The threshold location of activation") AddAttr<AttrType>("threshold", "The threshold location of activation")
.SetDefault(static_cast<AttrType>(1.0)); .SetDefault(static_cast<AttrType>(1.0));
AddComment(R"DOC( AddComment(R"DOC(
ThresholdedRelu activation operator. ThresholdedRelu Activation Operator.
$$ $$
y = \begin{cases} y = \begin{cases}
...@@ -461,7 +461,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -461,7 +461,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid") AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
.SetDefault(static_cast<AttrType>(0.5)); .SetDefault(static_cast<AttrType>(0.5));
AddComment(R"DOC( AddComment(R"DOC(
HardSigmoid activation operator. HardSigmoid Activation Operator.
Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391), Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391),
which is much faster than sigmoid. which is much faster than sigmoid.
......
...@@ -64,16 +64,15 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -64,16 +64,15 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter"); AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient"); AddInput("Grad", "(Tensor) Input gradient");
AddInput("AvgSquaredGrad", AddInput("AvgSquaredGrad", "(Tensor) Input average of squared gradient");
"(Tensor) Input expectation of squared gradient");
AddInput("AvgSquaredUpdate", AddInput("AvgSquaredUpdate",
"(Tensor) Input expectation of squared parameter updates"); "(Tensor) Input average of squared parameter updates");
AddOutput("ParamOut", "(Tensor) Output parameter"); AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("AvgSquaredGradOut", AddOutput("AvgSquaredGradOut",
"(Tensor) Output expectation of squared gradient"); "(Tensor) Output average of squared gradient");
AddOutput("AvgSquaredUpdateOut", AddOutput("AvgSquaredUpdateOut",
"(Tensor) Output expectation of squared parameter updates"); "(Tensor) Output average of squared parameter updates");
AddAttr<float>("rho", AddAttr<float>("rho",
"(float, default 0.95) Exponential decay rate " "(float, default 0.95) Exponential decay rate "
...@@ -84,22 +83,21 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -84,22 +83,21 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
"numerical stability") "numerical stability")
.SetDefault(1.0e-6f); .SetDefault(1.0e-6f);
AddComment(R"DOC( AddComment(R"DOC(
Adadelta Updates Operator. Adadelta Optimizer.
This implements the Adadelta optimizer[1]. Adadelta is a per-dimension Adadelta optimizer is implemented as explained in:
adaptive learning rate method for gradient descent. https://arxiv.org/abs/1212.5701
Adadelta is a per-dimension adaptive learning rate method used
for gradient descent.
Adadelta updates: Adadelta updates are as follows:
avg_squared_grad_out = rho * avg_squared_grad + (1 - rho) * grad * grad $$avgSquaredGradOut = \rho * avgSquaredGrad + (1 - \rho) * grad * grad \break
param_update = - sqrt((avg_squared_update + epsilon) / paramUpdate = - $\sqrt{((avgSquaredUpdate + \epsilon) /
(avg_squared_grad_out + epsilon)) * grad (avgSquaredGrad_out + \epsilon))}$ * grad \break
avg_squared_update_out = rho * avg_squared_update + (1 - rho) * param_update**2 avgSquaredUpdateOut = \rho * avgSquaredUpdate + (1 - \rho) *
param_out = param + param_update {(paramUpdate)}^2 \break
paramOut = param + paramUpdate$$
References:
[1] ADADELTA: An Adaptive Learning Rate Method
https://arxiv.org/abs/1212.5701
)DOC"); )DOC");
} }
......
...@@ -73,12 +73,16 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -73,12 +73,16 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
Adaptive Gradient Algorithm (Adagrad). Adaptive Gradient Algorithm (Adagrad).
moment_out = moment + grad * grad The update is done as follows:
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
$$momentOut = moment + grad * grad \break
paramOut = param - learningRate * grad / ($\sqrt{momentOut}$ + \epsilon) \break
$$
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have the epsilon attribute. It is added here for numerical stability does not have the epsilon attribute. It is added here in our implementation
by avoiding division by zero. as also proposed here: http://cs231n.github.io/neural-networks-3/#ada
for numerical stability to avoid the division by zero error.
)DOC"); )DOC");
} }
......
...@@ -51,8 +51,8 @@ class AdamOp : public framework::OperatorWithKernel { ...@@ -51,8 +51,8 @@ class AdamOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension"); "Beta1 power accumulator should have 1 dimension");
auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow"); auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, PADDLE_ENFORCE_EQ(framework::product(beta2_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension"); "Beta2 power accumulator should have 1 dimension");
auto param_dims = ctx->GetInputDim("Param"); auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -60,10 +60,10 @@ class AdamOp : public framework::OperatorWithKernel { ...@@ -60,10 +60,10 @@ class AdamOp : public framework::OperatorWithKernel {
"Param and Grad input of AdamOp should have same dimension"); "Param and Grad input of AdamOp should have same dimension");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment1"), param_dims, ctx->GetInputDim("Moment1"),
"Param and Moment input of AdamOp should have same dimension"); "Param and Moment1 input of AdamOp should have same dimension");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment2"), param_dims, ctx->GetInputDim("Moment2"),
"Param and InfNorm input of AdamOp should have same dimension"); "Param and Moment2 input of AdamOp should have same dimension");
ctx->SetOutputDim("ParamOut", param_dims); ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("Moment1Out", param_dims); ctx->SetOutputDim("Moment1Out", param_dims);
...@@ -103,23 +103,20 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -103,23 +103,20 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(1.0e-8f); .SetDefault(1.0e-8f);
AddComment(R"DOC( AddComment(R"DOC(
Adam Updates Operator. Adam Optimizer.
This implements the Adam optimizer from Section 2 of the Adam This implements the Adam optimizer from Section 2 of the Adam
paper[1]. Adam is a first-order gradient-based optimization paper : https://arxiv.org/abs/1412.6980.
method based on adaptive estimates of lower-order moments. Adam is a first-order gradient-based optimization method based on
adaptive estimates of lower-order moments.
Adam updates: Adam updates:
moment1_out = beta1 * moment1 + (1 − beta1) * grad $$moment_1_{out} = \beta_1 * moment_1 + (1 - \beta_1) * grad \break
moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad moment_2_{out} = \beta_2 * moment_2 + (1 - \beta_2) * grad * grad \break
learning_rate_t = learning_rate_t * learningRate = learningRate *
sqrt(1 - beta2_pow) / (1 - beta1_pow) $\sqrt{(1 - \beta_2_{pow})}$ / (1 - \beta_1_{pow}) \break
param_out = param - learning_rate_t * moment1/ (sqrt(moment2) + epsilon) paramOut = param - learningRate * moment_1/ ($\sqrt{(moment_2)} + \epsilon)$$
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC"); )DOC");
} }
......
...@@ -99,26 +99,22 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -99,26 +99,22 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
"Constant for numerical stability") "Constant for numerical stability")
.SetDefault(1.0e-8f); .SetDefault(1.0e-8f);
AddComment(R"DOC( AddComment(R"DOC(
Adamax Updates Operator. Adamax Optimizer.
This implements the Adamax optimizer from Section 7 of the Adam We implement the Adamax optimizer from Section 7 of the Adam
paper[1]. Adamax is a variant of the paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the
Adam algorithm based on the infinity norm. Adam algorithm based on the infinity norm.
Adamax updates: Adamax updates:
moment_out = beta1 * moment + (1 - beta1) * grad $$momentOut = \beta_1 * moment + (1 - \beta_1) * grad \break
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad)) infNormOut = max(\beta_2 * infNorm + \epsilon, |grad|) \break
learning_rate_t = learning_rate/(1 - beta1_pow) learningRate = learningRate /(1 - \beta_1_{pow}) \break
param_out = param - learning_rate_t * moment_out/inf_norm_out paramOut = param - learningRate * momentPut / infNormOut$$
The original paper does not have an epsilon attribute. The original paper does not have an epsilon attribute.
However, it is added here for numerical stability However, it is added here for numerical stability to prevent the
by preventing divide by 0. division by 0 error.
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC"); )DOC");
} }
......
...@@ -23,11 +23,11 @@ class AucOp : public framework::OperatorWithKernel { ...@@ -23,11 +23,11 @@ class AucOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Out"), "Input of Out must be initialized."); PADDLE_ENFORCE(ctx->HasInput("Out"), "Input of Out should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Indices"), PADDLE_ENFORCE(ctx->HasInput("Indices"),
"Input of Indices must be initialized."); "Input of Indices should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), PADDLE_ENFORCE(ctx->HasInput("Label"),
"Input of Label must be initialized."); "Input of Label should not be null.");
auto inference_height = ctx->GetInputDim("Out")[0]; auto inference_height = ctx->GetInputDim("Out")[0];
auto label_height = ctx->GetInputDim("Label")[0]; auto label_height = ctx->GetInputDim("Label")[0];
...@@ -52,20 +52,20 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -52,20 +52,20 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Out", AddInput("Out",
"A floating point 2D tensor, values are in the range [0, 1]." "A floating point 2D tensor, values are in the range [0, 1]."
"Each row is descend sorted. This input should be the" "Each row is sorted in descending order. This input should be the"
"output of topk." "output of topk."
"Typically, this tensor indicates the probability of each label"); "Typically, this tensor indicates the probability of each label");
AddInput("Indices", AddInput("Indices",
"An int 2D tensor, indicating the indices of original" "An int 2D tensor, indicating the indices of original"
"tensor before sort. Typically, this tensor indicates which label" "tensor before sorting. Typically, this tensor indicates which "
"the probability stands for."); "label the probability stands for.");
AddInput("Label", AddInput("Label",
"A 2D int tensor indicating the label of the training data." "A 2D int tensor indicating the label of the training data."
"The height is batch size and width is always 1."); "The height is batch size and width is always 1.");
// TODO(typhoonzero): support weight input // TODO(typhoonzero): support weight input
AddOutput("AUC", AddOutput("AUC",
"A scalar representing the " "A scalar representing the "
"current area-under-curve."); "current area-under-the-curve.");
AddAttr<std::string>("curve", "Curve type, can be 'ROC' or 'PR'.") AddAttr<std::string>("curve", "Curve type, can be 'ROC' or 'PR'.")
.SetDefault("ROC"); .SetDefault("ROC");
...@@ -74,19 +74,18 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -74,19 +74,18 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
" roc curve.") " roc curve.")
.SetDefault(200); .SetDefault(200);
AddComment( AddComment(R"DOC(
R"DOC(Computes the AUC according forward output and label. Area Under The Curve (AUC) Operator.
Best to use for binary classification evaluations.
This implementation computes the AUC according to forward output and label.
It is used very widely in binary classification evaluation. As a note:
If input label contains values other than 0 and 1, it will be cast If input label contains values other than 0 and 1, it will be cast
to bool. to bool. You can find the relevant definitions here:
You can find the definations here:
https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
Possible curves are: There are two types of possible curves:
- ROC: Receiver operating characteristic 1. ROC: Receiver operating characteristic
- PR: Precision Recall 2. PR: Precision Recall
)DOC"); )DOC");
} }
}; };
......
...@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel { ...@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), ""); PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), ""); PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
const float epsilon = ctx->Attrs().Get<float>("epsilon");
PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0");
PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large");
// make sure Mean/MeanOut and Variance/VarianceOut share memory in Python // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0], PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
"Mean and MeanOut should share the same memory"); "Mean and MeanOut should share the same memory");
...@@ -66,7 +70,7 @@ class BatchNormOp : public framework::OperatorWithKernel { ...@@ -66,7 +70,7 @@ class BatchNormOp : public framework::OperatorWithKernel {
: x_dims[x_dims.size() - 1]); : x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5,
"Input x must have 3 to 5 dimensions."); "Input X must have 3 to 5 dimensions.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C);
...@@ -93,16 +97,16 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -93,16 +97,16 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "The input tensor"); AddInput("X", "The input tensor");
AddInput("Scale", AddInput("Scale",
"Scale is a 1-dimensional tensor of size C " "Scale is a 1-dimensional tensor of size C "
"to be applied to the output"); "that is applied to the output");
AddInput("Bias", AddInput("Bias",
"Bias is a 1-dimensional tensor of size C " "Bias is a 1-dimensional tensor of size C "
"to be applied to the output"); "that is applied to the output");
AddInput("Mean", AddInput("Mean",
"The global mean (for training) or the " "The global mean (for training) or "
"estimated mean (for testing)"); "estimated mean (for testing)");
AddInput("Variance", AddInput("Variance",
"The global variance (for training) " "The global variance (for training) "
"or the estimated Variance (for testing)"); "or estimated Variance (for testing)");
AddOutput("Y", "result after normalization"); AddOutput("Y", "result after normalization");
AddOutput("MeanOut", AddOutput("MeanOut",
"Share memory with Mean. " "Share memory with Mean. "
...@@ -119,10 +123,14 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -119,10 +123,14 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
"will apply to output when training") "will apply to output when training")
.AsIntermediate(); .AsIntermediate();
AddComment(R"DOC( AddComment(R"DOC(
https://arxiv.org/pdf/1502.03167.pdf Batch Normalization.
NHWC `[batch, in_height, in_width, in_channels]` Batch Norm has been implemented as discussed in the paper:
NCHW `[batch, in_channels, in_height, in_width]` https://arxiv.org/pdf/1502.03167.pdf
Can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
)DOC"); )DOC");
} }
...@@ -297,7 +305,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel { ...@@ -297,7 +305,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
framework::DataType IndicateDataType( framework::DataType IndicateDataType(
const framework::ExecutionContext &ctx) const override { const framework::ExecutionContext &ctx) const override {
VLOG(3) << "IndicateDataType " << this->Type();
const auto *var = ctx.InputVar(framework::GradVarName("Y")); const auto *var = ctx.InputVar(framework::GradVarName("Y"));
if (var == nullptr) { if (var == nullptr) {
PADDLE_THROW("can't find Y@GRAD"); PADDLE_THROW("can't find Y@GRAD");
......
...@@ -23,13 +23,17 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -23,13 +23,17 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker {
CastOpProtoMaker(framework::OpProto *proto, CastOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensor of cast op"); AddInput("X", "The input tensor of cast op");
AddOutput("Out", "the output tensor of cast op"); AddOutput("Out", "The output tensor of cast op");
AddComment(R"DOC(Cast operator.
cast the input tensor to other data type.
)DOC");
AddAttr<int>("out_data_type", "output data type"); AddAttr<int>("out_data_type", "output data type");
AddAttr<int>("in_data_type", "input data type"); AddAttr<int>("in_data_type", "input data type");
AddComment(R"DOC(
Cast Operator.
This Operator casts the input tensor to another data type and
returns tha Output Tensor.
)DOC");
} }
}; };
......
...@@ -49,8 +49,11 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,8 +49,11 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>( AddAttr<AttrType>(
"max", "(float)Maximum value, above which element is replaced by max"); "max", "(float)Maximum value, above which element is replaced by max");
AddComment(R"DOC( AddComment(R"DOC(
Clip operator limits the given input within an interval. The interval is Clip Operator.
The clip operator limits the value of given input within an interval. The interval is
specified with arguments 'min' and 'max'. specified with arguments 'min' and 'max'.
)DOC"); )DOC");
} }
}; };
......
...@@ -56,20 +56,24 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -56,20 +56,24 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensors of concat operator.").AsDuplicable(); AddInput("X", "Input tensors of concat operator.").AsDuplicable();
AddOutput("Out", "the output tensor of concat operator."); AddOutput("Out", "Output tensor of concat operator.");
AddAttr<int>("axis",
"The axis along which the input tensors will be concatenated.")
.SetDefault(0);
AddComment(R"DOC( AddComment(R"DOC(
Join the input tensors along with the axis. Concat Operator.
Examples:
Concatenate the input tensors along dimension axis.
Examples:
Input[0] = [[1,2],[3,4]] Input[0] = [[1,2],[3,4]]
Input[1] = [[5,6]] Input[1] = [[5,6]]
axis = 0 axis = 0
Output = [[1,2], Output = [[1,2],
[3,4], [3,4],
[5,6]] [5,6]]
)DOC");
AddAttr<int>("axis", "The axis which the inputs will be joined with.") )DOC");
.SetDefault(0);
} }
}; };
......
...@@ -216,11 +216,12 @@ class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker { ...@@ -216,11 +216,12 @@ class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("IndexTensors", "Index Tensors contains indices for true/false"); AddOutput("IndexTensors", "Index Tensors contains indices for true/false");
AddComment(R"DOC( AddComment(R"DOC(
Sample dependent Cond Operator: Sample Dependent Conditional Operator.
Given Cond[i] as a 1/0 vector to indicate true/false
The equation is: Given Cond[i] as a 1/0 vector to indicate true/false:
Out[i] = subnet_t[i], if Cond[i] == true Out[i] = subnet_true[i], if Cond[i] == true
Out[i] = subnet_t[i], if Cond[i] == false Out[i] = subnet_false[i], if Cond[i] == false
)DOC"); )DOC");
} }
}; };
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/conv2d_transpose_op.h" #include "paddle/operators/conv_transpose_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -38,13 +38,13 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { ...@@ -38,13 +38,13 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(conv2d_transpose_cudnn, ops::Conv2DTransposeOp, REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad, ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad,
ops::Conv2DTransposeOpGrad); ops::ConvTransposeOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn, conv2d_transpose_cudnn,
ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn_grad, conv2d_transpose_cudnn_grad,
ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_transpose_op.h" #include "paddle/operators/conv_transpose_op.h"
#include "paddle/platform/assert.h" #include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h" #include "paddle/platform/cudnn_helper.h"
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/conv2d_op.h" #include "paddle/operators/conv_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker { ...@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
"workspace is a section of GPU memory which will be " "workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger " "allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires " "workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.") "better hardware. This size should be chosen carefully.")
.SetDefault(4096); .SetDefault(4096);
} }
}; };
...@@ -38,10 +38,11 @@ class CudnnConvOpMaker : public Conv2DOpMaker { ...@@ -38,10 +38,11 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad, REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::Conv2DOpGrad); ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(
conv_cudnn, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(conv_cudnn,
ops::GemmConvKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv_cudnn_grad, conv_cudnn_grad,
ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>); ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>);
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_op.h" #include "paddle/operators/conv_op.h"
#include "paddle/platform/assert.h" #include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h" #include "paddle/platform/cudnn_helper.h"
......
...@@ -12,18 +12,18 @@ ...@@ -12,18 +12,18 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/conv2d_op.h" #include "paddle/operators/conv_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const { void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Input"), PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DOp should not be null."); "Input(Input) of ConvOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"), PADDLE_ENFORCE(ctx->HasInput("Filter"),
"Input(Filter) of Conv2DOp should not be null."); "Input(Filter) of ConvOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Output"), PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of Conv2DOp should not be null."); "Output(Output) of ConvOp should not be null.");
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
...@@ -33,8 +33,17 @@ void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -33,8 +33,17 @@ void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const {
int input_channels = in_dims[1]; int input_channels = in_dims[1];
int output_channels = filter_dims[0]; int output_channels = filter_dims[0];
PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D."); PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D."); "Conv intput should be 4-D or 5-D tensor.");
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
"Conv input dimension and filter dimension should be the same.");
PADDLE_ENFORCE(
in_dims.size() - strides.size() == 2U,
"Conv input dimension and strides dimension should be consistent.");
PADDLE_ENFORCE_EQ(
paddings.size(), strides.size(),
"Conv paddings dimension and Conv strides dimension should be the same.");
PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups,
"The number of input channels should be equal to filter " "The number of input channels should be equal to filter "
"channels * groups."); "channels * groups.");
...@@ -42,12 +51,12 @@ void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -42,12 +51,12 @@ void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const {
output_channels % groups, 0, output_channels % groups, 0,
"The number of output channels should be divided by groups."); "The number of output channels should be divided by groups.");
auto output_height = std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]); for (size_t i = 0; i < paddings.size(); ++i) {
auto output_width = output_shape.push_back(OutputSize(in_dims[i + 2], filter_dims[i + 2],
OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]); paddings[i], strides[i]));
ctx->SetOutputDim("Output", }
{in_dims[0], filter_dims[0], output_height, output_width}); ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
} }
Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
...@@ -55,18 +64,19 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, ...@@ -55,18 +64,19 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"Input", "Input",
"The input tensor of convolution operator. " "(Tensor) The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW, where N is batch size, C is the "
"number of channels, H and W is the height and width of image."); "number of channels, H is the height of the feature, "
"and W is the width of the feature.");
AddInput("Filter", AddInput("Filter",
"The filter tensor of convolution operator." "(Tensor) The filter tensor of convolution operator. "
"The format of the filter tensor is MCHW, where M is the number of " "The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, " "output image channels, C is the number of input image channels, "
"H and W is height and width of filter. " "H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equal the number of " "If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups."); "input image channels divided by the groups.");
AddOutput("Output", AddOutput("Output",
"The output tensor of convolution operator." "(Tensor) The output tensor of convolution operator. "
"The format of output tensor is also NCHW."); "The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", "strides of convolution operator.") AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
.SetDefault({1, 1}); .SetDefault({1, 1});
...@@ -74,20 +84,100 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, ...@@ -74,20 +84,100 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
.SetDefault({0, 0}); .SetDefault({0, 0});
AddAttr<int>( AddAttr<int>(
"groups", "groups",
"group size of convolution operator. " "(int default:1), the group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: " "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters are only connected to the " "when group=2, the first half of the filters is only connected to the "
"first half of the input channels, and the second half only connected " "first half of the input channels, while the second half of the filters "
"to the second half.") "is only connected to the second half of the input channels.")
.SetDefault(1);
AddComment(R"DOC(
Convolution Operator.
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
size, C is the number of channels, H is the height of the feature, and W is
the width of the feature. Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
Input shape: (N, C_in, H_in, W_in)
Filter shape: (C_out, C_in, H_f, W_f)
Output:
Output shape: (N, C_out, H_out, W_out)
where
H_out = (H_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1;
W_out = (W_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1;
)DOC");
}
Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"Input",
"(Tensor) The input tensor of convolution operator. "
"The format of input tensor is NCDHW. Where N is batch size, C is the "
"number of channels, D is the depth of the feature, H is the height of "
"the feature, "
"and W is the width of the feature.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution operator. "
"The format of the filter tensor is MCDHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"D is the depth of the filter, H is the height of the filter, and W "
"is the width of the filter."
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddOutput("Output",
"(Tensor) The output tensor of convolution operator."
"The format of output tensor is also NCDHW.");
AddAttr<std::vector<int>>(
"strides",
"(vector, default:{0, 0, 0}), the strides of convolution operator.")
.SetDefault({1, 1, 1});
AddAttr<std::vector<int>>(
"paddings",
"(vector, default:{0, 0, 0}), the paddings of convolution operator.")
.SetDefault({0, 0, 0});
AddAttr<int>(
"groups",
"(int default:1), the group size of convolution operator. "
"According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters is only connected to the "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels.")
.SetDefault(1); .SetDefault(1);
AddComment(R"DOC( AddComment(R"DOC(
Convolution3D Operator.
The convolution operation calculates the output based on the input, filter The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape. parameters is checked in the infer-shape.
Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch
size, C is the number of channels,D is the depth of the feature, H is the height of
the feature, and W is the width of the feature. Parameters(ksize, strides, paddings)
are three elements. These three elements represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
Input shape: (N, C_in, D_in, H_in, W_in)
Filter shape: (C_out, C_in, D_f, H_f, W_f)
Output:
Output shape: (N, C_out, D_out, H_out, W_out)
where
D_out = (D_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1;
H_out = (H_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1;
W_out = (W_in - filter_size[2] + 2 * paddings[2]) / strides[2] + 1;
)DOC"); )DOC");
} }
void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const { void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) { if (ctx->HasOutput(framework::GradVarName("Input"))) {
...@@ -102,10 +192,18 @@ void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const { ...@@ -102,10 +192,18 @@ void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(conv2d, ops::Conv2DOp, ops::Conv2DOpMaker, conv2d_grad, REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad,
ops::Conv2DOpGrad); ops::ConvOpGrad);
namespace ops = paddle::operators;
REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad,
ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(conv2d,
ops::GemmConvKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>); conv2d_grad, ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(conv3d,
ops::GemmConvKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>); conv3d_grad, ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>);
...@@ -12,11 +12,16 @@ ...@@ -12,11 +12,16 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/conv2d_op.h" #include "paddle/operators/conv_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(conv2d,
ops::GemmConvKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
conv2d, ops::GemmConv2DKernel<paddle::platform::GPUPlace, float>); conv2d_grad, ops::GemmConvGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(conv3d,
ops::GemmConvKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::GPUPlace, float>); conv3d_grad, ops::GemmConvGradKernel<paddle::platform::GPUPlace, float>);
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/math/im2col.h" #include "paddle/operators/math/im2col.h"
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/vol2col.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -40,14 +41,20 @@ class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -40,14 +41,20 @@ class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker); framework::OpAttrChecker* op_checker);
}; };
class Conv2DOp : public framework::OperatorWithKernel { class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Conv3DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
class ConvOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override; void InferShape(framework::InferShapeContext* ctx) const override;
}; };
class Conv2DOpGrad : public framework::OperatorWithKernel { class ConvOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -55,7 +62,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel { ...@@ -55,7 +62,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
}; };
template <typename Place, typename T> template <typename Place, typename T>
class GemmConv2DKernel : public framework::OpKernel<T> { class GemmConvKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input"); const Tensor* input = context.Input<Tensor>("Input");
...@@ -70,51 +77,78 @@ class GemmConv2DKernel : public framework::OpKernel<T> { ...@@ -70,51 +77,78 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
int groups = context.Attr<int>("groups"); int groups = context.Attr<int>("groups");
int batch_size = input->dims()[0]; const int batch_size = static_cast<int>(input->dims()[0]);
int input_channels = input->dims()[1];
int filter_height = filter.dims()[filter.dims().size() - 2]; // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
int filter_width = filter.dims()[filter.dims().size() - 1]; std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
int output_channels = output->dims()[1]; filter_shape_vec.erase(filter_shape_vec.begin(),
int output_height = output->dims()[2]; filter_shape_vec.begin() + 2);
int output_width = output->dims()[3];
// output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w}
paddle::operators::math::Im2ColFunctor< std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
paddle::operators::math::ColFormat::kCFO, Place, T> output_shape_vec.erase(output_shape_vec.begin(),
im2col; output_shape_vec.begin() + 2);
// use col_shape in the im2col calculation // use col_shape in the im2col calculation
framework::DDim col_shape = {input_channels / groups, filter_height, // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d,
filter_width, output_height, output_width}; // o_h, o_w}
std::vector<int64_t> col_shape_vec;
col_shape_vec.push_back(input->dims()[1] / groups);
col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(),
filter_shape_vec.end());
col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(),
output_shape_vec.end());
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation // use col_matrix_shape in the gemm calculation
framework::DDim col_matrix_shape = { // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d *
input_channels / groups * filter_height * filter_width, // o_h * o_w)
output_height * output_width}; framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
Tensor col; Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace()); col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col, // col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape // but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface. // to call the matrix multiplication interface.
Tensor col_matrix = col; Tensor col_matrix;
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape); col_matrix.Resize(col_matrix_shape);
framework::DDim input_shape = {input->dims()[1], input->dims()[2], framework::DDim input_shape = framework::slice_ddim(
input->dims()[3]}; input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0], framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]}; filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape); filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {output_channels, framework::DDim output_matrix_shape = {
output_height * output_width}; output->dims()[1],
// convolution operator: im2col + gemm output->numel() / (output->dims()[0] * output->dims()[1])};
int in_step = input_channels / groups;
int out_step = output_channels / groups; // convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) { for (int g = 0; g < groups; g++) {
// im2col
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
im2col(context.device_context(), in_slice, col, strides[0], strides[1],
paddings[0], paddings[0], paddings[1], paddings[1]); if (filter_shape_vec.size() == 2) {
// im2col
math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
im2col(context.device_context(), in_slice, col, strides[0],
strides[1], paddings[0], paddings[0], paddings[1],
paddings[1]);
} else if (filter_shape_vec.size() == 3) {
// vol2col
math::Vol2ColFunctor<Place, T> vol2col;
vol2col(context.device_context(), in_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
// gemm // gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
...@@ -127,7 +161,7 @@ class GemmConv2DKernel : public framework::OpKernel<T> { ...@@ -127,7 +161,7 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
}; };
template <typename Place, typename T> template <typename Place, typename T>
class GemmConvGrad2DKernel : public framework::OpKernel<T> { class GemmConvGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input"); const Tensor* input = context.Input<Tensor>("Input");
...@@ -137,64 +171,79 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> { ...@@ -137,64 +171,79 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
context.Output<Tensor>(framework::GradVarName("Input")); context.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad = Tensor* filter_grad =
context.Output<Tensor>(framework::GradVarName("Filter")); context.Output<Tensor>(framework::GradVarName("Filter"));
// The filter and filter_grad will be reshaped in the calculations, // The filter and filter_grad will be reshaped in the calculations,
// so here use an assignment operation, // so here use an assignment operation,
// that avoids modifying the variable in the Scope. // that avoids modifying the variable in the Scope.
Tensor filter = *context.Input<Tensor>("Filter"); Tensor filter = *context.Input<Tensor>("Filter");
if (!input_grad && !filter_grad) return;
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
int groups = context.Attr<int>("groups"); int groups = context.Attr<int>("groups");
int batch_size = input->dims()[0]; const int batch_size = static_cast<int>(input->dims()[0]);
int input_channels = input->dims()[1];
int filter_height = filter.dims()[filter.dims().size() - 2]; // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
int filter_width = filter.dims()[filter.dims().size() - 1]; std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
int output_channels = output_grad->dims()[1]; filter_shape_vec.erase(filter_shape_vec.begin(),
int output_height = output_grad->dims()[2]; filter_shape_vec.begin() + 2);
int output_width = output_grad->dims()[3];
// output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w}
paddle::operators::math::Col2ImFunctor< std::vector<int64_t> output_shape_vec(
paddle::operators::math::ColFormat::kCFO, Place, T> framework::vectorize(output_grad->dims()));
col2im; output_shape_vec.erase(output_shape_vec.begin(),
paddle::operators::math::Im2ColFunctor< output_shape_vec.begin() + 2);
paddle::operators::math::ColFormat::kCFO, Place, T>
im2col; // use col_shape in the im2col calculation
// use col_shape in the im2col and col2im calculation // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d,
framework::DDim col_shape = {input_channels / groups, filter_height, // o_h, o_w}
filter_width, output_height, output_width}; std::vector<int64_t> col_shape_vec;
col_shape_vec.push_back(input->dims()[1] / groups);
col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(),
filter_shape_vec.end());
col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(),
output_shape_vec.end());
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation // use col_matrix_shape in the gemm calculation
framework::DDim col_matrix_shape = { // size: (i_c/g * k_h * k_w, o_h * o_w)
input_channels / groups * filter_height * filter_width, // or
output_height * output_width}; // (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w)
Tensor col; framework::DDim col_matrix_shape =
col.mutable_data<T>(col_shape, context.GetPlace()); framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix = col;
col_matrix.Resize(col_matrix_shape);
framework::DDim input_shape = {input->dims()[1], input->dims()[2], framework::DDim input_shape = framework::slice_ddim(
input->dims()[3]}; input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim output_matrix_shape = {
output_grad->dims()[1],
output_grad->dims()[2] * output_grad->dims()[3]};
framework::DDim filter_matrix_shape = {filter.dims()[0], framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]}; filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape); filter.Resize(filter_matrix_shape);
// convolution backward input operator: gemm + col2im framework::DDim output_matrix_shape = {
// convolution backward weight operator: im2col + gemm output_grad->dims()[1],
int in_step = input_channels / groups; output_grad->numel() /
int out_step = output_channels / groups; (output_grad->dims()[0] * output_grad->dims()[1])};
// convolution backward input operator: gemm + col2im(or col2vol)
// convolution backward weight operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output_grad->dims()[1]) / groups;
Tensor col;
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix;
col.mutable_data<T>(col_shape, context.GetPlace());
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
math::SetConstant<Place, T> set_zero;
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace()); input_grad->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*input_grad); set_zero(context.device_context(), input_grad, static_cast<T>(0));
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch = Tensor out_grad_batch =
...@@ -208,13 +257,22 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> { ...@@ -208,13 +257,22 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
math::matmul<Place, T>(context.device_context(), filter_slice, true, math::matmul<Place, T>(context.device_context(), filter_slice, true,
out_grad_slice, false, T(1.0), &col_matrix, out_grad_slice, false, T(1.0), &col_matrix,
T(0.0)); T(0.0));
// col2im // col2im
Tensor in_grad_slice = Tensor in_grad_slice =
in_grad_batch.Slice(g * in_step, (g + 1) * in_step); in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
if (filter_shape_vec.size() == 2) {
math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
col2im(context.device_context(), in_grad_slice, col, strides[0], col2im(context.device_context(), in_grad_slice, col, strides[0],
strides[1], paddings[0], paddings[0], paddings[1], strides[1], paddings[0], paddings[0], paddings[1],
paddings[1]); paddings[1]);
} else if (filter_shape_vec.size() == 3) {
math::Col2VolFunctor<Place, T> col2vol;
col2vol(context.device_context(), in_grad_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
} }
} }
} }
...@@ -223,8 +281,7 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> { ...@@ -223,8 +281,7 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
filter_grad->mutable_data<T>(context.GetPlace()); filter_grad->mutable_data<T>(context.GetPlace());
Tensor filter_grad_ = *filter_grad; Tensor filter_grad_ = *filter_grad;
filter_grad_.Resize(filter_matrix_shape); filter_grad_.Resize(filter_matrix_shape);
auto t = framework::EigenVector<T>::Flatten(filter_grad_); set_zero(context.device_context(), filter_grad, static_cast<T>(0));
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
Tensor out_grad_batch = Tensor out_grad_batch =
...@@ -235,9 +292,18 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> { ...@@ -235,9 +292,18 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
Tensor out_grad_slice = Tensor out_grad_slice =
out_grad_batch.Slice(g * out_step, (g + 1) * out_step); out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (filter_shape_vec.size() == 2) {
math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
im2col(context.device_context(), in_slice, col, strides[0], im2col(context.device_context(), in_slice, col, strides[0],
strides[1], paddings[0], paddings[0], paddings[1], strides[1], paddings[0], paddings[0], paddings[1],
paddings[1]); paddings[1]);
} else if (filter_shape_vec.size() == 3) {
math::Vol2ColFunctor<Place, T> vol2col;
vol2col(context.device_context(), in_slice, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
// gemm // gemm
Tensor filter_grad_slice = Tensor filter_grad_slice =
...@@ -250,6 +316,5 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> { ...@@ -250,6 +316,5 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
} }
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -96,14 +96,13 @@ as used in the Neural Turing Machine: https://arxiv.org/abs/1410.5401 ...@@ -96,14 +96,13 @@ as used in the Neural Turing Machine: https://arxiv.org/abs/1410.5401
The equation is: The equation is:
\f[ $$Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}$$
Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}
\f]
where X's index is computed modulo M, and b's index is computed modulo N. where X's index is computed modulo M, and Y's index is computed modulo N.
Both inputs X and Y can carry LoD (Level of Details) information.
However, the output only shares the LoD information with input X.
Both of the input `X` and `Y` can carry LoD (Level of Details) information.
However, the output only shares the LoD information with input `X`.
)DOC"); )DOC");
} }
}; };
......
...@@ -12,18 +12,18 @@ ...@@ -12,18 +12,18 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/conv2d_transpose_op.h" #include "paddle/operators/conv_transpose_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const { void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("Input"), PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DTransposeOp should not be null."); "Input(Input) of ConvTransposeOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"), PADDLE_ENFORCE(ctx->HasInput("Filter"),
"Input(Filter) of Conv2DTransposeOp should not be null."); "Input(Filter) of ConvTransposeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Output"), PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of Conv2DTransposeOp should not be null."); "Output(Output) of ConvTransposeOp should not be null.");
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
...@@ -35,17 +35,27 @@ void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -35,17 +35,27 @@ void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
"No Padding allowed in conv transpose op."); "No Padding allowed in conv transpose op.");
} }
PADDLE_ENFORCE_EQ(in_dims.size(), 4, PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
"Conv2DTransposeOp input should be 4-D tensor."); "ConvTransposeOp intput should be 4-D or 5-D tensor.");
PADDLE_ENFORCE_EQ(filter_dims.size(), 4, PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(),
"Conv2DTransposeOp filter should be 4-D tensor."); "ConvTransposeOp input dimension and filter dimension "
"should be the same.");
PADDLE_ENFORCE(in_dims.size() - strides.size() == 2U,
"ConvTransposeOp input dimension and strides dimension should "
"be consistent.");
PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
"ConvTransposeOp paddings dimension and Conv strides "
"dimension should be the same.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0], PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
"input and kernel input dimension should be equal."); "In ConvTransposeOp, The input channel should be the same "
"as the number of filters.");
auto output_height = (in_dims[2] - 1) * strides[0] + filter_dims[2]; std::vector<int64_t> output_shape({in_dims[0], filter_dims[1]});
auto output_width = (in_dims[3] - 1) * strides[1] + filter_dims[3]; for (size_t i = 0; i < paddings.size(); ++i) {
ctx->SetOutputDim("Output", output_shape.push_back((in_dims[i + 2] - 1) * strides[i] +
{in_dims[0], filter_dims[1], output_height, output_width}); filter_dims[i + 2]);
}
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
} }
Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
...@@ -55,32 +65,108 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( ...@@ -55,32 +65,108 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
"Input", "Input",
"(Tensor) The input tensor of convolution transpose operator. " "(Tensor) The input tensor of convolution transpose operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW. Where N is batch size, C is the "
"number of input channels, H and W is the height and width of image."); "number of input channels, H is the height of the feature, and "
"W is the width of the feature.");
AddInput("Filter", AddInput("Filter",
"(Tensor) The filter tensor of convolution transpose operator." "(Tensor) The filter tensor of convolution transpose operator. "
"The format of the filter tensor is CMHW, where C is the number of " "The format of the filter tensor is CMHW, where C is the number of "
"output image channels, M is the number of input image channels, " "output image channels, M is the number of input image channels, "
"H and W is height and width of filter. " "H is the height of the filter, and W is the width of the filter. "
"We enforce groups number == 1 and padding == 0 in " "We enforce groups number == 1 and padding == 0 in "
"convolution transpose Scenario."); "the convolution transpose scenario.");
AddOutput("Output", AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator." "(Tensor) The output tensor of convolution transpose operator. "
"The format of output tensor is also NCHW."); "The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>(
"strides of convolution transpose operator.") "strides",
"(vector defalut:{1, 1}), strides of convolution transpose operator.")
.SetDefault({1, 1}); .SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings", AddAttr<std::vector<int>>(
"paddings of convolution transpose operator.") "paddings",
"(vector defalut:{0, 0}), paddings of convolution transpose operator.")
.SetDefault({0, 0}); .SetDefault({0, 0});
AddComment(R"DOC( AddComment(R"DOC(
Convolution2D Transpose Operator.
The convolution transpose operation calculates the output based on the input, filter The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape. parameters is checked in the infer-shape.
Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
size, C is the number of channels, H is the height of the feature, and
W is the width of the feature. Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
Input shape: (N, C_in, H_in, W_in)
Filter shape: (C_in, C_out, H_f, W_f)
Output:
Output shape: (N, C_out, H_out, W_out)
where
H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0];
W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1];
)DOC"); )DOC");
} }
void Conv2DTransposeOpGrad::InferShape( Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
framework::InferShapeContext* ctx) const { framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(Tensor) The input tensor of convolution transpose operator."
"The format of input tensor is NCDHW. Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the "
"height of the feature, and "
"W is the width of the feature.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution transpose operator."
"The format of the filter tensor is CMDHW, where C is the number of "
"output image channels, M is the number of input image channels, D "
"is the depth of the filter, H is the height of the filter, and "
"W is the width of the filter."
"We enforce groups number == 1 and padding == 0 in "
"the convolution3d transpose scenario.");
AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator."
"The format of output tensor is also NCDHW."
"Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature.");
AddAttr<std::vector<int>>(
"strides",
"(vector defalut:{1, 1, 1}), strides of convolution transpose operator.")
.SetDefault({1, 1, 1});
AddAttr<std::vector<int>>(
"paddings",
"(vector defalut:{0, 0, 0}), paddings of convolution transpose operator.")
.SetDefault({0, 0, 0});
AddComment(R"DOC(
Convolution3D Transpose Operator.
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch
size, C is the number of channels, D is the depth of the feature,
H is the height of the feature, and W is the width of the feature.
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
Input shape: (N, C_in, D_in, H_in, W_in)
Filter shape: (C_in, C_out, D_f, H_f, W_f)
Output:
Output shape: (N, C_out, D_out, H_out, W_out)
where
D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0];
H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1];
W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + filter_size[2];
)DOC");
}
void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) { if (ctx->HasOutput(framework::GradVarName("Input"))) {
...@@ -95,13 +181,23 @@ void Conv2DTransposeOpGrad::InferShape( ...@@ -95,13 +181,23 @@ void Conv2DTransposeOpGrad::InferShape(
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(conv2d_transpose, ops::Conv2DTransposeOp,
ops::Conv2DTransposeOpMaker, conv2d_transpose_grad, REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker,
ops::Conv2DTransposeOpGrad); conv2d_transpose_grad, ops::ConvTransposeOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_transpose, conv2d_transpose,
ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_transpose_grad, conv2d_transpose_grad,
ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker,
conv3d_transpose_grad, ops::ConvTransposeOpGrad);
REGISTER_OP_CPU_KERNEL(
conv3d_transpose,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv3d_transpose_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
...@@ -12,13 +12,20 @@ ...@@ -12,13 +12,20 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/conv2d_transpose_op.h" #include "paddle/operators/conv_transpose_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
conv2d_transpose, conv2d_transpose,
ops::GemmConv2DTransposeKernel<paddle::platform::GPUPlace, float>); ops::GemmConvTransposeKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
conv2d_transpose_grad, conv2d_transpose_grad,
ops::GemmConv2DTransposeGradKernel<paddle::platform::GPUPlace, float>); ops::GemmConvTransposeGradKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
conv3d_transpose,
ops::GemmConvTransposeKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
conv3d_transpose_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::GPUPlace, float>);
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/math/im2col.h" #include "paddle/operators/math/im2col.h"
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/vol2col.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -33,7 +34,13 @@ class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -33,7 +34,13 @@ class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker); framework::OpAttrChecker* op_checker);
}; };
class Conv2DTransposeOp : public framework::OperatorWithKernel { class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Conv3DTransposeOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
class ConvTransposeOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -41,7 +48,7 @@ class Conv2DTransposeOp : public framework::OperatorWithKernel { ...@@ -41,7 +48,7 @@ class Conv2DTransposeOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override; void InferShape(framework::InferShapeContext* ctx) const override;
}; };
class Conv2DTransposeOpGrad : public framework::OperatorWithKernel { class ConvTransposeOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -50,41 +57,44 @@ class Conv2DTransposeOpGrad : public framework::OperatorWithKernel { ...@@ -50,41 +57,44 @@ class Conv2DTransposeOpGrad : public framework::OperatorWithKernel {
}; };
template <typename Place, typename T> template <typename Place, typename T>
class GemmConv2DTransposeKernel : public framework::OpKernel<T> { class GemmConvTransposeKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input"); const Tensor* input = context.Input<Tensor>("Input");
// The filter will be reshaped, so it should not be constant pointer // The filter will be reshaped, so it should not be constant pointer
Tensor filter = *context.Input<Tensor>("Filter"); Tensor filter = *context.Input<Tensor>("Filter");
Tensor* output = context.Output<Tensor>("Output"); Tensor* output = context.Output<Tensor>("Output");
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
// TODO(Zhuoyuan): Paddings can be added in future. // TODO(Zhuoyuan): Paddings can be added in future.
// groups will alway be disabled in conv2d_transpose. // groups will alway be disabled in conv2dtranspose.
const int batch_size = input->dims()[0]; const int batch_size = static_cast<int>(input->dims()[0]);
const int m = input->dims()[1];
const int h = input->dims()[2]; // input_shape_vec: {h, w} or {d, h, w}
const int w = input->dims()[3]; std::vector<int64_t> input_shape_vec = framework::vectorize(input->dims());
input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2);
const int k_h = filter.dims()[2];
const int k_w = filter.dims()[3]; // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
std::vector<int64_t> filter_shape_vec = framework::vectorize(filter.dims());
const int c = output->dims()[1]; // output channels filter_shape_vec.erase(filter_shape_vec.begin(),
const int o_h = output->dims()[2]; filter_shape_vec.begin() + 2);
const int o_w = output->dims()[3];
// use col_shape in the im2col and col2im (or vol2col and col2vol)
paddle::operators::math::Col2ImFunctor< // calculation
paddle::operators::math::ColFormat::kCFO, Place, T> // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
col2im; std::vector<int64_t> col_shape_vec;
col_shape_vec.push_back(output->dims()[1]);
// use col_shape in the im2col and col2im calculation col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(),
DDim col_shape = {c, k_h, k_w, h, w}; filter_shape_vec.end());
col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(),
input_shape_vec.end());
DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation // use col_matrix_shape in the gemm calculation
DDim col_matrix_shape = {c * k_h * k_w, h * w}; // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
Tensor col; Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace()); col.mutable_data<T>(col_shape, context.GetPlace());
...@@ -95,160 +105,189 @@ class GemmConv2DTransposeKernel : public framework::OpKernel<T> { ...@@ -95,160 +105,189 @@ class GemmConv2DTransposeKernel : public framework::OpKernel<T> {
col_matrix.ShareDataWith(col); col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape); col_matrix.Resize(col_matrix_shape);
DDim output_shape = {c, o_h, o_w}; // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
DDim input_matrix_shape = {m, h * w}; DDim output_shape =
framework::slice_ddim(output->dims(), 1, output->dims().size());
DDim filter_matrix_shape = {m, c * k_h * k_w}; // input matrix size: (m, h * w) or (m, d * h * w)
filter.Resize(filter_matrix_shape); DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]};
// convolution transpose: gemm + col2im (similar to conv-backward on input) // filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]};
filter.Resize(filter_matrix_shape);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*output); math::SetConstant<Place, T> set_zero;
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0)); set_zero(context.device_context(), output, static_cast<T>(0));
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// on input)
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
// batch with size (M, h * w) // batch with size (m, h * w) or (m, d * h * w)
Tensor input_batch = input->Slice(i, i + 1).Resize(input_matrix_shape); Tensor input_batch = input->Slice(i, i + 1).Resize(input_matrix_shape);
// filter size: (M, c * k_h * k_w)
// output size: (c, o_h, o_w) // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape); Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape);
// col_matrix = filter * input_batch // col_matrix = filter * input_batch
// of shape (c * k_h * k_w, h * w) // of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
math::matmul<Place, T>(context.device_context(), filter, true, math::matmul<Place, T>(context.device_context(), filter, true,
input_batch, false, T(1.0), &col_matrix, T(0.0)); input_batch, false, static_cast<T>(1.0),
&col_matrix, static_cast<T>(0.0));
if (filter_shape_vec.size() == 2) {
// col2im: col_matrix -> dy
// from (c * k_h * k_w, h * w) to (c, o_h, o_w)
math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
col2im(context.device_context(), output_batch, col, strides[0], col2im(context.device_context(), output_batch, col, strides[0],
strides[1], 0, 0, 0, 0); strides[1], 0, 0, 0, 0);
} else if (filter_shape_vec.size() == 3) {
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
math::Col2VolFunctor<Place, T> col2vol;
col2vol(context.device_context(), output_batch, col, strides[0],
strides[1], strides[2], 0, 0, 0);
}
} }
} }
}; };
template <typename Place, typename T> template <typename Place, typename T>
class GemmConv2DTransposeGradKernel : public framework::OpKernel<T> { class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* input = context.Input<Tensor>("Input"); const Tensor* input = context.Input<Tensor>("Input");
const Tensor* output_grad = const Tensor* output_grad =
context.Input<Tensor>(framework::GradVarName("Output")); context.Input<Tensor>(framework::GradVarName("Output"));
// For filter, we do not use const pointer b/c we will do reshape, // For filter, we do not use const pointer b/c we will do reshape,
// but we should avoid modifying its value. // but we should avoid modifying its value.
Tensor filter = *context.Input<Tensor>("Filter"); Tensor filter = *context.Input<Tensor>("Filter");
Tensor* input_grad = Tensor* input_grad =
context.Output<Tensor>(framework::GradVarName("Input")); context.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad = Tensor* filter_grad =
context.Output<Tensor>(framework::GradVarName("Filter")); context.Output<Tensor>(framework::GradVarName("Filter"));
if ((!input_grad) && (!filter_grad)) return;
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
// Actually, no paddings and groups allowed in conv transpose. // Actually, no paddings and groups allowed in conv transpose.
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
const int batch_size = input->dims()[0]; const int batch_size = static_cast<int>(input->dims()[0]);
const int m = input->dims()[1];
const int h = input->dims()[2];
const int w = input->dims()[3];
const int k_h = filter.dims()[2]; // input_shape_vec: {h, w} or {d, h, w}
const int k_w = filter.dims()[3]; std::vector<int64_t> input_shape_vec = framework::vectorize(input->dims());
input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2);
const int c = output_grad->dims()[1]; // output channels // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
const int o_h = output_grad->dims()[2]; std::vector<int64_t> filter_shape_vec = framework::vectorize(filter.dims());
const int o_w = output_grad->dims()[3]; filter_shape_vec.erase(filter_shape_vec.begin(),
filter_shape_vec.begin() + 2);
// Only im2col functor required for bp to get to the right shape // use col_shape in the im2col and col2im (or vol2col and col2vol)
paddle::operators::math::Im2ColFunctor< // calculation
paddle::operators::math::ColFormat::kCFO, Place, T> // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
im2col; std::vector<int64_t> col_shape_vec;
col_shape_vec.push_back(output_grad->dims()[1]);
// use col_shape in the im2col and col2im calculation col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(),
DDim col_shape = {c, k_h, k_w, h, w}; filter_shape_vec.end());
col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(),
input_shape_vec.end());
DDim col_shape(framework::make_ddim(col_shape_vec));
// use col_matrix_shape in the gemm calculation // use col_matrix_shape in the gemm calculation
DDim col_matrix_shape_f = {c * h * w, k_h * k_w}; // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
Tensor col; // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
col.mutable_data<T>(col_shape, context.GetPlace()); DDim output_shape = framework::slice_ddim(output_grad->dims(), 1,
// col_matrix shares the same piece of data with col, output_grad->dims().size());
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
DDim output_shape = {c, o_h, o_w}; // input matrix size: (m, h * w) or (m, d * h * w)
DDim input_matrix_shape = {m, h * w}; DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]};
DDim filter_matrix_shape = {m, c * k_h * k_w}; // filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]};
filter.Resize(filter_matrix_shape); filter.Resize(filter_matrix_shape);
// convolution transpose grad on input: // convolution transpose grad on input:
// im2col + gemm (similar to conv-forward) // im2col + gemm (similar to conv-forward)
// input need to compute gradient // input need to compute gradient
if (input_grad) { if (input_grad || filter_grad) {
Tensor col;
col.mutable_data<T>(col_shape, context.GetPlace());
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
Tensor col_matrix; Tensor col_matrix;
col_matrix.ShareDataWith(col); col_matrix.ShareDataWith(col);
DDim col_matrix_shape = {c * k_h * k_w, h * w};
col_matrix.Resize(col_matrix_shape); col_matrix.Resize(col_matrix_shape);
Tensor filter_grad_;
math::SetConstant<Place, T> set_zero;
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace()); input_grad->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*input_grad); set_zero(context.device_context(), input_grad, static_cast<T>(0));
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0)); }
if (filter_grad) { // filter size (m, c, k_h, k_w)
filter_grad->mutable_data<T>(context.GetPlace());
set_zero(context.device_context(), filter_grad, static_cast<T>(0));
filter_grad_ = *filter_grad;
filter_grad_.Resize(filter_matrix_shape);
}
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
// batch with size (c, o_h * o_w) // batch with size (c, o_h * o_w)
Tensor output_grad_batch = Tensor output_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_shape); output_grad->Slice(i, i + 1).Resize(output_shape);
// filter of size (m, c * k_h * k_w)
if (filter_shape_vec.size() == 2) {
// im2col: dy -> col matrix
// from (c, o_h, o_w) to (c * k_h * k_w, h * w)
math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
im2col(context.device_context(), output_grad_batch, col, strides[0],
strides[1], paddings[0], paddings[0], paddings[1],
paddings[1]);
} else if (filter_shape_vec.size() == 3) {
// vol2col: dy -> col_matrix
// from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w)
math::Vol2ColFunctor<Place, T> vol2col;
vol2col(context.device_context(), output_grad_batch, col, strides[0],
strides[1], strides[2], paddings[0], paddings[1],
paddings[2]);
}
if (input_grad) {
// batch with size (m, h, w) // batch with size (m, h, w)
Tensor input_grad_batch = Tensor input_grad_batch =
input_grad->Slice(i, i + 1).Resize(input_matrix_shape); input_grad->Slice(i, i + 1).Resize(input_matrix_shape);
// im2col: dy from (c, o_h, o_w) -> (c * k_h * k_w, h * w)
im2col(context.device_context(), output_grad_batch, col, strides[0],
strides[1], paddings[0], paddings[0], paddings[1], paddings[1]);
// gemm: dx = filter * dy // gemm: dx = filter * dy
// (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, c, h) // (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, h * w)
// or
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// d, h, w)
math::matmul<Place, T>(context.device_context(), filter, false, math::matmul<Place, T>(context.device_context(), filter, false,
col_matrix, false, T(1.0), &input_grad_batch, col_matrix, false, static_cast<T>(1.0),
T(0.0)); &input_grad_batch, static_cast<T>(0.0));
}
} }
// filter gradient required
if (filter_grad) { if (filter_grad) {
Tensor col_matrix_f;
col_matrix_f.ShareDataWith(col);
DDim col_matrix_shape_f = {c * h * w, k_h * k_w};
col_matrix_f.Resize(col_matrix_shape_f);
filter_grad->mutable_data<T>(context.GetPlace());
Tensor filter_grad_ = *filter_grad;
filter_grad_.Resize(filter_matrix_shape);
auto t = framework::EigenVector<T>::Flatten(filter_grad_);
t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
for (int i = 0; i < batch_size; ++i) {
// batch with size (c, o_h, o_w)
Tensor output_grad_batch =
output_grad->Slice(i, i + 1).Resize(output_shape);
// input batch // input batch
Tensor in_batch = input->Slice(i, i + 1).Resize(input_matrix_shape); Tensor in_batch = input->Slice(i, i + 1).Resize(input_matrix_shape);
// gemm: d_filter = x * dy^T
// im2col: (c * h * w, k_h * k_w) // (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, k_h * k_w)
im2col(context.device_context(), output_grad_batch, col, strides[0], // or
strides[1], paddings[0], paddings[0], paddings[1], paddings[1]); // (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// k_h * k_w)
// gemm: d_filter = x * y_grad^T
// (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, c, h)
math::matmul<Place, T>(context.device_context(), in_batch, false, math::matmul<Place, T>(context.device_context(), in_batch, false,
col_matrix_f, true, T(1.0), &filter_grad_, col_matrix, true, static_cast<T>(1.0),
T(1.0)); &filter_grad_, static_cast<T>(1.0));
}
} }
} }
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
Cosine Similarity Operator. Cosine Similarity Operator.
The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)). $Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$
The input `X` and `Y` must have the same shape, except that the 1st dimension The input X and Y must have the same shape, except that the 1st dimension
of input `Y` could be just 1 (different from input `X`), which will be of input Y could be just 1 (different from input X), which will be
broadcasted to match the shape of input `X` before computing their cosine broadcasted to match the shape of input X before computing their cosine
similarity. similarity.
Both the input `X` and `Y` can carry the LoD (Level of Details) information, Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`. or not. But the output only shares the LoD information with input X.
)DOC"); )DOC");
} }
}; };
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/crf_decoding_op.h"
namespace paddle {
namespace operators {
class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CRFDecodingOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Emission",
"(LoDTensor, default: LoDTensor<float>). A LoDTensor with shape "
"[N x D] where N is the size of the mini-batch and D is the total "
"tag number. This input is the unscaled emission weight matrix of "
"the linear_chain_crf operator.");
AddInput(
"Transition",
"(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
"This input is the transition weights learned by the linear_chain_crf "
"operator, denoted as w. The 1st row of w are transition weights for "
"the start mask. The 2nd row of w are transition weights for the end "
"mask. Transition weights between other tags begin from the 3rd row of "
"w. See more details in comments of the linear_chain_crf operator.");
AddInput(
"Label",
"(LoDTensor, LoDTensor<int>). The ground truth with shape "
"[N x 1]. This input is optional. See more details in the operator's "
"comments.")
.AsDispensable();
AddOutput("ViterbiPath",
"(LoDTensor, LoDTensor<int>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the groud "
"truth) is given. See more details in the operator's comment.");
AddComment(R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
freature weights learned by the linear_chain_crf operator. It implements the
Viterbi algorithm which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given:
1. Input(Label) is given:
This happens in training. This operator is used to co-work with the chunk_eval
operator.
When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
prediction, or 1 indicating a tag is correctly predicted. Such an ouput is the
input to chunk_eval operator.
2. Input(Label) is not given:
This is the standard decoding process.
The crf_decoding operator returns a row vecotr with shape [N x 1] whose values
range from 0 to maximum tag number - 1. Each element indicates an index of a
predicted tag.
)DOC");
}
};
class CRFDecodingOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Emission"),
"Input(Emission) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Transition"),
"Input(Transition) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("ViterbiPath"),
"Output(ViterbiPath) should be not null.");
auto emission_dims = ctx->GetInputDim("Emission");
PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL,
"The Input(Emission) should be a 2-D tensor.");
PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed.");
auto transition_dims = ctx->GetInputDim("Transition");
PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL,
"The Input(Transition) should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(
transition_dims[0] - 2, transition_dims[1],
"An invalid dimension for the Input(Transition), which should "
"be a 2-D tensor with shape [(D + 2) x D].");
PADDLE_ENFORCE_EQ(
emission_dims[1], transition_dims[1],
"The 2nd dimension of the Input(Emission) and the Input(Transition) "
"should be equal to the tag number.");
if (ctx->HasInput("Label")) {
auto label_dims = ctx->GetInputDim("Label");
PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
"The Input(Label) should be a 2-D tensor with the 2nd "
"dimensions fixed to 1.");
PADDLE_ENFORCE_EQ(
emission_dims[0], label_dims[0],
"The height of Input(Emission) and the height of Input(Label) "
"should be the same.");
}
ctx->ShareLoD("Emission", /*->*/ "ViterbiPath");
ctx->SetOutputDim("ViterbiPath", {emission_dims[0], 1});
}
protected:
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(crf_decoding, ops::CRFDecodingOp,
ops::CRFDecodingOpMaker);
REGISTER_OP_CPU_KERNEL(
crf_decoding, ops::CRFDecodingOpKernel<paddle::platform::CPUPlace, float>,
ops::CRFDecodingOpKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using framework::LoDTensor;
using framework::LoD;
using framework::Tensor;
template <typename Place, typename T>
class CRFDecodingOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"The crf_decoding operator can only run on CPU.");
auto* emission_weights = ctx.Input<LoDTensor>("Emission");
auto* transition_weights = ctx.Input<Tensor>("Transition");
auto* label = ctx.Input<LoDTensor>("Label");
auto* decoded_path = ctx.Output<Tensor>("ViterbiPath");
PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,
"The Input(Emission) should be a sequence.");
auto lod = emission_weights->lod();
PADDLE_ENFORCE(lod.size(), "Input(Emission) must be a sequence.");
const size_t level = 0;
const size_t seq_num = lod[level].size() - 1;
int* path = decoded_path->mutable_data<int>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int>()(ctx.device_context(),
decoded_path, 0);
for (size_t i = 0; i < seq_num; ++i) {
int start_pos = static_cast<int>(lod[level][i]);
int end_pos = static_cast<int>(lod[level][i + 1]);
Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights,
&decoded_path_one_seq);
}
if (label) {
PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
"The Input(Label) should be a sequence.");
const int* label_value = label->data<int>();
size_t batch_size = emission_weights->dims()[0];
for (size_t i = 0; i < batch_size; ++i) {
path[i] = label_value[i] == path[i] ? 1 : 0;
}
}
}
private:
void Decode(const Tensor& emission_weights, const Tensor& transition_weights,
Tensor* decoded_path) const {
auto emission_dims = emission_weights.dims();
const size_t seq_len = emission_dims[0];
const size_t tag_num = emission_dims[1];
const size_t state_trans_base_idx = 2;
const T* x = emission_weights.data<T>();
const T* w = transition_weights.data<T>();
int* path = decoded_path->data<int>();
// alpha is a memo table. An element alpha(k, v) records the score of the
// best sequence of tags from position 1 to position k with v being the end
// tag.
Tensor alpha;
T* alpha_value = alpha.mutable_data<T>(emission_dims, platform::CPUPlace());
Tensor track;
int* track_value =
track.mutable_data<int>(emission_dims, platform::CPUPlace());
for (size_t i = 0; i < tag_num; ++i) alpha_value[i] = w[i] + x[i];
for (size_t k = 1; k < seq_len; ++k) {
for (size_t i = 0; i < tag_num; ++i) {
T max_score = -std::numeric_limits<T>::max();
int max_j = 0;
for (size_t j = 0; j < tag_num; ++j) {
T score = alpha_value[(k - 1) * tag_num + j] +
w[(j + state_trans_base_idx) * tag_num + i];
if (score > max_score) {
max_score = score;
max_j = j;
}
}
alpha_value[k * tag_num + i] = max_score + x[k * tag_num + i];
track_value[k * tag_num + i] = max_j;
}
}
T max_score = -std::numeric_limits<T>::max();
int max_i = 0;
for (size_t i = 0; i < tag_num; ++i) {
T score = alpha_value[(seq_len - 1) * tag_num + i] + w[tag_num + i];
if (score > max_score) {
max_score = score;
max_i = i;
}
}
path[seq_len - 1] = max_i;
for (int k = seq_len - 1; k >= 1; --k) {
path[k - 1] = max_i = track_value[k * tag_num + max_i];
}
}
};
} // namespace operators
} // namespace paddle
...@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X",
"The input of pad op. " "The input of pad op. "
"The input should be a k-D tensor(k > 0 and k < 7)"); "The input should be a k-D tensor(k > 0 and k < 7).");
AddInput("Y", AddInput("Y",
"The input used as reference for cropping" "The input used as reference for cropping, "
" with the same dimension as X. ") "which is of the same dimensions as X.")
.AsDispensable(); .AsDispensable();
AddOutput("Out", AddOutput("Out",
"The output of crop op " "The output of crop op, "
"with the same dimension as X."); "which is of the same dimensions as X.");
AddAttr<std::vector<int>>("offsets", AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped." "A list<int> describing offsets to be cropped. "
"The size of offsets list should be as same as " "The size of offsets list should be the same as "
"dimension size of input X."); "the dimension size of input X.");
AddAttr<std::vector<int>>("shape", AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output." "A list<int> describing the shape of output. "
"The size of shape list should be as same as " "The size of shape list should be the same as "
"dimension size of input X.") "the dimension size of input X.")
.SetDefault(std::vector<int>()); .SetDefault(std::vector<int>());
AddComment(R"DOC( AddComment(R"DOC(
Crop Operator. Crop Operator.
Crop input into output, as specified by offsets and shape. Crop input into output, as specified by offsets and shape.
There are two ways to set shape: There are two ways to set shape:
1. referenc input: crop input X as shape as reference input. 1. reference input: crop input X into the same shape as reference input.
The dimension of reference input should The dimension of reference input should
be as same as input X. be the same as the dimension of input X.
2. shape list: crop input X by shape described by a list<int>. 2. shape list: crop input X into the shape described by a list<int>.
The size of shape list should be as same as The size of shape list should be the same as
dimension size of input X. the dimension size of input X.
The input should be a k-D tensor(k > 0 and k < 7). As an example: The input should be a k-D tensor(k > 0 and k < 7). As an example:
...@@ -91,20 +92,20 @@ Given: ...@@ -91,20 +92,20 @@ Given:
X = [[0, 1, 2, 0, 0] X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0] [0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]] [0, 0, 0, 0, 0]],
and and
offsets = [0, 1] offsets = [0, 1],
and and
shape = [2, 2] shape = [2, 2],
then we get we get:
Out = [[1, 2], Out = [[1, 2],
[3, 4]] [3, 4]].
)DOC"); )DOC");
} }
......
...@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel { ...@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
} }
protected: protected:
// Explicitly set that data type of the output of the cross_entropy operator // Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X". // is determined by its input "X".
framework::DataType IndicateDataType( framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
...@@ -96,7 +96,8 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { ...@@ -96,7 +96,8 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
} }
protected: protected:
// CrossEntropy's data type just determined by "X" // Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X".
framework::DataType IndicateDataType( framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<Tensor>("X")->type()); return framework::ToDataType(ctx.Input<Tensor>("X")->type());
...@@ -113,21 +114,17 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -113,21 +114,17 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
"where N is the batch size and D is the number of classes. " "where N is the batch size and D is the number of classes. "
"This input is a probability computed by the previous operator, " "This input is a probability computed by the previous operator, "
"which is almost always the result of a softmax operator."); "which is almost always the result of a softmax operator.");
AddInput( AddInput("Label",
"Label", "(Tensor), the ground truth which is a 2-D tensor. When "
"(Tensor, default Tensor<int>), the ground truth which is " "soft_label is set to false, Label is a Tensor<int64> with shape "
"a 2-D tensor. " "[N x 1]. When soft_label is set to true, Label is a "
"When soft_label is set to false, `Label` is a Tensor<int> with shape " "Tensor<float/double> with shape [N x K].");
"[N x 1]. "
"When soft_label is set to true, `Label` is a Tensor<float/double> "
"with shape [N x K].");
AddOutput("Y", AddOutput("Y",
"(Tensor, default Tensor<float>), a 2-D tensor " "(Tensor, default Tensor<float>), a 2-D tensor with shape "
"with shape [N x 1]. The cross entropy loss."); "[N x 1]. The cross entropy loss.");
AddAttr<bool>( AddAttr<bool>("soft_label",
"soft_label", "(bool, default false), a flag indicating whether to "
"(bool, default false), a flag to indicate whether to interpretate " "interpretate the given labels as soft labels.")
"the given labels as soft labels.")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
CrossEntropy Operator. CrossEntropy Operator.
...@@ -137,13 +134,13 @@ computation. ...@@ -137,13 +134,13 @@ computation.
1) One-hot cross-entropy: 1) One-hot cross-entropy:
soft_label = false, Label[i, 0] indicates the class index for sample i: soft_label = false, Label[i, 0] indicates the class index for sample i:
Y[i] = -log(X[i, Label[i]]) $Y[i] = -\log(X[i, Label[i]])$
2) Soft-label cross-entropy: 2) Soft-label cross-entropy:
soft_label = true, Label[i, j] indicates the soft label of class j soft_label = true, Label[i, j] indicates the soft label of class j
for sample i: for sample i:
Y[i] = \sum_j{-Label[i, j] * log(X[i, j])} $Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}$
Please make sure that in this case the summuation of each row of Label Please make sure that in this case the summuation of each row of Label
equals one. equals one.
...@@ -153,8 +150,9 @@ computation. ...@@ -153,8 +150,9 @@ computation.
non-zero element (equals 1), soft-label cross-entropy degenerates to a non-zero element (equals 1), soft-label cross-entropy degenerates to a
one-hot cross-entropy with one-hot label representation. one-hot cross-entropy with one-hot label representation.
Both the input `X` and `Label` can carry the LoD (Level of Details) information, Both the input X and Label can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`. or not. But the output only shares the LoD information with input X.
)DOC"); )DOC");
} }
}; };
......
...@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
"Constant for numerical stability") "Constant for numerical stability")
.SetDefault(1.0e-6f); .SetDefault(1.0e-6f);
AddComment(R"DOC( AddComment(R"DOC(
Decayed Adagrad Optimizer.
Decayed Adagrad The update is done as follows:
moment_out = decay * moment + (1 - decay) * grad * grad $$
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon) moment\_out = decay * moment + (1 - decay) * grad * grad \\
param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
$$
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have an epsilon attribute. It is added here for numerical
stability to avoid the division by zero error.
)DOC"); )DOC");
} }
......
...@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
DropoutOpMaker(framework::OpProto* proto, DropoutOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddInput("X", "The input of dropout op."); AddInput("X", "The input of dropout op.");
AddOutput("Out", "The output of dropout op."); AddOutput("Out", "The output of dropout op.");
AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate(); AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddComment(R"DOC( AddComment(R"DOC(
Dropout Operator. Dropout Operator.
'Dropout' refers to randomly dropping out units in a nerual network. It is a Dropout refers to randomly dropping out units in a nerual network. It is a
regularization technique for reducing overfitting by preventing neuron regularization technique for reducing overfitting by preventing neuron
co-adaption during training. The dropout operator randomly set (according to co-adaption during training. The dropout operator randomly set (according to
the given dropout probability) the outputs of some units to zero, while others the given dropout probability) the outputs of some units to zero, while others
being set to their inputs. are set equal to their corresponding inputs.
)DOC"); )DOC");
} }
}; };
......
...@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker ...@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker
RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward]; RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
// inputs and outputs stored in proto // inputs and outputs stored in proto
AddInput(name.inlinks, AddInput(name.inlinks,
"the inputs that need to be segmented for each step.") "The inputs that need to be segmented for each step.")
.AsDuplicable(); .AsDuplicable();
AddInput(name.initial_states, "variables to initialize states.") AddInput(name.initial_states, "Variables to initialize the states.")
.AsDuplicable(); .AsDuplicable();
AddOutput(name.outlinks, "the outputs that need to concated for all steps.") AddOutput(name.outlinks,
"The outputs that need to be concatenated for all steps.")
.AsDuplicable(); .AsDuplicable();
AddOutput(name.step_scopes, "step scopes"); AddOutput(name.step_scopes, "step scopes");
...@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker ...@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker
AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states"); AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
AddAttr<std::vector<std::string>>(name.states, "names of states"); AddAttr<std::vector<std::string>>(name.states, "names of states");
AddComment("This is a RNN operator for varience-length sequences."); AddComment(R"DOC(
Dynamic Recurrent Operator.
This is a RNN operator for varience-length sequences.
)DOC");
} }
}; };
......
...@@ -22,7 +22,7 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker { ...@@ -22,7 +22,7 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
ElementwiseAddOpMaker(framework::OpProto* proto, ElementwiseAddOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) { : ElementwiseOpMaker(proto, op_checker) {
SetComment("add", "Out = X + Y"); SetComment("Add", "$Out = X + Y$");
AddComment(comment_); AddComment(comment_);
} }
}; };
......
...@@ -22,7 +22,7 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker { ...@@ -22,7 +22,7 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
ElementwiseDivOpMaker(framework::OpProto* proto, ElementwiseDivOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) { : ElementwiseOpMaker(proto, op_checker) {
SetComment("Div", "Out = X / Y"); SetComment("Div", "$Out = X / Y$");
AddComment(comment_); AddComment(comment_);
} }
}; };
......
...@@ -23,7 +23,7 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker { ...@@ -23,7 +23,7 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
ElementwiseMulOpMaker(framework::OpProto* proto, ElementwiseMulOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) { : ElementwiseOpMaker(proto, op_checker) {
SetComment("Mul", "Out = X ⊙ Y"); SetComment("Mul", "$Out = X \\odot\\ Y$");
AddComment(comment_); AddComment(comment_);
} }
}; };
......
...@@ -46,29 +46,33 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -46,29 +46,33 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
ElementwiseOpMaker(framework::OpProto* proto, ElementwiseOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", R"DOC( AddInput("X", "(Tensor) The first input tensor of elementwise op");
The first input of elementwise op, it's a tensor of any dimensions. AddInput("Y", "(Tensor) The second input tensor of elementwise op");
)DOC"); AddOutput("Out", "The output of elementwise op");
AddInput("Y", R"DOC(
The sencond input of elementwise op, it's a tensor and it's dimensions
must be small or equal to X's dimensions.
)DOC");
AddAttr<int>("axis", AddAttr<int>("axis",
R"DOC( "(int, default -1) The starting dimension index "
When the shape(Y) does not equal the shape(X),Y will be broadcasted "for broadcasting Y onto X")
to match the shape of X and axis should be dimension index Y in X
)DOC")
.SetDefault(-1) .SetDefault(-1)
.EqualGreaterThan(-1); .EqualGreaterThan(-1);
AddOutput("Out", "The output of elementwise op");
comment_ = R"DOC( comment_ = R"DOC(
Limited elementwise {name} operator.The equation is: Out = {equation}. Limited Elementwise {name} Operator.
1. The shape of Y should be same with X or
2. Y's shape is a subset of X. The equation is:
Y will be broadcasted to match the shape of X and axis should be dimension index Y in X.
{equation}
example: X is a tensor of any dimension and the dimensions of tensor Y must be smaller than
or equal to the dimensions of X.
There are two cases for this operator:
1. The shape of Y is same with X;
2. The shape of Y is a subset of X.
For case 2:
Y will be broadcasted to match the shape of X and axis should be
the starting dimension index for broadcasting Y onto X.
example:
shape(X) = (2, 3, 4, 5), shape(Y) = (,) shape(X) = (2, 3, 4, 5), shape(Y) = (,)
shape(X) = (2, 3, 4, 5), shape(Y) = (5,) shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5) shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
...@@ -76,7 +80,8 @@ Limited elementwise {name} operator.The equation is: Out = {equation}. ...@@ -76,7 +80,8 @@ Limited elementwise {name} operator.The equation is: Out = {equation}.
shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0 shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
Both the input X and Y can carry the LoD (Level of Details) information, Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input X. or not. But the output only shares the LoD information with input X.
)DOC"; )DOC";
AddComment(comment_); AddComment(comment_);
} }
......
...@@ -22,7 +22,7 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker { ...@@ -22,7 +22,7 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
ElementwiseSubOpMaker(framework::OpProto* proto, ElementwiseSubOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) { : ElementwiseOpMaker(proto, op_checker) {
SetComment("Sub", "Out = X - Y"); SetComment("Sub", "$Out = X - Y$");
AddComment(comment_); AddComment(comment_);
} }
}; };
......
...@@ -59,8 +59,13 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -59,8 +59,13 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of feed op"); AddInput("X", "The input of feed op");
AddOutput("Out", "The output of feed op"); AddOutput("Out", "The output of feed op");
AddComment("feed op, it should not be configured by users directly"); AddAttr<int>("col", "(int) The column of feed");
AddAttr<int>("col", "column of feed"); AddComment(R"DOC(
Feed Operator.
It should not be configured by users directly.
)DOC");
} }
}; };
......
...@@ -66,8 +66,13 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -66,8 +66,13 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of fetch op"); AddInput("X", "The input of fetch op");
AddOutput("Out", "The output of fetch op"); AddOutput("Out", "The output of fetch op");
AddComment("fetch op, it should not be configured by users directly"); AddAttr<int>("col", "(int) The column of fetch");
AddAttr<int>("col", "column of fetch"); AddComment(R"DOC(
Fetch Operator.
It should not be configured by users directly.
)DOC");
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -70,11 +70,16 @@ class FillConstantBatchSizeLikeOpMaker ...@@ -70,11 +70,16 @@ class FillConstantBatchSizeLikeOpMaker
"with the specified value"); "with the specified value");
AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output"); AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
AddAttr<int>("dim_idx", AddAttr<int>("dim_idx",
"(int, default 0) the index of batch size dimension") "(int, default 0) The index of batch size dimension")
.SetDefault(0); .SetDefault(0);
AddAttr<float>("value", "(float, default 0) The value to be filled") AddAttr<float>("value", "(float, default 0) The value to be filled")
.SetDefault(0.0f); .SetDefault(0.0f);
AddComment(R"DOC(Fill up a variable with specified constant value.)DOC"); AddComment(R"DOC(
FillConstantBatchSizeLike Operator.
Fill up a variable with specified constant value.
)DOC");
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -54,7 +54,12 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -54,7 +54,12 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", AddOutput("Out",
"(Tensor) Tensor of specified shape will be filled " "(Tensor) Tensor of specified shape will be filled "
"with the specified value"); "with the specified value");
AddComment(R"DOC(Fill up a variable with specified constant value.)DOC"); AddComment(R"DOC(
FillConstantBatchSizeLike Operator.
Fill up a variable with specified constant value.
)DOC");
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -37,11 +37,13 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -37,11 +37,13 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of fill-zeros-like op."); AddInput("X", "The input of fill-zeros-like op.");
AddOutput("Y", "The varibale will be filled up with zeros."); AddOutput("Y", "The variable will be filled up with zeros.");
AddComment(R"DOC( AddComment(R"DOC(
Fill up a vriable with zeros. FillZerosLike Operator.
Fill up a variable with zeros.
The output will have the same size as the input.
The output will have the same size with input.
)DOC"); )DOC");
} }
}; };
......
...@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The source input of gather op"); AddInput("X", "The source input of gather op");
AddInput("Index", "The index input of gather op"); AddInput("Index", "The index input of gather op");
AddOutput("Out", "The output of add op"); AddOutput("Out", "The output of gather op");
AddComment(R"DOC( AddComment(R"DOC(
Gather Operator by selecting from the first axis, Gather Operator.
$Out = X[Index]$
Out is obtained by gathering entries of the outer-most dimension
of X indexed by Index and concatenate them together.
Example:
X = [[1, 2],
[3, 4],
[5, 6]]
Index = [[1, 2]]
Then:
Out = [[3, 4],
[5, 6]]
Out = X[Index]
)DOC"); )DOC");
} }
}; };
......
...@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
GaussianRandomOpMaker(framework::OpProto* proto, GaussianRandomOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "output matrix of random op"); AddOutput("Out", "Output matrix of gaussian random op");
AddComment(R"DOC(
GaussianRandom operator.
Use to initialize tensor with gaussian random generator.
)DOC");
AddAttr<std::vector<int>>("shape", "The dimension of random tensor."); AddAttr<std::vector<int>>("shape",
AddAttr<float>("mean", "mean of random tensor.").SetDefault(.0f); "(vector<int>) "
AddAttr<float>("std", "std of random tensor.").SetDefault(1.0f); "The dimension of random tensor.");
AddAttr<float>("mean",
"(float, default 0.0) "
"mean of random tensor.")
.SetDefault(.0f);
AddAttr<float>("std",
"(float, default 1.0) "
"std of random tensor.")
.SetDefault(1.0f);
AddAttr<int>("seed", AddAttr<int>("seed",
"(int, default 0) "
"Random seed of generator." "Random seed of generator."
"0 means use system wide seed") "0 means use system wide seed.")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("data_type", "output data type") AddAttr<int>("data_type",
"(int, default 5(FP32)) "
"Output data type.")
.SetDefault(framework::DataType::FP32); .SetDefault(framework::DataType::FP32);
AddComment(R"DOC(
GaussianRandom Operator.
Used to initialize tensors with gaussian random generator.
)DOC");
} }
}; };
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gru_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class GRUOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of GRUOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasOutput("BatchGate"),
"Output(%s) of GRUOp should not be null.", "BatchGate");
PADDLE_ENFORCE(ctx->HasOutput("BatchResetHiddenPrev"),
"Output(%s) of GRUOp should not be null.",
"BatchResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasOutput("BatchHidden"),
"Output(%s) of GRUOp should not be null.", "BatchHidden");
PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
"Output(%s) of GRUOp should not be null.", "Hidden");
auto input_dims = ctx->GetInputDim("Input");
auto weight_dims = ctx->GetInputDim("Weight");
int input_size = input_dims[1];
int frame_size = weight_dims[0];
PADDLE_ENFORCE_EQ(input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUOp.");
PADDLE_ENFORCE_EQ(
weight_dims[1], frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
if (ctx->HasInput("H0")) {
auto h0_dims = ctx->GetInputDim("H0");
PADDLE_ENFORCE_EQ(h0_dims[1], frame_size,
"The width of H0 must be equal to frame_size.");
}
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
}
ctx->SetOutputDim("BatchGate", input_dims);
ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size});
ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size});
ctx->SetOutputDim("Hidden", {input_dims[0], frame_size});
ctx->ShareLoD("Input", "Hidden");
}
};
class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(LoDTensor) The first input is a LodTensor, which supports "
"variable-time length input sequence. The underlying tensor in "
"this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
"total time steps in this mini-batch, D is the hidden size.");
AddInput("H0",
"(Tensor, optional) The initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size, D is the hidden size.")
.AsDispensable();
AddInput(
"Weight",
"(Tensor) The learnable hidden-hidden weight matrix with shape "
"(D x 3D), where D is the hidden size. The elements continuous in "
"memory can be divided into two parts. The first part are weights of "
"the update gate and reset gate with shape (D x 2D), and the second "
"part are weights of output candidate with shape (D x D).");
AddInput("Bias",
"(Tensor, optional) Bias vector with shape (1 x 3D) concating "
"bias of the update gate, reset gate and output candidate.")
.AsDispensable();
AddOutput("BatchGate",
"(LoDTensor) To compute with batches, sequence data will be "
"reorganized into several successive batches each containing "
"data from the same time step. The LoDTensor BatchGate contains "
"the update gate, reset gate and output candidate values "
"organized in batches. The LoD size is 2. The first LoD contains "
"the batch offsets and the second LoD contains the indexes in "
"the raw sequence data.")
.AsIntermediate();
AddOutput(
"BatchResetHiddenPrev",
"(LoDTensor) The reseted hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
AddOutput(
"BatchHidden",
"(LoDTensor) The hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
AddOutput(
"Hidden",
"(LoDTensor) the hidden state LoDTensor organized in sequences. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.");
AddAttr<std::string>("activation",
"(string, default tanh) "
"The activation type used for output candidate {h}_t.")
.SetDefault("tanh");
AddAttr<std::string>(
"gate_activation",
"(string, default sigmoid) "
"The activation type used in update gate and reset gate.")
.SetDefault("sigmoid");
AddAttr<bool>("is_reverse",
"(bool, defalut: False) "
"whether to compute reversed GRU.")
.SetDefault(false);
AddComment(R"DOC(
GRU Operator implements part calculations of the complete GRU as following:
\f[
update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
\f]
@note To implement the complete GRU, fully-connected operator must be used
before to feed xu, xr and xc as the Input of GRU operator.
)DOC");
}
};
class GRUGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of GRUGradOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUGradOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasInput("BatchGate"),
"Input(%s) of GRUGradOp should not be null.", "BatchGate");
PADDLE_ENFORCE(ctx->HasInput("BatchResetHiddenPrev"),
"Input(%s) of GRUGradOp should not be null.",
"BatchResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("BatchHidden"),
"Input(%s) of GRUOp should not be null.", "BatchHidden");
PADDLE_ENFORCE(ctx->HasInput("Hidden"),
"Input(%s) of GRUGradOp should not be null.", "Hidden");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
"Input(%s@GRAD) of GRUGradOp should not be null.", "Hidden");
auto input_dims = ctx->GetInputDim("Input");
auto weight_dims = ctx->GetInputDim("Weight");
int input_size = input_dims[1];
int frame_size = weight_dims[0];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
PADDLE_ENFORCE_EQ(input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUOp.");
PADDLE_ENFORCE_EQ(
weight_height, frame_size,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
if (ctx->HasInput("H0")) {
auto h0_dims = ctx->GetInputDim("H0");
PADDLE_ENFORCE_EQ(h0_dims[1], frame_size,
"The width of H0 must be equal to frame_size.");
auto h0_grad_name = framework::GradVarName("H0");
if (ctx->HasOutput(h0_grad_name))
ctx->SetOutputDim(h0_grad_name, h0_dims);
}
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto input_grad_name = framework::GradVarName("Input");
if (ctx->HasOutput(input_grad_name))
ctx->SetOutputDim(input_grad_name, input_dims);
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(weight_grad_name))
ctx->SetOutputDim(weight_grad_name, weight_dims);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(gru, ops::GRUOp, ops::GRUOpMaker, gru_grad, ops::GRUGradOp);
REGISTER_OP_CPU_KERNEL(gru, ops::GRUKernel<paddle::platform::CPUPlace, float>,
ops::GRUKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(gru_grad,
ops::GRUGradKernel<paddle::platform::CPUPlace, float>,
ops::GRUGradKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gru_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(gru, ops::GRUKernel<paddle::platform::GPUPlace, float>,
ops::GRUKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(gru_grad,
ops::GRUGradKernel<paddle::platform::GPUPlace, float>,
ops::GRUGradKernel<paddle::platform::GPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/math/gru_compute.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
const T* h0_data = h0 ? h0->data<T>() : nullptr;
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
context.ShareLoD("Input", "Hidden");
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<Place, T> to_batch;
to_batch(context.device_context(), *input, *batch_gate, true, is_reverse);
int frame_size = hidden_dims[1];
int batch_size = hidden_dims[0];
auto g = EigenMatrix<T>::From(*batch_gate);
auto place = context.GetEigenDevice<Place>();
if (bias) {
auto b = EigenMatrix<T>::From(*bias);
g.device(place) = g +
b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
.broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
}
math::hl_gru_value<T> gru_value;
gru_value.gateWeight = const_cast<T*>(weight_data);
gru_value.stateWeight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
gru_value.prevOutValue = const_cast<T*>(h0_data);
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.outputValue = hidden_t.data<T>();
gru_value.gateValue = gate_t.data<T>();
gru_value.resetOutputValue = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<Place, T>::compute(
context.device_context(), gru_value, frame_size, cur_batch_size,
math::ActiveType(context.Attr<std::string>("activation")),
math::ActiveType(context.Attr<std::string>("gate_activation")));
gru_value.prevOutValue = gru_value.outputValue;
}
math::Batch2LoDTensorFunctor<Place, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(context.device_context(), *batch_hidden, *hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
template <typename Place, typename T>
class GRUGradKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* h0 = context.Input<Tensor>("H0");
const T* h0_data = h0 ? h0->data<T>() : nullptr;
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* batch_gate = context.Input<LoDTensor>("BatchGate");
auto* batch_reset_hidden_prev =
context.Input<LoDTensor>("BatchResetHiddenPrev");
auto* batch_hidden = context.Input<LoDTensor>("BatchHidden");
auto* hidden = context.Input<LoDTensor>("Hidden");
auto* hidden_grad =
context.Input<LoDTensor>(framework::GradVarName("Hidden"));
auto* input_grad =
context.Output<LoDTensor>(framework::GradVarName("Input"));
auto* h0_grad = context.Output<Tensor>(framework::GradVarName("H0"));
auto* weight_grad =
context.Output<Tensor>(framework::GradVarName("Weight"));
auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
auto gate_dims = batch_gate->dims();
auto hidden_dims = hidden->dims();
int frame_size = hidden_dims[1];
math::LoDTensor2BatchFunctor<Place, T> to_batch;
LoDTensor batch_hidden_grad, batch_gate_grad, batch_reset_hidden_prev_grad;
batch_hidden_grad.mutable_data<T>(hidden_dims, context.GetPlace());
batch_gate_grad.mutable_data<T>(gate_dims, context.GetPlace());
batch_reset_hidden_prev_grad.mutable_data<T>(hidden_dims,
context.GetPlace());
math::SetConstant<Place, T> zero;
zero(context.device_context(), &batch_hidden_grad, static_cast<T>(0.0));
zero(context.device_context(), &batch_gate_grad, static_cast<T>(0.0));
zero(context.device_context(), &batch_reset_hidden_prev_grad,
static_cast<T>(0.0));
bool is_reverse = context.Attr<bool>("is_reverse");
batch_hidden_grad.set_lod(batch_hidden->lod());
to_batch(context.device_context(), *hidden_grad, batch_hidden_grad, false,
is_reverse);
math::hl_gru_value<T> gru_value;
gru_value.gateWeight = const_cast<T*>(weight_data);
gru_value.stateWeight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
math::hl_gru_grad<T> gru_grad;
if (weight_grad) {
gru_grad.gateWeightGrad =
weight_grad->mutable_data<T>(context.GetPlace());
zero(context.device_context(), weight_grad, static_cast<T>(0.0));
gru_grad.stateWeightGrad =
weight_grad->data<T>() + 2 * frame_size * frame_size;
} else {
gru_grad.gateWeightGrad = nullptr;
gru_grad.stateWeightGrad = nullptr;
}
auto batch_starts = batch_hidden_grad.lod()[0];
size_t num_batch = batch_starts.size() - 1;
for (int n = static_cast<int>(num_batch) - 1; n >= 0; n--) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
gru_value.gateValue = gate_t.data<T>();
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
gru_value.resetOutputValue = reset_hidden_prev_t.data<T>();
Tensor hidden_grad_t = batch_hidden_grad.Slice(bstart, bend);
gru_grad.outputGrad = hidden_grad_t.data<T>();
Tensor gate_grad_t = batch_gate_grad.Slice(bstart, bend);
gru_grad.gateGrad = gate_grad_t.data<T>();
Tensor reset_hidden_prev_grad_t =
batch_reset_hidden_prev_grad.Slice(bstart, bend);
gru_grad.resetOutputGrad = reset_hidden_prev_grad_t.data<T>();
if (n == 0) {
gru_value.prevOutValue = const_cast<T*>(h0_data);
if (h0_grad) {
T* h0_grad_data = h0_grad->mutable_data<T>(context.GetPlace());
zero(context.device_context(), h0_grad, static_cast<T>(0.0));
gru_grad.prevOutGrad = h0_grad_data;
} else {
gru_grad.prevOutGrad = nullptr;
}
} else {
int bstart_pre = static_cast<int>(batch_starts[n - 1]);
Tensor hidden_prev_t = batch_hidden->Slice(bstart_pre, bstart);
gru_value.prevOutValue = hidden_prev_t.data<T>();
Tensor hidden_prev_grad_t = batch_hidden_grad.Slice(bstart_pre, bstart);
gru_grad.prevOutGrad = hidden_prev_grad_t.data<T>();
}
math::GRUUnitGradFunctor<Place, T>::compute(
context.device_context(), gru_value, gru_grad, frame_size,
cur_batch_size,
math::ActiveType(context.Attr<std::string>("activation")),
math::ActiveType(context.Attr<std::string>("gate_activation")));
}
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
math::Batch2LoDTensorFunctor<Place, T> to_seq;
batch_gate_grad.set_lod(batch_gate->lod());
to_seq(context.device_context(), batch_gate_grad, *input_grad);
}
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
auto d_b = EigenMatrix<T>::From(*bias_grad);
auto d_g = EigenMatrix<T>::From(batch_gate_grad);
auto place = context.GetEigenDevice<Place>();
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
}
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
...@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("HiddenPrev", AddInput("HiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the " "(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step."); "states of previous time step.");
AddInput("Weight", AddInput(
"Weight",
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. " "(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. " "The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate " "The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are " "with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]"); "weights of output candidate with shape [frame_size, frame_size].");
AddInput("Bias", AddInput(
"(Tensor) Bias vector with shape [1, frame_size * 3] concating " "Bias",
"(Tensor) Bias vector with shape [1, frame_size * 3] concatenating "
"bias of the update gate, reset gate and output candidate.") "bias of the update gate, reset gate and output candidate.")
.AsDispensable(); .AsDispensable();
AddOutput("Gate", AddOutput("Gate",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the " "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate") "output of update gate, reset gate and output candidate.")
.AsIntermediate(); .AsIntermediate();
AddOutput("ResetHiddenPrev", AddOutput("ResetHiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the " "(Tensor) Matrix with shape [batch_size, frame_size] for the "
...@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(sigmoid) .SetDefault(sigmoid)
.InEnum({identity, sigmoid, tanh, relu}); .InEnum({identity, sigmoid, tanh, relu});
AddComment(R"DOC( AddComment(R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following: GRUUnit Operator.
\f[ This operator implements partial calculations of the GRU unit as follows:
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\ $$
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\ update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev) reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
\f] output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
$$
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
)DOC"); )DOC");
} }
}; };
......
...@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
"The shape is same as Input(X) and will be reused in backward.") "The shape is same as Input(X) and will be reused in backward.")
.AsIntermediate(); .AsIntermediate();
AddOutput("Out", AddOutput("Out",
"The output tensor with shape [batch_size, 1] which represents " "The output tensor with shape [batch_size, 1] "
"the huber loss."); "which represents the huber loss.");
AddAttr<AttrType>("delta", "Hyper parameter in huber loss."); AddAttr<AttrType>("delta", "Hyper parameter in huber loss.");
AddComment(R"DOC( AddComment(R"DOC(
HuberLoss Operator.
Huber loss is a loss function used in robust regression. We define X as the Huber loss is a loss function used in robust regression. We define X as the
input value and Y as the target value. Huber loss can evaluate the fitness of input value and Y as the target value. Huber loss can evaluate the fitness of
X to Y. Different from MSE loss, Huber loss is more robust for outliers. The X to Y. Different from MSE loss, Huber loss is more robust for outliers. The
......
...@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) The input tensor of increment operator"); AddInput("X", "(Tensor) The input tensor of increment operator");
AddOutput("Out", "(Tensor) The output tensor of increment operator."); AddOutput("Out", "(Tensor) The output tensor of increment operator.");
AddComment(R"DOC(Increment operator
The equation is: Out = X + step
)DOC");
AddAttr<AttrType>("step", AddAttr<AttrType>("step",
"(float, default 1.0) "
"The step size by which the " "The step size by which the "
"input tensor will be incremented.") "input tensor will be incremented.")
.SetDefault(1.0); .SetDefault(1.0);
AddComment(R"DOC(
Increment Operator.
The equation is:
$$Out = X + step$$
)DOC");
} }
}; };
......
...@@ -57,7 +57,7 @@ L1 Norm Operator. ...@@ -57,7 +57,7 @@ L1 Norm Operator.
Computes the L1 norm of a tensor. Computes the L1 norm of a tensor.
Out = sum (abs(X)) $$Out = \sum{|X|}$$
)DOC"); )DOC");
} }
......
...@@ -22,52 +22,55 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -22,52 +22,55 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
LinearChainCRFOpMaker(framework::OpProto* proto, LinearChainCRFOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput("Emission",
"Emission", "(LoDTensor, default LoDTensor<float>) "
"(LoDTensor, default: LoDTensor<float>). " "A 2-D LoDTensor with shape [N x D], where N is the size of the "
"The unscaled emission weight matrix for the linear chain CRF. " "mini-batch and D is the total tag number. The unscaled emission "
"This input is a LoDTensor with shape [N x D] where N is the size of " "weight matrix for the linear chain CRF. ");
"the mini-batch and D is the total tag number."); AddInput("Transition",
AddInput( "(Tensor, default Tensor<float>) A 2-D Tensor with shape "
"Transition", "[(D + 2) x D]. The learnable parameter for the linear_chain_crf "
"(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. " "operator. See more details in the operator's comments.");
"The learnable parameter for the linear_chain_crf operator. " AddInput("Label",
"See more details in the operator's comments."); "(LoDTensor, default LoDTensor<int>) A LoDTensor with shape "
AddInput( "[N x 1], where N is the total element number in a mini-batch. "
"Label", "The ground truth.");
"(LoDTensor, default: LoDTensor<int>). The ground truth which is a 2-D "
"LoDTensor with shape [N x 1], where N is the total element number in "
"a mini-batch.");
AddOutput( AddOutput(
"Alpha", "Alpha",
"Tensor, default: Tensor<float>. The forward vectors for the entire " "(Tensor, default Tensor<float>) A 2-D Tensor with shape [N x D]. "
"batch. A two dimensional tensor with shape [N x D], " "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. "
"denoted as \f$\alpha\f$. \f$\alpha$\f is a memo table used to " "\f$\alpha$\f is a memo table used to calculate the normalization "
"calculate the normalization factor in CRF. \f$\alpha[k, v]$\f stores " "factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized "
"the unnormalized probabilites of all possible unfinished sequences of " "probabilites of all possible unfinished sequences of tags that end at "
"tags that end at position \f$k$\f with tag \f$v$\f. For each \f$k$\f, " "position \f$k$\f with tag \f$v$\f. For each \f$k$\f, "
"\f$\alpha[k, v]$\f is a vector of length \f$D$\f with a component for " "\f$\alpha[k, v]$\f is a vector of length \f$D$\f with a component for "
"each tag value \f$v$\f. This vector is called a forward vecotr and " "each tag value \f$v$\f. This vector is called a forward vecotr and "
"will also be used in backward computations.") "will also be used in backward computations.")
.AsIntermediate(); .AsIntermediate();
AddOutput("EmissionExps", AddOutput(
"EmissionExps",
"(Tensor, default Tensor<float>) A 2-D Tensor with shape [N x D]. "
"The exponentials of Input(Emission). This is an intermediate " "The exponentials of Input(Emission). This is an intermediate "
"computational result in forward computation, and will be reused " "computational result in forward computation, and will be reused in "
"in backward computation.") "backward computation.")
.AsIntermediate(); .AsIntermediate();
AddOutput("TransitionExps", AddOutput(
"The exponentials of Input(Transition). This is an intermediate " "TransitionExps",
"computational result in forward computation, and will be reused " "(Tensor, default Tensor<float>) A 2-D Tensor with shape "
"in backward computation.") "[(D + 2) x D]. The exponentials of Input(Transition). This is an "
"intermediate computational result in forward computation, and "
"will be reused in backward computation.")
.AsIntermediate(); .AsIntermediate();
AddOutput( AddOutput(
"LogLikelihood", "LogLikelihood",
"(Tensor, default: Tensor<float>). The logarithm of the conditional " "(Tensor, default Tensor<float>) The logarithm of the conditional "
"likelihood of each training sample in a mini-batch. This is a 2-D " "likelihood of each training sample in a mini-batch. This is a 2-D "
"tensor with shape [S x 1], where S is the sequence number in a " "tensor with shape [S x 1], where S is the sequence number in a "
"mini-batch. Note: S is equal to the sequence number in a mini-batch. " "mini-batch. Note: S is equal to the sequence number in a mini-batch. "
"The output is no longer a LoDTensor."); "The output is no longer a LoDTensor.");
AddComment(R"DOC( AddComment(R"DOC(
LinearChainCRF Operator.
Conditional Random Field defines an undirected probabilistic graph with nodes Conditional Random Field defines an undirected probabilistic graph with nodes
denoting random variables and edges denoting dependencies between these denoting random variables and edges denoting dependencies between these
variables. CRF learns the conditional probability \f$P(Y|X)\f$, where variables. CRF learns the conditional probability \f$P(Y|X)\f$, where
...@@ -81,29 +84,28 @@ and output must be linear sequences. Thus, the graph of such a CRF is a simple ...@@ -81,29 +84,28 @@ and output must be linear sequences. Thus, the graph of such a CRF is a simple
chain or a line, which results in the linear chain CRF. chain or a line, which results in the linear chain CRF.
This operator implements the Forward-Backward algorithm for the linear chain This operator implements the Forward-Backward algorithm for the linear chain
CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf and CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and
http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for reference. http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details.
Equation: Equation:
1. Denote Input(Emission) to this operator as \f$x\f$ here.
- Denote Input(Emission) to this operator as \f$x\f$ here. 2. The first D values of Input(Transition) to this operator are for starting
- The first D values of Input(Transition) to this operator are for starting
weights, denoted as \f$a\f$ here. weights, denoted as \f$a\f$ here.
- The next D values of Input(Transition) of this operator are for ending 3. The next D values of Input(Transition) of this operator are for ending
weights, denoted as \f$b\f$ here. weights, denoted as \f$b\f$ here.
- The remaning values of Input(Transition) are for transition weights, 4. The remaning values of Input(Transition) are for transition weights,
denoted as \f$w\f$ here. denoted as \f$w\f$ here.
- Denote Input(Label) as \f$s\f$ here. 5. Denote Input(Label) as \f$s\f$ here.
The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as: The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as:
\f$P(s) = (1/Z) exp(a_{s_1} + b_{s_L} \f$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L}
+ \sum_{l=1}^L x_{s_l} + \sum_{l=1}^L x_{s_l}
+ \sum_{l=2}^L w_{s_{l-1},s_l})\f$ + \sum_{l=2}^L w_{s_{l-1},s_l})\f$
where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over
all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight
to the linear chain CRF. to the linear chain CRF.
Finaly, the linear chain CRF operator outputs the logarithm of the conditional Finally, the linear chain CRF operator outputs the logarithm of the conditional
likelihood of each training sample in a mini-batch. likelihood of each training sample in a mini-batch.
NOTE: NOTE:
...@@ -179,8 +181,8 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { ...@@ -179,8 +181,8 @@ class LinearChainCRFOp : public framework::OperatorWithKernel {
} }
protected: protected:
// Explicitly set that the data type of output of the linear_chain_crf // Explicitly set that the data type of computation kernel of linear_chain_crf
// operator is determined by its input "Emission". // is determined by its input "Emission".
framework::DataType IndicateDataType( framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type()); return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
......
...@@ -134,7 +134,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> { ...@@ -134,7 +134,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
Tensor emission_row_max; Tensor emission_row_max;
emission_row_max.mutable_data<T>( emission_row_max.mutable_data<T>(
framework::make_ddim({static_cast<int>(batch_size), 1}), framework::make_ddim({static_cast<int64_t>(batch_size), 1}),
platform::CPUPlace()); platform::CPUPlace());
auto place = ctx.GetEigenDevice<platform::CPUPlace>(); auto place = ctx.GetEigenDevice<platform::CPUPlace>();
...@@ -273,7 +273,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> { ...@@ -273,7 +273,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
const int* lbl = label.data<int>(); const int* lbl = label.data<int>();
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
*std::max_element(lbl, lbl + seq_length), tag_num, static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
"An invalid tag label that execesses the largest tag number."); "An invalid tag label that execesses the largest tag number.");
// Calculate the nominator part, which depends on the label sequence. // Calculate the nominator part, which depends on the label sequence.
......
...@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
LoadOpProtoMaker(framework::OpProto *proto, LoadOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "The tensor need to be loaded"); AddOutput("Out", "(Tensor) The tensor need to be loaded");
AddComment(R"DOC(Load Operator
Load operator will load a tensor variable from disk file.
)DOC");
AddAttr<std::string>("file_path", AddAttr<std::string>("file_path",
"(string) "
"Variable will be loaded from \"file_path\".") "Variable will be loaded from \"file_path\".")
.AddCustomChecker( .AddCustomChecker(
[](const std::string &path) { return !path.empty(); }); [](const std::string &path) { return !path.empty(); });
AddComment(R"DOC(
Load Operator.
Load operator will load a tensor variable from disk file.
)DOC");
} }
}; };
} // namespace operators } // namespace operators
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
class LoDRankTableOp : public framework::OperatorBase {
public:
LoDRankTableOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
}
};
class LoDRankTableOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
LoDRankTableOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor) input lod tensor, must contain lod information.");
AddOutput("Out", "(LoDRankTable) The rank table of specific level.");
AddAttr<int>("level", "(int) the specific lod level to rank.")
.SetDefault(0)
.EqualGreaterThan(0);
AddComment(R"DOC(Create LoDRanTable by LoDTensor
LoD Rank Table stores the `level` of `lod` which is ordered by sequence
length in descending order. It is useful when implement dynamic RNN and is
shared by dynamic RNN memory, dynamic RNN slice input and dynamic RNN slice
output operators.
)DOC");
}
};
class LoDRankTableInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
PADDLE_ENFORCE(context->HasInput("X"), "LoDRankTable must has input X");
}
};
class LoDRankTableInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind &op_desc,
framework::BlockDescBind *block) const override {
for (auto &o : op_desc.Output("Out")) {
block->Var(o)->SetType(framework::VarDesc::LOD_RANK_TABLE);
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(lod_rank_table, paddle::operators::LoDRankTableOp,
paddle::operators::LoDRankTableOpProtoMaker,
paddle::operators::LoDRankTableInferShape,
paddle::operators::LoDRankTableInferVarType,
paddle::framework::EmptyGradOpMaker);
...@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("W", AddInput("W",
"An input represents embedding tensors," "An input represents embedding tensors, "
" which is a learnable parameter."); "which is a learnable parameter.");
AddInput("Ids", AddInput("Ids",
"An input with type int32 or int64" "An input with type int32 or int64 "
"contains the ids to be looked up in W." "contains the ids to be looked up in W. "
"Ids must be a column vector with rank = 2." "Ids must be a column vector with rank = 2. "
"The 2nd dimension size must be 1"); "The 2nd dimension size must be 1.");
AddOutput("Out", "The lookup results, which have the same type with W."); AddOutput("Out", "The lookup results, which have the same type as W.");
AddAttr<bool>("is_sparse", "Sparse update").SetDefault(false); AddAttr<bool>("is_sparse",
"(boolean, default false) "
"Sparse update")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Lookup Table Operator.
This operator is used to perform lookups on the parameter W, This operator is used to perform lookups on the parameter W,
then concatenated into a dense tensor. then concatenated into a dense tensor.
The input `Ids` can carry the LoD (Level of Details) information, The input Ids can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD with input `Ids`. or not. And the output only shares the LoD information with input Ids.
)DOC"); )DOC");
} }
}; };
......
...@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", R"DOC( AddInput("X",
(Tensor) The input of LRN operator. It must be a 4D tenor with NCHW format. "(Tensor) The input of LRN operator. "
)DOC"); "It must be a 4D tenor with NCHW format.");
AddOutput("Out", AddOutput("Out",
"(Tensor) The output of LRN operator, which is also the 4D " "(Tensor) The output of LRN operator, which is also the 4D "
"tensor with NCHW format."); "tensor with NCHW format.");
AddOutput("MidOut", R"Doc( AddOutput("MidOut",
(Tensor)Middle result of lrn op.It's computed in forward process "(Tensor) Middle result of LRN operator. It's computed in "
and also used in backward process. "forward process and also used in backward process.");
)Doc");
AddAttr<int>("n",
AddAttr<int>("n", R"DOC( "(int default 5) "
(int, default 5)n is “adjacent” kernel maps at the same spatial position. "n is the \"adjacent\" kernel that maps "
)DOC") "at the same spatial position.")
.SetDefault(5) .SetDefault(5)
.GreaterThan(0); .GreaterThan(0);
AddAttr<T>("k", R"DOC( AddAttr<T>("k",
(float, default 2.0)k is the bias. "(float, default 2.0) "
)DOC") "k is the bias.")
.SetDefault(2.0) .SetDefault(2.0)
.GreaterThan(0.0); .GreaterThan(0.0);
AddAttr<T>("alpha", R"DOC( AddAttr<T>("alpha",
(float, default 0.0001)alpha is the scale number. "(float, default 0.0001) "
)DOC") "alpha is the scale number.")
.SetDefault(0.0001) .SetDefault(0.0001)
.GreaterThan(0.0); .GreaterThan(0.0);
AddAttr<T>("beta", R"DOC( AddAttr<T>("beta",
(float, default 0.75)beta is the power number. "(float, default 0.75) "
)DOC") "beta is the power number.")
.SetDefault(0.75) .SetDefault(0.75)
.GreaterThan(0.0); .GreaterThan(0.0);
AddComment(R"DOC( AddComment(R"DOC(
Local Response Normalization. Local Response Normalization Operator.
This Function comes from the paper This operator comes from the paper
"ImageNet Classification with Deep Convolutional Neural Networks". "ImageNet Classification with Deep Convolutional Neural Networks".
The original formula is: The original formula is:
Input(i, x, y) $$
Output(i, x, y) = ---------------------------------------------- Output(i, x, y) = Input(i, x, y) / \left(
-- upper k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
(k + alpha * > (Input(j, x, y))^2) ^ (beta) (Input(j, x, y))^2
-- j = lower \right)^{\beta}
$$
upper is `min(C, c + n/2)` Function implementation:
lower if `max(0, c - n/2)`
Function implementation: Inputs and outpus are in NCHW format, while input.shape.ndims() equals 4.
And dimensions 0 ~ 3 represent batch size, feature maps, rows,
and columns, respectively.
inputs and outpus is NCHW format, while input.shape.ndims() is equal 4. Input and Output in the formula above is for each map(i) of one image, and
And the meaning of each dimension(0-3) is respectively batch size, Input(i, x, y), Output(i, x, y) represents an element in an image.
feature maps, rows and columns.
Input and Output in the above formula is for each map(i) of one image, and C is the number of feature maps of one image. n is a hyper-parameter
Input(i, x, y), Output(i, x, y) represents an element in an image. configured when operator is initialized. The sum in the denominator
is the sum of the same positions in the neighboring maps.
C is the number of feature maps of one image, and n is a hyper-parameters )DOC");
is configured when Function is initialized. The sum in the denominator
is the sum of the same position in the neighboring maps.
)DOC");
} }
}; };
......
...@@ -111,7 +111,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -111,7 +111,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("H0", AddInput("H0",
"(Tensor, optional) the initial hidden state is an optional " "(Tensor, optional) the initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the " "input. This is a tensor with shape (N x D), where N is the "
"batch size, D is the hidden size.") "batch size and D is the hidden size.")
.AsDispensable(); .AsDispensable();
AddInput("C0", AddInput("C0",
"(Tensor, optional) the initial cell state is an optional " "(Tensor, optional) the initial cell state is an optional "
...@@ -141,14 +141,14 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -141,14 +141,14 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("BatchGate", AddOutput("BatchGate",
"(LoDTensor) This LoDTensor contains input gate, forget gate " "(LoDTensor) This LoDTensor contains input gate, forget gate "
"and output gate after the nonlinear computation. This " "and output gate after the nonlinear computation. This "
"LoDTensor has the same shape with the reorganized input, which " "LoDTensor has the same shape as the reorganized input, which "
"is also be called batch input. The LoD size is 2. The first " "is also be called batch input. The LoD size is 2. The first "
"LoD is the batch offsets and the second LoD contains the " "LoD is the batch offsets and the second LoD contains the "
"indexes, which denote the position of reorganized sequence " "indexes, which denote the position of reorganized sequence "
"in the raw input.") "in the raw input.")
.AsIntermediate(); .AsIntermediate();
AddOutput("BatchCellPreAct", AddOutput("BatchCellPreAct",
"(LoDTensor) This LoDTensor is got in the forward and used " "(LoDTensor) This LoDTensor is obtained in the forward and used "
"in the backward.") "in the backward.")
.AsIntermediate(); .AsIntermediate();
AddAttr<bool>("use_peepholes", AddAttr<bool>("use_peepholes",
...@@ -174,52 +174,49 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -174,52 +174,49 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
"The activation for candidate hidden state, " "The activation for candidate hidden state, "
"`tanh` by default.") "`tanh` by default.")
.SetDefault("tanh"); .SetDefault("tanh");
AddComment(R"DOC(Long-Short Term Memory (LSTM) Operator AddComment(R"DOC(
Long-Short Term Memory (LSTM) Operator.
The defalut implementation is diagonal/peephole connection [1], the formula is The defalut implementation is diagonal/peephole connection
as follows (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) $$
i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\
f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\
\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\
o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\
c_t = f_t ⊙ c_{t-1} + i_t ⊙ \tilde{c_t} c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
h_t = o_t ⊙ act_h(c_t) h_t = o_t \odot act_h(c_t)
$$
where the W terms denote weight matrices (e.g. \f$W_{xi}\f$ is the matrix where the W terms denote weight matrices (e.g. \f$W_{xi}\f$ is the matrix
of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$ of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$
are diagonal weight matrices for peephole connections. In our implenmention, are diagonal weight matrices for peephole connections. In our implementation,
We use vectors to reprenset these diagonal weight matrices. The b terms we use vectors to reprenset these diagonal weight matrices. The b terms
denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$ denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$
is the non-line actications, such as logistic sigmoid function, and is the non-line activations, such as logistic sigmoid function, and
\f$i, f, o\f$ and \f$c\f$ are respectively the input gate, forget gate, \f$i, f, o\f$ and \f$c\f$ are the input gate, forget gate, output gate,
output gate and cell activation vectors, all of which are the same size as and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector \f$h\f$. the cell output activation vector \f$h\f$.
The ⊙ is the element-wise product of the vectors, \f$act_g\f$ and \f$act_h\f$ The \f$\odot\f$ is the element-wise product of the vectors. \f$act_g\f$ and \f$act_h\f$
are the cell input and cell output activation functions, `tanh` is usually are the cell input and cell output activation functions and `tanh` is usually
used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state, used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state,
which is computed based on the current input and the previous hidden state. which is computed based on the current input and the previous hidden state.
Set `use_peepholes` False to disable peephole connection [2]. The formula Set `use_peepholes` False to disable peephole connection
(http://www.bioinf.jku.at/publications/older/2604.pdf). The formula
is omitted here. is omitted here.
@note These \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$ Note that these \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$
operations on the input x_{t} were NOT included in this operator. operations on the input \f$x_{t}\f$ are NOT included in this operator.
Users can choose to use fully-connect operator before LSTM operator. Users can choose to use fully-connect operator before LSTM operator.
[1] Hasim Sak, Andrew Senior, and Francoise Beaufays. Long short-term memory
recurrent neural network architectures for large scale acoustic modeling.
INTERSPEECH, 2014.
[2] S. Hochreiter and J. Schmidhuber. Long Short-Term Memory.
Neural Computation, 9(8):1735-1780, 1997.
)DOC"); )DOC");
} }
}; };
......
...@@ -57,17 +57,22 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -57,17 +57,22 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
"The cell state tensor of last time-step in the Lstm Unit operator."); "The cell state tensor of last time-step in the Lstm Unit operator.");
AddOutput("C", "The cell tensor of Lstm Unit operator."); AddOutput("C", "The cell tensor of Lstm Unit operator.");
AddOutput("H", "The hidden state tensor of Lstm Unit operator."); AddOutput("H", "The hidden state tensor of Lstm Unit operator.");
AddAttr<float>("forget_bias",
AddComment(R"DOC(Lstm-Unit Operator "(float, default 0.0) "
"The forget bias of Lstm Unit.")
.SetDefault(0.0);
AddComment(R"DOC(
Lstm Unit Operator
Equation: Equation:
i, f, o, j = split(X)
C = C_prev * sigm(f + forget_bias) + sigm(i) * tanh(j) $$
H = C * sigm(o) i, f, o, j = split(X) \\
C = C_{prev} * sigm(f + forget\_bias) + sigm(i) * tanh(j) \\
H = C * sigm(o)
$$
)DOC"); )DOC");
AddAttr<float>("forget_bias", "The forget bias of Lstm Unit.")
.SetDefault(0.0);
} }
}; };
......
...@@ -55,8 +55,6 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -55,8 +55,6 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
"(2-D tensor with shape [batch_size x 1]) " "(2-D tensor with shape [batch_size x 1]) "
"The label indicating X1 ranked higher than X2 or not, " "The label indicating X1 ranked higher than X2 or not, "
"can only be +1 or -1."); "can only be +1 or -1.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
AddOutput("Activated", AddOutput("Activated",
"(2-D tensor with shape [batch_size x 1]) Intermediate tensor " "(2-D tensor with shape [batch_size x 1]) Intermediate tensor "
"to indicate whether each element of Output(Out) is activated.") "to indicate whether each element of Output(Out) is activated.")
...@@ -64,23 +62,26 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -64,23 +62,26 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", AddOutput("Out",
"(2-D tensor with shape [batch_size x 1]) " "(2-D tensor with shape [batch_size x 1]) "
"The output loss of MarginRankLoss operator."); "The output loss of MarginRankLoss operator.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
AddComment(R"DOC( AddComment(R"DOC(
MarginRankLoss Operator.
MarginRankLoss operator measures the loss given a pair of training sample This operator measures the loss given a pair of training sample
{`X1`, `X2`} and the `Label` with attribute `margin`, where `Label = +1` {`X1`, `X2`} and the `Label` with attribute `margin`, where `Label = +1`
indicating X1 is ranked higher than `X2`, otherwise `Label = -1`. The loss indicating X1 is ranked higher than `X2` and `Label = -1` otherwise. The loss
turns out is calculated as:
loss(X1, X2, Label) = max(0, -Label * (X1 - X2) + margin). $loss(X1, X2, Label) = \max(0, -Label * (X1 - X2) + margin)$
The attribute `margin` involved here helps make the predictions more robust. The attribute `margin` here helps make the predictions more robust.
Denote the item ranked higher as the positive sample, otherwise the negative Denote the item ranked higher as the positive sample, otherwise the negative
sample. If the score of the two samples satisfies sample. If the score of the two samples satisfies
positive sample - negative sample < margin, $positive sample - negative sample < margin$
the pair of samples will contribute to the final loss, which will backpropogate the pair of samples will contribute to the final loss, which will backpropagate
and train the ranking model to enlarge the difference of the two score. and train the ranking model to enlarge the difference between the two scores.
For batch input with size `batch_size`, `X1`, `X2` and `Label` For batch input with size `batch_size`, `X1`, `X2` and `Label`
all have the same shape [batch_size x 1]. all have the same shape [batch_size x 1].
......
...@@ -8,20 +8,24 @@ if(WITH_GPU) ...@@ -8,20 +8,24 @@ if(WITH_GPU)
nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator) nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator)
nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator)
nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context)
nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function)
nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context)
nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context)
nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context)
nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions)
else() else()
cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator)
cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
cc_library(softmax SRCS softmax.cc DEPS operator) cc_library(softmax SRCS softmax.cc DEPS operator)
cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator) cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator)
cc_library(pooling SRCS pooling.cc DEPS device_context) cc_library(pooling SRCS pooling.cc DEPS device_context)
cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function)
cc_library(vol2col SRCS vol2col.cc DEPS device_context) cc_library(vol2col SRCS vol2col.cc DEPS device_context)
cc_library(context_project SRCS context_project.cc DEPS device_context) cc_library(context_project SRCS context_project.cc DEPS device_context)
cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context)
cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function)
endif() endif()
cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <type_traits>
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/operators/math/gru_compute.h"
namespace paddle {
namespace operators {
namespace math {
namespace detail {
#ifndef __NVCC__
template <class OpResetOutput, typename T>
void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput,
T *gateValue, T *resetOutputValue,
T *prevOutputValue, int frameSize,
activation_mode_t active_gate) {
T rValueUpdateGate;
T rValueResetGate;
T rValueResetOutput;
T rPrevOut = 0;
T *updateGate = gateValue;
T *resetGate = gateValue + frameSize;
for (int i = 0; i < frameSize; i++) {
rValueUpdateGate = updateGate[i];
rValueResetGate = resetGate[i];
if (prevOutputValue) {
rPrevOut = prevOutputValue[i];
}
opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut,
rValueResetOutput, active_gate);
updateGate[i] = rValueUpdateGate;
resetGate[i] = rValueResetGate;
resetOutputValue[i] = rValueResetOutput;
}
}
template <class OpFinalOutput, typename T>
void hl_naive_gru_forward_final_output(OpFinalOutput opFinalOutput,
T *gateValue, T *prevOutputValue,
T *outputValue, int frameSize,
activation_mode_t active_node) {
T rValueUpdateGate;
T rValueFrameState;
T rPrevOut = 0;
T rOutput;
T *updateGate = gateValue;
T *frameState = gateValue + frameSize * 2;
for (int i = 0; i < frameSize; i++) {
rValueUpdateGate = updateGate[i];
rValueFrameState = frameState[i];
if (prevOutputValue) {
rPrevOut = prevOutputValue[i];
}
opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput,
active_node);
frameState[i] = rValueFrameState;
outputValue[i] = rOutput;
}
}
template <class OpResetOutput, typename T>
void hl_avx_gru_forward_reset_output(OpResetOutput opResetOutput, T *gateValue,
T *resetOutputValue, T *prevOutputValue,
int frameSize,
activation_mode_t active_gate) {
#ifdef __AVX__
__m256 rValueUpdateGate;
__m256 rValueResetGate;
__m256 rValueResetOutput;
__m256 rPrevOut = _mm256_set1_ps(0.0f);
__m256 *updateGate = (__m256 *)gateValue;
__m256 *resetGate = (__m256 *)(gateValue + frameSize);
for (int i = 0; i < frameSize / 8; i++) {
rValueUpdateGate = updateGate[i];
rValueResetGate = resetGate[i];
if (prevOutputValue) {
rPrevOut = ((__m256 *)prevOutputValue)[i];
}
opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut,
rValueResetOutput, active_gate);
updateGate[i] = rValueUpdateGate;
resetGate[i] = rValueResetGate;
((__m256 *)resetOutputValue)[i] = rValueResetOutput;
}
#endif
}
template <class OpFinalOutput, typename T>
void hl_avx_gru_forward_final_output(OpFinalOutput opFinalOutput, T *gateValue,
T *prevOutputValue, T *outputValue,
int frameSize,
activation_mode_t active_node) {
#ifdef __AVX__
__m256 rValueUpdateGate;
__m256 rValueFrameState;
__m256 rPrevOut = _mm256_set1_ps(0.0f);
__m256 rOutput;
__m256 *updateGate = (__m256 *)gateValue;
__m256 *frameState = (__m256 *)(gateValue + frameSize * 2);
for (int i = 0; i < frameSize / 8; i++) {
rValueUpdateGate = updateGate[i];
rValueFrameState = frameState[i];
if (prevOutputValue) {
rPrevOut = ((__m256 *)prevOutputValue)[i];
}
opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput,
active_node);
frameState[i] = rValueFrameState;
((__m256 *)outputValue)[i] = rOutput;
}
#endif
}
template <class OpResetOutput, typename T>
inline void forward_reset_output(OpResetOutput opResetOutput,
hl_gru_value<T> value, int frameSize,
int batchSize, activation_mode_t active_gate) {
for (int b = 0; b < batchSize; b++) {
if (OpResetOutput::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_forward_reset_output(
opResetOutput, value.gateValue, value.resetOutputValue,
value.prevOutValue, frameSize, active_gate);
} else {
hl_naive_gru_forward_reset_output(
opResetOutput, value.gateValue, value.resetOutputValue,
value.prevOutValue, frameSize, active_gate);
}
value.gateValue += frameSize * 3;
value.resetOutputValue += frameSize;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
}
}
template <class OpFinalOutput, typename T>
inline void forward_final_output(OpFinalOutput opFinalOutput,
hl_gru_value<T> value, int frameSize,
int batchSize, activation_mode_t active_node) {
for (int b = 0; b < batchSize; b++) {
if (OpFinalOutput::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_forward_final_output(opFinalOutput, value.gateValue,
value.prevOutValue, value.outputValue,
frameSize, active_node);
} else {
hl_naive_gru_forward_final_output(opFinalOutput, value.gateValue,
value.prevOutValue, value.outputValue,
frameSize, active_node);
}
value.gateValue += frameSize * 3;
value.outputValue += frameSize;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
}
}
template <class OpStateGrad, typename T>
void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *outputGrad,
int frameSize,
activation_mode_t active_node) {
T rUpdateGateValue;
T rUpdateGateGrad;
T rFrameStateValue;
T rFrameStateGrad;
T rOutGrad;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T *updateGateValue = gateValue;
T *updateGateGrad = gateGrad;
T *frameStateValue = gateValue + frameSize * 2;
T *frameStateGrad = gateGrad + frameSize * 2;
for (int i = 0; i < frameSize; i++) {
rUpdateGateValue = updateGateValue[i];
rFrameStateValue = frameStateValue[i];
rOutGrad = outputGrad[i];
if (prevOutValue) {
rPrevOutValue = prevOutValue[i];
}
if (prevOutGrad) {
rPrevOutGrad = prevOutGrad[i];
}
opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue,
rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad,
active_node);
updateGateGrad[i] = rUpdateGateGrad;
frameStateGrad[i] = rFrameStateGrad;
if (prevOutGrad) {
prevOutGrad[i] = rPrevOutGrad;
}
}
}
template <class OpResetGrad, typename T>
void hl_naive_gru_backward_reset_grad(OpResetGrad opResetGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *resetOutputGrad,
int frameSize,
activation_mode_t active_gate) {
T rUpdateGateValue;
T rUpdateGateGrad;
T rResetGateValue;
T rResetGateGrad;
T rResetOutputGrad = 0;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T *updateGateValue = gateValue;
T *updateGateGrad = gateGrad;
T *resetGateValue = gateValue + frameSize;
T *resetGateGrad = gateGrad + frameSize;
for (int i = 0; i < frameSize; i++) {
rUpdateGateValue = updateGateValue[i];
rUpdateGateGrad = updateGateGrad[i];
rResetGateValue = resetGateValue[i];
if (prevOutValue && prevOutGrad) {
rResetOutputGrad = resetOutputGrad[i];
}
if (prevOutValue) {
rPrevOutValue = prevOutValue[i];
}
if (prevOutGrad) {
rPrevOutGrad = prevOutGrad[i];
}
opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue,
rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad,
active_gate);
updateGateGrad[i] = rUpdateGateGrad;
resetGateGrad[i] = rResetGateGrad;
if (prevOutGrad) {
prevOutGrad[i] = rPrevOutGrad;
}
}
}
template <class OpStateGrad, typename T>
void hl_avx_gru_backward_state_grad(OpStateGrad opStateGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *outputGrad,
int frameSize,
activation_mode_t active_node) {
#ifdef __AVX__
__m256 rUpdateGateValue;
__m256 rUpdateGateGrad;
__m256 rFrameStateValue;
__m256 rFrameStateGrad;
__m256 rOutGrad;
__m256 rPrevOutValue = _mm256_set1_ps(0.0f);
__m256 rPrevOutGrad = _mm256_set1_ps(0.0f);
__m256 *updateGateValue = (__m256 *)gateValue;
__m256 *updateGateGrad = (__m256 *)gateGrad;
__m256 *frameStateValue = (__m256 *)(gateValue + frameSize * 2);
__m256 *frameStateGrad = (__m256 *)(gateGrad + frameSize * 2);
for (int i = 0; i < frameSize / 8; i++) {
rUpdateGateValue = updateGateValue[i];
rFrameStateValue = frameStateValue[i];
rOutGrad = ((__m256 *)outputGrad)[i];
if (prevOutValue) {
rPrevOutValue = ((__m256 *)prevOutValue)[i];
}
if (prevOutGrad) {
rPrevOutGrad = ((__m256 *)prevOutGrad)[i];
}
opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue,
rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad,
active_node);
updateGateGrad[i] = rUpdateGateGrad;
frameStateGrad[i] = rFrameStateGrad;
if (prevOutGrad) {
((__m256 *)prevOutGrad)[i] = rPrevOutGrad;
}
}
#endif
}
template <class OpResetGrad, typename T>
void hl_avx_gru_backward_reset_grad(OpResetGrad opResetGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *resetOutputGrad,
int frameSize,
activation_mode_t active_gate) {
#ifdef __AVX__
__m256 rUpdateGateValue;
__m256 rUpdateGateGrad;
__m256 rResetGateValue;
__m256 rResetGateGrad;
__m256 rResetOutputGrad = _mm256_set1_ps(0.0f);
__m256 rPrevOutValue = _mm256_set1_ps(0.0f);
__m256 rPrevOutGrad = _mm256_set1_ps(0.0f);
__m256 *updateGateValue = (__m256 *)gateValue;
__m256 *updateGateGrad = (__m256 *)gateGrad;
__m256 *resetGateValue = (__m256 *)(gateValue + frameSize);
__m256 *resetGateGrad = (__m256 *)(gateGrad + frameSize);
for (int i = 0; i < frameSize / 8; i++) {
rUpdateGateValue = updateGateValue[i];
rUpdateGateGrad = updateGateGrad[i];
rResetGateValue = resetGateValue[i];
if (prevOutValue && prevOutGrad) {
rResetOutputGrad = ((__m256 *)resetOutputGrad)[i];
}
if (prevOutValue) {
rPrevOutValue = ((__m256 *)prevOutValue)[i];
}
if (prevOutGrad) {
rPrevOutGrad = ((__m256 *)prevOutGrad)[i];
}
opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue,
rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad,
active_gate);
updateGateGrad[i] = rUpdateGateGrad;
resetGateGrad[i] = rResetGateGrad;
if (prevOutGrad) {
((__m256 *)prevOutGrad)[i] = rPrevOutGrad;
}
}
#endif
}
template <class OpStateGrad, typename T>
inline void backward_state_grad(OpStateGrad opStateGrad, hl_gru_value<T> value,
hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node) {
for (int b = 0; b < batchSize; b++) {
if (OpStateGrad::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_backward_state_grad(
opStateGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.outputGrad, frameSize, active_node);
} else {
hl_naive_gru_backward_state_grad(
opStateGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.outputGrad, frameSize, active_node);
}
value.gateValue += frameSize * 3;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
grad.gateGrad += frameSize * 3;
grad.outputGrad += frameSize;
if (grad.prevOutGrad) {
grad.prevOutGrad += frameSize;
}
}
}
template <class OpResetGrad, typename T>
inline void backward_reset_grad(OpResetGrad opResetGrad, hl_gru_value<T> value,
hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_gate) {
for (int b = 0; b < batchSize; b++) {
if (OpResetGrad::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_backward_reset_grad(
opResetGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.resetOutputGrad, frameSize, active_gate);
} else {
hl_naive_gru_backward_reset_grad(
opResetGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.resetOutputGrad, frameSize, active_gate);
}
value.gateValue += frameSize * 3;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
grad.gateGrad += frameSize * 3;
grad.resetOutputGrad += frameSize;
if (grad.prevOutGrad) {
grad.prevOutGrad += frameSize;
}
}
}
#endif
} // namespace detail
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <type_traits>
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/operators/math/gru_compute.h"
#include "paddle/platform/cuda_helper.h"
#include "paddle/platform/device_context.h"
#include <glog/logging.h>
namespace paddle {
namespace operators {
namespace math {
namespace detail {
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpResetOutput, bool isBatch, typename T>
__global__ void KeGruForwardResetOutput(OpResetOutput opResetOutput,
T *gateValue, T *resetOutputValue,
T *prevOutputValue, int frameSize,
int batchSize,
activation_mode_t active_gate) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
resetOutputValue += batchIdx * frameSize;
}
T rPrevOut = 0;
T rValueResetOutput;
T rValueUpdateGate = gateValue[frameIdx + frameSize * 0];
T rValueResetGate = gateValue[frameIdx + frameSize * 1];
if (prevOutputValue) {
if (isBatch) prevOutputValue += batchIdx * frameSize;
rPrevOut = prevOutputValue[frameIdx];
}
opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut, rValueResetOutput,
active_gate);
gateValue[frameIdx + frameSize * 0] = rValueUpdateGate;
gateValue[frameIdx + frameSize * 1] = rValueResetGate;
resetOutputValue[frameIdx] = rValueResetOutput;
}
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpFinalOutput, bool isBatch, typename T>
__global__ void KeGruForwardFinalOutput(OpFinalOutput opFinalOutput,
T *gateValue, T *prevOutputValue,
T *outputValue, int frameSize,
int batchSize,
activation_mode_t active_node) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
outputValue += batchIdx * frameSize;
}
T rOutput;
T rPrevOut = 0;
T rValueUpdateGate = gateValue[frameIdx + frameSize * 0];
T rValueFrameState = gateValue[frameIdx + frameSize * 2];
if (prevOutputValue) {
if (isBatch) prevOutputValue += batchIdx * frameSize;
rPrevOut = prevOutputValue[frameIdx];
}
opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput,
active_node);
gateValue[frameIdx + frameSize * 2] = rValueFrameState;
outputValue[frameIdx] = rOutput;
}
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpStateGrad, bool isBatch, typename T>
__global__ void KeGruBackwardStateGrad(OpStateGrad opStateGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *outputGrad,
int frameSize, int batchSize,
activation_mode_t active_node) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
gateGrad += batchIdx * 3 * frameSize;
outputGrad += batchIdx * frameSize;
}
T rUpdateGateGrad;
T rFrameStateGrad;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T rUpdateGateValue = gateValue[frameIdx + frameSize * 0];
T rFrameStateValue = gateValue[frameIdx + frameSize * 2];
T rOutGrad = outputGrad[frameIdx];
if (prevOutValue && prevOutGrad) {
if (isBatch) prevOutValue += batchIdx * frameSize;
rPrevOutValue = prevOutValue[frameIdx];
if (isBatch) prevOutGrad += batchIdx * frameSize;
rPrevOutGrad = prevOutGrad[frameIdx];
}
opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue,
rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad,
active_node);
gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad;
gateGrad[frameIdx + frameSize * 2] = rFrameStateGrad;
if (prevOutGrad) {
prevOutGrad[frameIdx] = rPrevOutGrad;
}
}
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpResetGrad, bool isBatch, typename T>
__global__ void KeGruBackwardResetGrad(OpResetGrad opResetGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *resetOutputGrad,
int frameSize, int batchSize,
activation_mode_t active_gate) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
gateGrad += batchIdx * 3 * frameSize;
resetOutputGrad += batchIdx * frameSize;
}
T rResetGateGrad;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T rResetOutputGrad = 0;
T rUpdateGateValue = gateValue[frameIdx + frameSize * 0];
T rUpdateGateGrad = gateGrad[frameIdx + frameSize * 0];
T rResetGateValue = gateValue[frameIdx + frameSize * 1];
if (prevOutValue && prevOutGrad) {
if (isBatch) prevOutValue += batchIdx * frameSize;
if (isBatch) prevOutGrad += batchIdx * frameSize;
rPrevOutValue = prevOutValue[frameIdx];
rPrevOutGrad = prevOutGrad[frameIdx];
rResetOutputGrad = resetOutputGrad[frameIdx];
}
opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue,
rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad,
active_gate);
gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad;
gateGrad[frameIdx + frameSize * 1] = rResetGateGrad;
if (prevOutGrad) {
prevOutGrad[frameIdx] = rPrevOutGrad;
}
}
} // namespace detail
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/platform/hostdevice.h"
#include <type_traits>
// TODO(guosheng): refine code style in gru_kernel
namespace paddle {
namespace operators {
namespace math {
namespace detail {
namespace forward {
template <typename T>
class gru_resetOutput {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &valueResetGate, T &prevOut,
T &valueResetOutput, activation_mode_t actGate) {
valueUpdateGate = activation(valueUpdateGate, actGate);
valueResetGate = activation(valueResetGate, actGate);
valueResetOutput = prevOut * valueResetGate;
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &valueResetGate,
__m256 &prevOut, __m256 &valueResetOutput,
activation_mode_t actGate) {
valueUpdateGate = activation(valueUpdateGate, actGate);
valueResetGate = activation(valueResetGate, actGate);
valueResetOutput = _mm256_mul_ps(prevOut, valueResetGate);
}
#endif
#endif
};
template <typename T>
class gru_finalOutput {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &valueFrameState, T &prevOut,
T &valueOutput, activation_mode_t actInput) {
valueFrameState = activation(valueFrameState, actInput);
valueOutput = prevOut - (valueUpdateGate * prevOut) +
(valueUpdateGate * valueFrameState);
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &valueFrameState,
__m256 &prevOut, __m256 &valueOutput,
activation_mode_t actInput) {
valueFrameState = activation(valueFrameState, actInput);
valueOutput = _mm256_add_ps(
_mm256_sub_ps(prevOut, _mm256_mul_ps(valueUpdateGate, prevOut)),
_mm256_mul_ps(valueUpdateGate, valueFrameState));
}
#endif
#endif
};
} // namespace forward
namespace backward {
template <typename T>
class gru_stateGrad {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &gradUpdateGate,
T &valueFrameState, T &gradFrameState,
T &valuePrevOut, T &gradPrevOut, T &gradOutput,
activation_mode_t actInput) {
gradUpdateGate = (gradOutput * valueFrameState);
gradUpdateGate -= (gradOutput * valuePrevOut);
gradPrevOut -= (gradOutput * valueUpdateGate);
gradPrevOut += gradOutput;
gradFrameState =
activation(gradOutput * valueUpdateGate, valueFrameState, actInput);
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &gradUpdateGate,
__m256 &valueFrameState, __m256 &gradFrameState,
__m256 &valuePrevOut, __m256 &gradPrevOut,
__m256 &gradOutput, activation_mode_t actInput) {
gradUpdateGate = _mm256_mul_ps(gradOutput, valueFrameState);
gradUpdateGate =
_mm256_sub_ps(gradUpdateGate, _mm256_mul_ps(gradOutput, valuePrevOut));
gradPrevOut = _mm256_add_ps(
_mm256_sub_ps(gradPrevOut, _mm256_mul_ps(gradOutput, valueUpdateGate)),
gradOutput);
gradFrameState = activation(_mm256_mul_ps(gradOutput, valueUpdateGate),
valueFrameState, actInput);
}
#endif
#endif
};
template <typename T>
class gru_resetGrad {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &gradUpdateGate,
T &valueResetGate, T &gradResetGate,
T &valuePrevOut, T &gradPrevOut,
T &gradResetOutput, activation_mode_t actGate) {
gradResetGate = (gradResetOutput * valuePrevOut);
gradPrevOut += (gradResetOutput * valueResetGate);
gradUpdateGate = activation(gradUpdateGate, valueUpdateGate, actGate);
gradResetGate = activation(gradResetGate, valueResetGate, actGate);
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &gradUpdateGate,
__m256 &valueResetGate, __m256 &gradResetGate,
__m256 &valuePrevOut, __m256 &gradPrevOut,
__m256 &gradResetOutput,
activation_mode_t actGate) {
gradResetGate = _mm256_mul_ps(gradResetOutput, valuePrevOut);
gradPrevOut = _mm256_add_ps(gradPrevOut,
_mm256_mul_ps(gradResetOutput, valueResetGate));
gradUpdateGate = activation(gradUpdateGate, valueUpdateGate, actGate);
gradResetGate = activation(gradResetGate, valueResetGate, actGate);
}
#endif
#endif
};
} // namespace backward
} // namespace detail
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/gru_compute.h"
#include "paddle/operators/math/detail/gru_cpu_kernel.h"
#include "paddle/operators/math/detail/gru_kernel.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
struct GRUUnitFunctor<platform::CPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, int frameSize, int batchSize,
activation_mode_t active_node,
activation_mode_t active_gate) {
#ifndef __NVCC__
if (value.prevOutValue) {
math::gemm<platform::CPUPlace, T>(
context, false, false, batchSize, frameSize * 2, frameSize, 1,
value.prevOutValue, frameSize, value.gateWeight, frameSize * 2, 1,
value.gateValue, frameSize * 3);
}
detail::forward_reset_output(detail::forward::gru_resetOutput<T>(), value,
frameSize, batchSize, active_gate);
if (value.prevOutValue) {
math::gemm<platform::CPUPlace, T>(
context, false, false, batchSize, frameSize, frameSize, 1,
value.resetOutputValue, frameSize, value.stateWeight, frameSize, 1,
value.gateValue + frameSize * 2, frameSize * 3);
}
detail::forward_final_output(detail::forward::gru_finalOutput<T>(), value,
frameSize, batchSize, active_node);
#endif
}
};
template <typename T>
struct GRUUnitGradFunctor<platform::CPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node,
activation_mode_t active_gate) {
#ifndef __NVCC__
detail::backward_state_grad(detail::backward::gru_stateGrad<T>(), value,
grad, frameSize, batchSize, active_node);
if (value.prevOutValue && grad.prevOutGrad) {
math::gemm<platform::CPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize, 1,
grad.gateGrad + frameSize * 2, frameSize * 3, value.stateWeight,
frameSize, 0, grad.resetOutputGrad, frameSize);
if (grad.stateWeightGrad) {
math::gemm<platform::CPUPlace, T>(
context, true, false, frameSize, frameSize, batchSize, 1,
value.resetOutputValue, frameSize, grad.gateGrad + frameSize * 2,
frameSize * 3, 1, grad.stateWeightGrad, frameSize);
}
}
detail::backward_reset_grad(detail::backward::gru_resetGrad<T>(), value,
grad, frameSize, batchSize, active_gate);
if (grad.prevOutGrad && value.prevOutValue) {
math::gemm<platform::CPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize * 2, 1,
grad.gateGrad, frameSize * 3, value.gateWeight, frameSize * 2, 1,
grad.prevOutGrad, frameSize);
if (grad.gateWeightGrad) {
math::gemm<platform::CPUPlace, T>(
context, true, false, frameSize, frameSize * 2, batchSize, 1,
value.prevOutValue, frameSize, grad.gateGrad, frameSize * 3, 1,
grad.gateWeightGrad, frameSize * 2);
}
}
#endif
}
};
template struct GRUUnitFunctor<platform::CPUPlace, float>;
template struct GRUUnitFunctor<platform::CPUPlace, double>;
template struct GRUUnitGradFunctor<platform::CPUPlace, float>;
template struct GRUUnitGradFunctor<platform::CPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/detail/gru_gpu_kernel.h"
#include "paddle/operators/math/detail/gru_kernel.h"
#include "paddle/operators/math/gru_compute.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
struct GRUUnitFunctor<platform::GPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, int frameSize, int batchSize,
activation_mode_t active_node,
activation_mode_t active_gate) {
auto stream =
reinterpret_cast<const platform::CUDADeviceContext &>(context).stream();
dim3 threads;
dim3 grid;
if (batchSize == 1) {
int framePerBlock = frameSize <= 1024 ? frameSize : 1024;
int frameBlocks = (frameSize + 1024 - 1) / 1024;
threads = dim3(framePerBlock, 1);
grid = dim3(frameBlocks, 1);
} else {
threads = dim3(32, 32);
grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
}
if (value.prevOutValue) {
math::gemm<platform::GPUPlace, T>(
context, false, false, batchSize, frameSize * 2, frameSize, 1,
value.prevOutValue, frameSize, value.gateWeight, frameSize * 2, 1,
value.gateValue, frameSize * 3);
}
if (batchSize == 1) {
detail::KeGruForwardResetOutput<detail::forward::gru_resetOutput<T>,
/* isBatch= */ false,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_resetOutput<T>(), value.gateValue,
value.resetOutputValue, value.prevOutValue, frameSize, batchSize,
active_gate);
} else {
detail::KeGruForwardResetOutput<detail::forward::gru_resetOutput<T>,
/* isBatch= */ true,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_resetOutput<T>(), value.gateValue,
value.resetOutputValue, value.prevOutValue, frameSize, batchSize,
active_gate);
}
if (value.prevOutValue) {
math::gemm<platform::GPUPlace, T>(
context, false, false, batchSize, frameSize, frameSize, 1,
value.resetOutputValue, frameSize, value.stateWeight, frameSize, 1,
value.gateValue + frameSize * 2, frameSize * 3);
}
if (batchSize == 1) {
detail::KeGruForwardFinalOutput<detail::forward::gru_finalOutput<T>,
/* isBatch= */ false,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_finalOutput<T>(), value.gateValue,
value.prevOutValue, value.outputValue, frameSize, batchSize,
active_node);
} else {
detail::KeGruForwardFinalOutput<detail::forward::gru_finalOutput<T>,
/* isBatch= */ true,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_finalOutput<T>(), value.gateValue,
value.prevOutValue, value.outputValue, frameSize, batchSize,
active_node);
}
}
};
template <typename T>
struct GRUUnitGradFunctor<platform::GPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node,
activation_mode_t active_gate) {
auto stream =
reinterpret_cast<const platform::CUDADeviceContext &>(context).stream();
dim3 threads;
dim3 grid;
if (batchSize == 1) {
int framePerBlock = frameSize <= 1024 ? frameSize : 1024;
int frameBlocks = (frameSize + 1024 - 1) / 1024;
threads = dim3(framePerBlock, 1);
grid = dim3(frameBlocks, 1);
} else {
threads = dim3(32, 32);
grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
}
if (batchSize == 1) {
detail::KeGruBackwardStateGrad<
detail::backward::gru_stateGrad<T>,
/* isBatch= */ false><<<grid, threads, 0, stream>>>(
detail::backward::gru_stateGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.outputGrad, frameSize,
batchSize, active_node);
} else {
detail::KeGruBackwardStateGrad<
detail::backward::gru_stateGrad<T>,
/* isBatch= */ true><<<grid, threads, 0, stream>>>(
detail::backward::gru_stateGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.outputGrad, frameSize,
batchSize, active_node);
}
if (value.prevOutValue && grad.prevOutGrad) {
math::gemm<platform::GPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize, 1,
grad.gateGrad + frameSize * 2, frameSize * 3, value.stateWeight,
frameSize, 0, grad.resetOutputGrad, frameSize);
if (grad.stateWeightGrad) {
math::gemm<platform::GPUPlace, T>(
context, true, false, frameSize, frameSize, batchSize, 1,
value.resetOutputValue, frameSize, grad.gateGrad + frameSize * 2,
frameSize * 3, 1, grad.stateWeightGrad, frameSize);
}
}
if (batchSize == 1) {
detail::KeGruBackwardResetGrad<
detail::backward::gru_resetGrad<T>,
/* isBatch= */ false><<<grid, threads, 0, stream>>>(
detail::backward::gru_resetGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.resetOutputGrad, frameSize,
batchSize, active_gate);
} else {
detail::KeGruBackwardResetGrad<
detail::backward::gru_resetGrad<T>,
/* isBatch= */ true><<<grid, threads, 0, stream>>>(
detail::backward::gru_resetGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.resetOutputGrad, frameSize,
batchSize, active_gate);
}
if (grad.prevOutGrad && value.prevOutValue) {
math::gemm<platform::GPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize * 2, 1,
grad.gateGrad, frameSize * 3, value.gateWeight, frameSize * 2, 1,
grad.prevOutGrad, frameSize);
if (grad.gateWeightGrad) {
math::gemm<platform::GPUPlace, T>(
context, true, false, frameSize, frameSize * 2, batchSize, 1,
value.prevOutValue, frameSize, grad.gateGrad, frameSize * 3, 1,
grad.gateWeightGrad, frameSize * 2);
}
}
}
};
template struct GRUUnitFunctor<platform::GPUPlace, float>;
template struct GRUUnitFunctor<platform::GPUPlace, double>;
template struct GRUUnitGradFunctor<platform::GPUPlace, float>;
template struct GRUUnitGradFunctor<platform::GPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/math/lstm_compute.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace operators {
namespace math {
// TODO(guosheng): refine code style in gru_compute
template <typename T>
struct hl_gru_value {
T *gateWeight;
T *stateWeight;
T *gateValue;
T *resetOutputValue;
T *outputValue;
T *prevOutValue;
};
template <typename T>
struct hl_gru_grad {
T *gateWeightGrad;
T *stateWeightGrad;
T *gateGrad;
T *resetOutputGrad;
T *outputGrad;
T *prevOutGrad;
};
template <typename Place, typename T>
struct GRUUnitFunctor {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, int frameSize, int batchSize,
activation_mode_t active_node,
activation_mode_t active_gate);
};
template <typename Place, typename T>
struct GRUUnitGradFunctor {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node,
activation_mode_t active_gate);
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/sequence_pooling.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
class MaxSeqPoolFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::LoDTensor& input, framework::Tensor* output,
framework::Tensor* index) {
auto in_dims = input.dims();
auto out_dims = output->dims();
auto idx_dims = index->dims();
PADDLE_ENFORCE_GT(in_dims.size(), 1);
PADDLE_ENFORCE_GT(out_dims.size(), 1);
for (int64_t i = 1; i < in_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, out_dims);
auto starts = input.lod()[0];
const T* in_data = input.data<T>();
T* out_data = output->data<T>();
int* max_index = index->data<int>();
int64_t num_seq = out_dims[0];
int64_t dim = output->numel() / num_seq;
for (int64_t i = 0; i < num_seq; ++i) {
for (int64_t k = 0; k < dim; ++k) {
out_data[i * dim + k] = in_data[starts[i] * dim + k];
max_index[i * dim + k] = starts[i];
}
for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
for (int64_t k = 0; k < dim; ++k) {
if (in_data[j * dim + k] > out_data[i * dim + k]) {
out_data[i * dim + k] = in_data[j * dim + k];
max_index[i * dim + k] = j;
}
}
}
}
}
};
template <typename T>
class MaxSeqPoolGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& out_grad,
const framework::Tensor& index,
framework::LoDTensor* in_grad) {
auto og_dims = out_grad.dims();
auto ig_dims = in_grad->dims();
auto idx_dims = index.dims();
PADDLE_ENFORCE_GT(og_dims.size(), 1);
PADDLE_ENFORCE_GT(ig_dims.size(), 1);
for (int64_t i = 1; i < og_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, og_dims);
const T* og_data = out_grad.data<T>();
const int* max_index = index.data<int>();
T* ig_data = in_grad->data<T>();
SetConstant<platform::CPUPlace, T> set_zero;
set_zero(context, in_grad, static_cast<T>(0.0));
int64_t num_seq = og_dims[0];
int64_t dim = out_grad.numel() / num_seq;
for (int64_t i = 0; i < num_seq; ++i) {
for (int64_t j = 0; j < dim; ++j) {
int step_id = max_index[i * dim + j];
ig_data[step_id * dim + j] = og_data[i * dim + j];
}
}
}
};
template class MaxSeqPoolFunctor<platform::CPUPlace, float>;
template class MaxSeqPoolFunctor<platform::CPUPlace, double>;
template class MaxSeqPoolGradFunctor<platform::CPUPlace, float>;
template class MaxSeqPoolGradFunctor<platform::CPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence_pooling.h"
namespace paddle {
namespace operators {
namespace math {
#define FLT_MAX __FLT_MAX__
template <typename T>
__global__ void KeMaxSequencePool(const T* input, const size_t* starts,
T* output, int* index, int64_t num_seq,
int64_t dim) {
int dim_idx = threadIdx.x;
int seq_id = blockIdx.x;
if (seq_id >= num_seq) return;
size_t start = starts[seq_id];
size_t end = starts[seq_id + 1];
for (int64_t i = dim_idx; i < dim; i += blockDim.x) {
T max_val = static_cast<T>(-FLT_MAX);
int max_id = -1;
for (size_t step_id = start; step_id < end; step_id++) {
if (max_val < input[step_id * dim + i]) {
max_val = input[step_id * dim + i];
max_id = step_id;
}
}
output[seq_id * dim + i] = max_val;
index[seq_id * dim + i] = max_id;
}
}
template <typename T>
class MaxSeqPoolFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::LoDTensor& input, framework::Tensor* output,
framework::Tensor* index) {
auto in_dims = input.dims();
auto out_dims = output->dims();
auto idx_dims = index->dims();
PADDLE_ENFORCE_GT(in_dims.size(), static_cast<int64_t>(1));
PADDLE_ENFORCE_GT(out_dims.size(), 1);
for (int64_t i = 1; i < in_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, out_dims);
auto starts = input.lod()[0];
const T* in_data = input.data<T>();
T* out_data = output->data<T>();
int* max_index = index->data<int>();
int64_t num_seq = out_dims[0];
int64_t dim = output->numel() / num_seq;
dim3 threads(256, 1);
dim3 grid(num_seq, 1);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(context).stream();
KeMaxSequencePool<T><<<grid, threads, 0, stream>>>(
in_data, starts.data(), out_data, max_index, num_seq, dim);
}
};
template <typename T>
__global__ void KeMaxSequencePoolGrad(const T* out_grad, const int* max_index,
T* in_grad, int64_t num_seq,
int64_t dim) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
int col_idx = idx % dim;
if (idx < num_seq * dim) {
int step_id = max_index[idx];
in_grad[step_id * dim + col_idx] = out_grad[idx];
}
}
template <typename T>
class MaxSeqPoolGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& out_grad,
const framework::Tensor& index,
framework::LoDTensor* in_grad) {
auto og_dims = out_grad.dims();
auto idx_dims = index.dims();
auto ig_dims = in_grad->dims();
PADDLE_ENFORCE_GT(og_dims.size(), static_cast<int64_t>(1));
PADDLE_ENFORCE_GT(ig_dims.size(), static_cast<int64_t>(1));
for (int64_t i = 1; i < og_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, og_dims);
const T* og_data = out_grad.data<T>();
const int* max_index = index.data<int>();
T* ig_data = in_grad->data<T>();
SetConstant<platform::GPUPlace, T> set_zero;
set_zero(context, in_grad, static_cast<T>(0.0));
int64_t num_seq = og_dims[0];
int64_t dim = out_grad.numel() / num_seq;
unsigned int blocks = (num_seq * dim + 128 - 1) / 128;
dim3 threads(128, 1);
dim3 grid(blocks, 1);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(context).stream();
KeMaxSequencePoolGrad<T><<<grid, threads, 0, stream>>>(
og_data, max_index, ig_data, num_seq, dim);
}
};
template class MaxSeqPoolFunctor<platform::GPUPlace, float>;
template class MaxSeqPoolFunctor<platform::GPUPlace, double>;
template class MaxSeqPoolGradFunctor<platform::GPUPlace, float>;
template class MaxSeqPoolGradFunctor<platform::GPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
#define FLT_MAX __FLT_MAX__
template <typename Place, typename T>
class MaxSeqPoolFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::LoDTensor& input, framework::Tensor* output,
framework::Tensor* index);
};
template <typename Place, class T>
class MaxSeqPoolGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& out_grad,
const framework::Tensor& index,
framework::LoDTensor* in_grad);
};
} // namespace math
} // namespace operators
} // namespace paddle
...@@ -144,7 +144,10 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -144,7 +144,10 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC") )DOC")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
The MatMul operator is used to perform (batched) matrix multiplication MatMul Operator.
This operator is used to perform (batched) matrix multiplication
over the last two dimensions of the input tensors `X` and `Y`. over the last two dimensions of the input tensors `X` and `Y`.
If a transpose flag is specified, the last two dimensions of the If a transpose flag is specified, the last two dimensions of the
...@@ -166,7 +169,8 @@ The differences are: ...@@ -166,7 +169,8 @@ The differences are:
- We add `transpose_X` and `transpose_Y` flags. - We add `transpose_X` and `transpose_Y` flags.
Both the input `X` and `Y` can carry the LoD (Level of Details) information, Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`. or not. But the output only shares the LoD information with input `X`.
)DOC"); )DOC");
} }
}; };
......
...@@ -36,7 +36,11 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -36,7 +36,11 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of mean op"); AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op"); AddOutput("Out", "The output of mean op");
AddComment(R"DOC( Mean Operator AddComment(R"DOC(
Mean Operator.
Out is a scalar which is the mean of all elements in X.
)DOC"); )DOC");
} }
}; };
......
...@@ -52,14 +52,16 @@ class MinusOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -52,14 +52,16 @@ class MinusOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Y", "The right tensor of minus operator."); AddInput("Y", "The right tensor of minus operator.");
AddOutput("Out", "The output tensor of minus operator."); AddOutput("Out", "The output tensor of minus operator.");
AddComment(R"DOC(Minus Operator AddComment(R"DOC(
Minus Operator.
Equation: Equation:
Out = X - Y $Out = X - Y$
Both the input `X` and `Y` can carry the LoD (Level of Details) information, Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`. or not. But the output only shares the LoD information with input `X`.
)DOC"); )DOC");
} }
}; };
......
...@@ -43,27 +43,35 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -43,27 +43,35 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X",
"The input tensor of modified huber loss op." "The input tensor of modified huber loss op. "
"X is 2-D tensor with shape [batch_size, 1]."); "X is 2-D tensor with shape [batch_size, 1].");
AddInput("Y", AddInput("Y",
"The target labels of modified huber loss op." "The target labels of modified huber loss op. "
"The shape of Y is same as X. Values of Y must be 0 or 1."); "The shape of Y is the same as X. Values of Y must be 0 or 1.");
AddOutput("IntermediateVal", AddOutput("IntermediateVal",
"Variable to save intermediate result which will be reused in " "Variable to save intermediate result which will be reused in "
"backward processing.") "backward processing.")
.AsIntermediate(); .AsIntermediate();
AddOutput("Out", "Classification loss for X."); AddOutput("Out", "Classification loss for X.");
AddComment(R"DOC( AddComment(R"DOC(
Modified huber loss is used in binary classification problem. The shape of Modified Huber Loss Operator.
input X and target Y are both [N, 1] and so is the shape of output loss.
Since target Y is not differentiable, cacluating gradient for Y is illegal. This operator is used in binary classification problem. The shape of
The formulation of modified huber loss is: input X and target Y are both [N, 1] and so is the shape of the output loss.
Since target Y is not differentiable, calculating gradient for Y is illegal.
L(y, f(x)) = max(0, 1 - yf(x))^2 for yf(x) >= -1, The formula of modified huber loss is:
-4yf(x) otherwise.
$$
Make sure the values of target label Y are in {0, 1} here. The operator will L(y, f(x)) =
\begin{cases}
(\max(0, 1 - yf(x)))^2, \text{if} \ yf(x) >= -1 \\
-4yf(x), \quad \text{otherwise}
\end{cases}
$$
Make sure the values of target label Y are in {0, 1} here. This operator will
scale values of Y to {-1, +1} when computing losses and gradients. scale values of Y to {-1, +1} when computing losses and gradients.
)DOC"); )DOC");
} }
}; };
......
...@@ -75,17 +75,23 @@ class MomentumOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -75,17 +75,23 @@ class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("VelocityOut", "(Tensor) Output updated velocity"); AddOutput("VelocityOut", "(Tensor) Output updated velocity");
AddAttr<float>("mu", "(float) Momentum coefficient"); AddAttr<float>("mu", "(float) Momentum coefficient");
AddAttr<bool>("useNesterov", "(bool) Use Nesterov Momentum") AddAttr<bool>("useNesterov",
"(bool, default false) "
"Use Nesterov Momentum")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Momentum Optimizer.
Momentum Algorithm with a flag for Nestrov Moemntum (momentum).
This optimizer has a flag for Nestrov Momentum.
velocity = mu * velocity + gradient The update equations are as follows:
if (use_nesterov):
param = param - gradient * learning_rate + mu * velocity * learning_rate $$
else: velocity = mu * velocity + gradient \\
param = param - learning_rate * velocity if (use\_nesterov): \\
param = param - gradient * learning\_rate + mu * velocity * learning\_rate \\
else: \\
param = param - learning\_rate * velocity. \\
$$
)DOC"); )DOC");
} }
......
...@@ -78,6 +78,7 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -78,6 +78,7 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "The output of mul op"); AddOutput("Out", "The output of mul op");
AddAttr<int>( AddAttr<int>(
"x_num_col_dims", "x_num_col_dims",
"(int, default 1) "
R"DOC(mul_op can take tensors with more than two dimensions as input `X`, R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
in that case, tensors will be reshaped to a matrix. The matrix's first in that case, tensors will be reshaped to a matrix. The matrix's first
dimension(column length) will be the product of tensor's last dimension(column length) will be the product of tensor's last
...@@ -88,20 +89,24 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -88,20 +89,24 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
.EqualGreaterThan(1); .EqualGreaterThan(1);
AddAttr<int>( AddAttr<int>(
"y_num_col_dims", "y_num_col_dims",
"(int, default 1) "
R"DOC(mul_op can take tensors with more than two dimensions as input `Y`, R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
in that case, tensors will be reshaped to a matrix. Just like input `X`. in that case, tensors will be reshaped to a matrix. Just like input `X`.
)DOC") )DOC")
.SetDefault(1) .SetDefault(1)
.EqualGreaterThan(1); .EqualGreaterThan(1);
AddComment(R"DOC( AddComment(R"DOC(
Mul operator is used to perform matrix multiplication for input X and Y. Mul Operator.
This operator is used to perform matrix multiplication for input X and Y.
The equation is: The equation is:
Out = X * Y $$Out = X * Y$$
Both the input `X` and `Y` can carry the LoD (Level of Details) information, Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`. or not. But the output only shares the LoD information with input `X`.
)DOC"); )DOC");
} }
}; };
......
...@@ -66,7 +66,8 @@ class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -66,7 +66,8 @@ class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "The candidate tensors of multiplex operator.") AddInput("X", "The candidate tensors of multiplex operator.")
.AsDuplicable(); .AsDuplicable();
AddOutput("Out", "The output tensor of multiplex operator."); AddOutput("Out", "The output tensor of multiplex operator.");
AddComment(R"DOC(Multiplex operator AddComment(R"DOC(
Multiplex Operator.
Multiplex multiple tensors according to the index provided by the index tensor. Multiplex multiple tensors according to the index provided by the index tensor.
...@@ -77,10 +78,11 @@ the (Ids[i])-th tensor. ...@@ -77,10 +78,11 @@ the (Ids[i])-th tensor.
For i-th row of the output tensor: For i-th row of the output tensor:
y[i] = x_{k}[i] $$y[i] = x_{k}[i]$$
where y is the output tensor. `x_{k}` is the k-th input tensor where `y` is the output tensor, `x_{k}` is the k-th input tensor,
and `k = Ids[i]`. and `k = Ids[i]`.
)DOC"); )DOC");
} }
}; };
......
...@@ -44,17 +44,21 @@ public: ...@@ -44,17 +44,21 @@ public:
AddOutput("Out", "(Tensor) Accumulated output tensor"); AddOutput("Out", "(Tensor) Accumulated output tensor");
AddAttr<float>("gamma", "(float, default 1.0) Accumulation multiplier").SetDefault(1.0f); AddAttr<float>("gamma", "(float, default 1.0) Accumulation multiplier").SetDefault(1.0f);
AddComment(R"DOC( AddComment(R"DOC(
Accumulate operator accumulates the input tensor to the output tensor. If the Accumulate Operator.
This operator accumulates the input tensor to the output tensor. If the
output tensor already has the right size, we add to it; otherwise, we first output tensor already has the right size, we add to it; otherwise, we first
initialize the output tensor to all zeros, and then do accumulation. Any initialize the output tensor to all zeros, and then do accumulation. Any
further calls to the operator, given that no one else fiddles with the output further calls to the operator, given that no one else fiddles with the output
in the interim, will do simple accumulations. in the interim, will do simple accumulations.
Accumulation is done as shown:
Accumulation is done as follows:
Out = 1*X + gamma*Out Out = 1*X + gamma*Out
where X is the input tensor, Out is the output tensor and gamma is the multiplier where X is the input tensor, Out is the output tensor and gamma is the multiplier
argument. argument.
)DOC"); )DOC");
} }
}; };
......
...@@ -48,12 +48,17 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -48,12 +48,17 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Communicator", AddOutput("Communicator",
"Create Communicator for communicating between gpus"); "Create Communicator for communicating between gpus");
AddAttr<std::vector<int>>("gpus", "gpu id lists"); AddAttr<std::vector<int>>("gpus", "(vector<int>) GPU id lists");
AddAttr<int>("data_type", "output data type") AddAttr<int>("data_type",
"(int, default 5 (FP32)) "
"Output data type")
.SetDefault(framework::DataType::FP32); .SetDefault(framework::DataType::FP32);
AddComment(R"DOC( AddComment(R"DOC(
create communicator. NCCLInit Operator.
)DOC");
Create communicator.
)DOC");
} }
}; };
...@@ -143,11 +148,15 @@ class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -143,11 +148,15 @@ class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Communicator", "Communicator for communicating between gpus"); AddInput("Communicator", "Communicator for communicating between gpus");
AddOutput("Out", "The output of AllReduce op"); AddOutput("Out", "The output of AllReduce op");
AddAttr<std::string>("reduction", AddAttr<std::string>("reduction",
"(string, default 'ncclSum') "
"{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.") "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.")
.SetDefault("ncclSum"); .SetDefault("ncclSum");
AddComment(R"DOC( AddComment(R"DOC(
AllReduce the input tensors. NCCLAllReduce Operator.
)DOC");
AllReduce the input tensors.
)DOC");
} }
}; };
...@@ -161,14 +170,20 @@ class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -161,14 +170,20 @@ class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Communicator", "Communicator for communicating between gpus"); AddInput("Communicator", "Communicator for communicating between gpus");
AddOutput("Out", "The output of Reduce op"); AddOutput("Out", "The output of Reduce op");
AddAttr<std::string>("reduction", AddAttr<std::string>("reduction",
"(string, default 'ncclSum') "
"{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.") "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.")
.SetDefault("ncclSum"); .SetDefault("ncclSum");
AddAttr<int>("root", AddAttr<int>("root",
"root gpu of the parameter. if not " "(int, default kInvalidGPUId) "
"set(platform::kInvalidGPUId). hashed by name.") "Root gpu of the parameter. If not, "
"set(platform::kInvalidGPUId). Hashed by name.")
.SetDefault(platform::kInvalidGPUId); .SetDefault(platform::kInvalidGPUId);
AddComment(R"DOC( AddComment(R"DOC(
Reduce the tensors)DOC"); NCCLReduce Operator.
Reduce the tensors.
)DOC");
} }
}; };
...@@ -182,12 +197,16 @@ class NCCLBcastOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -182,12 +197,16 @@ class NCCLBcastOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Communicator", "Communicator for communicating between gpus"); AddInput("Communicator", "Communicator for communicating between gpus");
AddOutput("Out", "The output of Bcast"); AddOutput("Out", "The output of Bcast");
AddAttr<int>("root", AddAttr<int>("root",
"root gpu of the parameter. if not " "(int, default kInvalidGPUId) "
"set(platform::kInvalidGPUId). hashed by name.") "Root gpu of the parameter. If not, "
"set(platform::kInvalidGPUId). Hashed by name.")
.SetDefault(platform::kInvalidGPUId); .SetDefault(platform::kInvalidGPUId);
AddComment(R"DOC( AddComment(R"DOC(
Bcast the tensors. NCCLBcast Operator.
)DOC");
Bcast the tensors.
)DOC");
} }
}; };
......
...@@ -54,41 +54,44 @@ class PadOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -54,41 +54,44 @@ class PadOpMaker : public framework::OpProtoAndCheckerMaker {
"The input of pad op. " "The input of pad op. "
"The input should be a k-D tensor(k > 0 and k < 7)"); "The input should be a k-D tensor(k > 0 and k < 7)");
AddOutput("Out", AddOutput("Out",
"The output of pad op." "The output of pad op. "
"A tensor with the same shape as X."); "A tensor with the same shape as X.");
AddAttr<std::vector<int>>(
"paddings",
"(vector<int>) "
"A list<int> to describe the padding rules for each dimension. "
"For 2-D image tensor, paddings=[0, 1, 2, 3] means "
"padding 0 row to top, 1 row to bottom, 2 columns to left "
"and 3 columns to right. Size of paddings should be equal to "
"2 * dimension size of the input tensor.");
AddAttr<float>("pad_value",
"(float, default 0.0) "
"The value to fill the padded areas.")
.SetDefault(0.0f);
AddComment(R"DOC( AddComment(R"DOC(
Pad input into output, as specified by paddings and pad_value. The input should be a k-D tensor(k > 0 and k < 7). As an example: Pad Operator.
Pad input into output, as specified by paddings and pad_value.
The input should be a k-D tensor(k > 0 and k < 7). As an example:
Given: Given:
X = [[1, 2], X = [[1, 2],
[3, 4]] [3, 4]],
and
paddings = [0, 1, 1, 2] paddings = [0, 1, 1, 2],
and and
pad_value = 0 pad_value = 0,
then we get we have:
Out = [[0, 1, 2, 0, 0] Out = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0] [0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]] [0, 0, 0, 0, 0]]
)DOC"); )DOC");
AddAttr<std::vector<int>>(
"paddings",
"A list<int> to describes padding rules for each dimension."
" For 2-D image tensor, paddings=[0, 1, 2, 3] means"
" padding 0 row to top, 1 row to bottom, 2 columns to left"
" and 3 columns to right.Size of paddings should be equal to"
" 2 * dimension size of input tensor.");
AddAttr<float>("pad_value",
"(float) default to 0; "
"The value to fill padded areas.")
.SetDefault(0.0f);
} }
}; };
......
...@@ -73,125 +73,138 @@ Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto, ...@@ -73,125 +73,138 @@ Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto,
AddInput( AddInput(
"X", "X",
"(Tensor) The input tensor of pooling operator. " "(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW, where N is batch size, C is the "
"number of channels, H and W is the height and width of feature."); "number of channels, H is the height of the feature, "
"and W is the width of the feature.");
AddOutput("Out", AddOutput("Out",
"(Tensor) The output tensor of pooling operator." "(Tensor) The output tensor of pooling operator. "
"The format of output tensor is also NCHW." "The format of output tensor is also NCHW, "
"Where N is batch size, C is " "where N is batch size, C is the number of channels, "
"the number of channels, H and W is the height and " "H is the height of the feature, "
"width of feature."); "and W is the width of the feature.");
AddAttr<std::string>("poolingType", AddAttr<std::string>("poolingType",
"(string), pooling type, can be \"max\" for max-pooling " "(string), pooling type, can be \"max\" for max-pooling "
"and \"avg\" for average-pooling.") "and \"avg\" for average-pooling.")
.InEnum({"max", "avg"}); .InEnum({"max", "avg"});
AddAttr<std::vector<int>>("ksize", AddAttr<std::vector<int>>("ksize",
"(vector ), the pooling window size(height, width) " "(vector<int>) The pooling window "
"of pooling operator." "size(height, width) of the pooling operator. "
"If globalPooling = true, ksize and paddings will " "If globalPooling = true, ksize and paddings will "
"be ignored."); // TODO(Chengduo): Add checker. "be ignored."); // TODO(Chengduo): Add checker.
// (Currently, // (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<bool>("globalPooling", AddAttr<bool>("globalPooling",
"(bool default: false), whether to use the global pooling." "(bool, default false) Whether to use the global pooling. "
"If globalPooling = true, ksize and paddings will be ignored.") "If globalPooling = true, ksize and paddings will be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>("strides",
"strides", "(vector<int>, default {1, 1}), strides(height, "
"(vector, default:{1, 1}), strides(height, width) of pooling operator.") "width) of pooling operator.")
.SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"(vector defalut:{0,0}), paddings(height, width) of pooling operator." "(vector<int>, defalut {0,0}), paddings(height, width) of pooling "
"operator."
"If globalPooling = true, paddings and ksize will be ignored.") "If globalPooling = true, paddings and ksize will be ignored.")
.SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddComment(R"DOC( AddComment(R"DOC(
Pool2d Operator.
The pooling2d operation calculates the output based on The pooling2d operation calculates the output based on
the input, poolingType and ksize, strides, paddings parameters. the input, poolingType and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCHW format. Where N is batch size, C is the Input(X) and output(Out) are in NCHW format, where N is batch size, C is the
number of channels, H and W is the height and width of feature. number of channels, H is the height of the feature, and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements. Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively. These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different. The input(X) size and output(Out) size may be different.
Example: Example:
Input: Input:
X shape: (N, C, H_in, W_in) X shape: $(N, C, H_{in}, W_{in})$
Output: Output:
Out shape: (N, C, H_out, W_out) Out shape: $(N, C, H_{out}, W_{out})$
where where
H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; $$
W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1
$$
)DOC"); )DOC");
} }
Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto, Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput("X",
"X",
"(Tensor) The input tensor of pooling operator. " "(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCDHW. Where N is batch size, C is " "The format of input tensor is NCDHW, where N is batch size, C is "
"the number of channels, D, H and W is the depth, height and width of " "the number of channels, and D, H and W is the depth, height and "
"feature."); "width of "
"the feature, respectively.");
AddOutput("Out", AddOutput("Out",
"(Tensor) The output tensor of pooling operator." "(Tensor) The output tensor of pooling operator."
"The format of output tensor is also NCDHW." "The format of output tensor is also NCDHW, "
"Where N is batch size, C is " "where N is batch size, C is "
"the number of channels, D, H and W is the depth, height and " "the number of channels, and D, H and W is the depth, height and "
"width of feature."); "width of the feature, respectively.");
AddAttr<std::string>("poolingType", AddAttr<std::string>("poolingType",
"(string), pooling type, can be \"max\" for max-pooling " "(string) Pooling type, can be \"max\" for max-pooling "
"and \"avg\" for average-pooling.") "and \"avg\" for average-pooling.")
.InEnum({"max", "avg"}); .InEnum({"max", "avg"});
AddAttr<std::vector<int>>("ksize", AddAttr<std::vector<int>>(
"(vector ), the pooling window size(depth, height, " "ksize",
"width) of pooling " "(vector<int>) The pooling window size(depth, height, "
"operator." "width) of pooling operator. "
"If globalPooling = true, ksize and paddings wille " "If globalPooling = true, ksize and paddings will "
"be ignored."); // TODO(Chengduo): Add checker. "be ignored."); // TODO(Chengduo): Add checker.
// (Currently, // (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<bool>("globalPooling", AddAttr<bool>("globalPooling",
"(bool default: false), whether to use the global pooling." "(bool, default false) Whether to use the global pooling. "
"If globalPooling = true, ksize and paddings wille be ignored.") "If globalPooling = true, ksize and paddings wille be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>(
"(vector, default:{1,1,1}), strides(depth, height, " "strides",
"width) of pooling operator.") "(vector<int>, default {1,1,1}) Strides(depth, height, "
"width) of the pooling operator.")
.SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"(vector defalut:{0,0,0}), paddings(depth, height, " "(vector<int>, defalut {0,0,0}), paddings(depth, height, "
"width) of pooling operator." "width) of pooling operator. "
"If globalPooling = true, ksize and paddings wille be ignored.") "If globalPooling = true, ksize and paddings will be ignored.")
.SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddComment(R"DOC( AddComment(R"DOC(
Pool3d Operator.
The pooling3d operation calculates the output based on The pooling3d operation calculates the output based on
the input, poolingType and ksize, strides, paddings parameters. the input, poolingType, ksize, strides, and paddings parameters.
Input(X) and output(Out) are in NCDHW format. Where N is batch Input(X) and output(Out) are in NCDHW format, where N is batch
size, C is the number of channels, D, H and W is the depth, height and size, C is the number of channels, and D, H and W are the depth, height and
width of feature. Parameters(ksize, strides, paddings) are three elements. width of the feature, respectively. Parameters(ksize, strides, paddings)
These three elements represent depth, height and width, respectively. are three elements. These three elements represent depth, height and
The input(X) size and output(Out) size may be different. width, respectively. The input(X) size and output(Out) size may be different.
Example: Example:
Input: Input:
X shape: (N, C, D_in, H_in, W_in) X shape: $(N, C, D_{in}, H_{in}, W_{in})$
Output: Output:
Out shape: (N, C, D_out, H_out, W_out) Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
where where
D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; $$
H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1; H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\
W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1
$$
)DOC"); )DOC");
} }
} // namespace operators } // namespace operators
......
...@@ -89,64 +89,73 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -89,64 +89,73 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
"(Tensor), the input tensor of pooling operator. " "(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW, where N is batch size, C is the "
"number of channels, H and W is the height and width of image."); "number of channels, H is the height of the image, "
"and W is the width of the image.");
AddOutput("Out", AddOutput("Out",
"(Tensor), the output tensor of pooling operator." "(Tensor) The output tensor of pooling operator. "
"The format of output tensor is also NCHW." "The format of output tensor is also NCHW, "
"Where N is batch size, C is " "where N is batch size, C is "
"the number of channels, H and W is the height and " "the number of channels, H is the height of the image "
"width of image."); "and W is the width of the image.");
AddOutput("Mask", AddOutput("Mask",
"(Tensor), the Mask tensor of pooling operator." "(Tensor) The Mask tensor of pooling operator."
"The format of output tensor is also NCHW." "The format of output tensor is also NCHW, "
"Where N is batch size, C is the number of channels, H and W " "where N is batch size, C is the number of channels, "
"is the height and width of image." "H is the height of the image, "
"The value in it is the index in current feature map"); "and W is the width of the image. "
"It represents the index in the current feature map.");
AddAttr<std::vector<int>>("ksize", AddAttr<std::vector<int>>("ksize",
"(vector ), the pooling window size(height, " "(vector<int>) The pooling window size(height, "
"width) of pooling operator." "width) of pooling operator. "
"If globalPooling = true, ksize and paddings " "If globalPooling = true, ksize and paddings "
"will be ignored."); // TODO(Chengduo): Add "will be ignored."); // TODO(Chengduo): Add
// checker. (Currently, // checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<bool>( AddAttr<bool>(
"globalPooling", "globalPooling",
"(bool default: false), whether to use the global pooling." "(bool, default false) Whether to use the global pooling. "
"If globalPooling = true, ksize and paddings will be ignored.") "If globalPooling = true, ksize and paddings will be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>("strides",
"strides", "(vector<int>, default {1, 1}), strides(height, "
"(vector, default:{1, 1}), strides(height, width) of pooling operator.") "width) of pooling operator.")
.SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"(vector defalut:{0, 0}), paddings(height, width) of pooling operator." "(vector<int>, defalut {0, 0}), paddings(height, width) of pooling "
"operator. "
"If globalPooling = true, paddings and will be ignored.") "If globalPooling = true, paddings and will be ignored.")
.SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddComment(R"DOC( AddComment(R"DOC(
MaxPool2d Operator.
The maxPooling2d with index operation calculates the output and the mask The maxPooling2d with index operation calculates the output and the mask
based on the input and ksize, strides, paddings parameters. Input(X) and based on the input, ksize, strides, and paddings parameters. Input(X) and
output(Out, Mask) are in NCHW format. Where N is batch size, C is the output(Out, Mask) are in NCHW format, where N is batch size, C is the
number of channels, H and W is the height and width of feature. number of channels, H is the height of the feature,
and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements. Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively. These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different. The input(X) size and output(Out, Mask) size may be different.
Example: Example:
Input: Input:
X shape: (N, C, H_in, W_in) X shape: $(N, C, H_{in}, W_{in})$
Output: Output:
Out shape: (N, C, H_out, W_out) Out shape: $(N, C, H_{out}, W_{out})$
Mask shape: (N, C, H_out, W_out) Mask shape: $(N, C, H_{out}, W_{out})$
where where
H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; $$
W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1
$$
)DOC"); )DOC");
} }
}; };
...@@ -156,70 +165,76 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -156,70 +165,76 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
MaxPool3dWithIndexOpMaker(framework::OpProto *proto, MaxPool3dWithIndexOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput("X",
"X", "(Tensor) The input tensor of pooling operator. "
"(Tensor), the input tensor of pooling operator. " "The format of input tensor is NCDHW, where N is batch size, C is "
"The format of input tensor is NCDHW. Where N is batch size, C is " "the number of channels, and D, H and W are the depth, height and "
"the number of channels, D, H and W is the depth, height and width of " "width of "
"image."); "the image, respectively");
AddOutput("Out", AddOutput("Out",
"(Tensor), the output tensor of pooling operator." "(Tensor) The output tensor of pooling operator. "
"The format of output tensor is also NCDHW." "The format of output tensor is also NCDHW, "
"Where N is batch size, C is " "where N is the batch size, C is the number of channels, "
"the number of channels, D, H and W is the depth, height and " "and D, H and W are the depth, height and "
"width of image."); "width of the image, respectively.");
AddOutput("Mask", AddOutput("Mask",
"(Tensor), the Mask tensor of pooling operator." "(Tensor) The Mask tensor of pooling operator. "
"The format of output tensor is also NCDHW." "The format of output tensor is also NCDHW, "
"Where N is batch size, C is the number of channels, D, H and W " "where N is the batch size, C is the number of channels, and "
"is the depth, height and width of image." "D, H and W are the depth, height and width "
"The value in it is the index in current feature map"); "of the image, respectively. "
"It represents the index in the current feature map.");
AddAttr<std::vector<int>>("ksize", AddAttr<std::vector<int>>("ksize",
"(vector), the pooling window size(depth, " "(vector<int>) The pooling window size(depth, "
"height, width) of pooling " "height, width) of pooling operator. "
"operator."
"If globalPooling = true, ksize and paddings " "If globalPooling = true, ksize and paddings "
"will be ignored."); // TODO(Chengduo): Add "will be ignored."); // TODO(Chengduo): Add
// checker. (Currently, // checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<bool>( AddAttr<bool>(
"globalPooling", "globalPooling",
"(bool default: false), whether to use the global pooling." "(bool, default false) Whether to use the global pooling. "
"If globalPooling = true, ksize and paddings will be ignored.") "If globalPooling = true, ksize and paddings will be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"(vector, default:{1,1,1}), strides(depth, " "(vector<int>, default {1,1,1}), strides(depth, "
"height, width) of pooling operator.") "height, width) of pooling operator.")
.SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"(vector defalut:{0,0,0}), paddings(depth, " "(vector, defalut {0,0,0}), paddings(depth, "
"height, width) of pooling operator." "height, width) of pooling operator. "
"If globalPooling = true, paddings and ksize will be ignored.") "If globalPooling = true, paddings and ksize will be ignored.")
.SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddComment(R"DOC( AddComment(R"DOC(
MaxPool3d Operator.
The maxpooling3d with index operation calculates the output and the mask The maxpooling3d with index operation calculates the output and the mask
based on the input and ksize, strides, paddings parameters. based on the input and ksize, strides, paddings parameters.
Input(X) and output(Out, Mask) are in NCDHW format. Where N is batch Input(X) and output(Out, Mask) are in NCDHW format, where N is batch
size, C is the number of channels, D, H and W is the depth, height and size, C is the number of channels, and D, H and W are the depth, height and
width of feature. Parameters(ksize, strides, paddings) are three elements. width of the feature, respectively.
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively. These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different. The input(X) size and output(Out, Mask) size may be different.
Example: Example:
Input: Input:
X shape: (N, C, D_in, H_in, W_in) X shape: $(N, C, D_{in}, H_{in}, W_{in})$
Output: Output:
Out shape: (N, C, D_out, H_out, W_out) Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
Mask shape: (N, C, D_out, H_out, W_out) Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
where where
D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1; $$
H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1; D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1; H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\
W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1
$$
)DOC"); )DOC");
} }
}; };
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/positive_negative_pair_op.h"
namespace paddle {
namespace operators {
class PositiveNegativePairOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(
ctx->HasInput("Score"),
"Input(Score) of PositiveNegativePairOp should not be null.");
PADDLE_ENFORCE(
ctx->HasInput("Label"),
"Input(Label) of PositiveNegativePairOp should not be null.");
PADDLE_ENFORCE(
ctx->HasInput("QueryID"),
"Input(QueryID) of PositiveNegativePairOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("PositivePair"),
"Output(PositivePair) of PositiveNegativePairOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("NegativePair"),
"Output(NegativePair) of PositiveNegativePairOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("NeutralPair"),
"Output(NeutralPair) of PositiveNegativePairOp should not be null.");
auto scalar_dim = framework::make_ddim({1});
if (ctx->HasInput("AccumulatePositivePair") ||
ctx->HasInput("AccumulateNegativePair") ||
ctx->HasInput("AccumulateNeutralPair")) {
PADDLE_ENFORCE(ctx->HasInput("AccumulatePositivePair") &&
ctx->HasInput("AccumulateNegativePair") &&
ctx->HasInput("AccumulateNeutralPair"),
"All optional inputs(AccumulatePositivePair, "
"AccumulateNegativePair, AccumulateNeutralPair) of "
"PositiveNegativePairOp are required if one of them is "
"specified.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("AccumulatePositivePair"), scalar_dim,
"Shape of AccumulatePositivePair should be {1}.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("AccumulateNegativePair"), scalar_dim,
"Shape of AccumulateNegativePair should be {1}.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("AccumulateNeutralPair"), scalar_dim,
"Shape of AccumulateNeutralPair should be {1}.");
}
auto score_dim = ctx->GetInputDim("Score");
auto label_dim = ctx->GetInputDim("Label");
auto query_dim = ctx->GetInputDim("QueryID");
PADDLE_ENFORCE_EQ(score_dim.size(), 2, "Score should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(label_dim.size(), 2, "Label should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(
label_dim[0], score_dim[0],
"Tensor Score and Label should have the same height (batch size).");
PADDLE_ENFORCE_EQ(label_dim[1], 1,
"The width of Label should be 1, i.e. each item should "
"have a scalar label.");
PADDLE_ENFORCE(query_dim == label_dim,
"QueryID should have the same shape as Label.");
if (ctx->HasInput("Weight")) {
PADDLE_ENFORCE(ctx->GetInputDim("Weight") == label_dim,
"Weight should have the same shape as Label.");
}
int column = ctx->Attrs().Get<int>("column");
auto depth = score_dim[1];
PADDLE_ENFORCE(column < depth && column >= -depth,
"Attribute column should be in the range of [-%l, %l)",
depth, depth);
ctx->SetOutputDim("PositivePair", scalar_dim);
ctx->SetOutputDim("NegativePair", scalar_dim);
ctx->SetOutputDim("NeutralPair", scalar_dim);
}
protected:
framework::DataType IndicateDataType(
const framework::ExecutionContext &ctx) const override {
return framework::ToDataType(ctx.Input<Tensor>("Score")->type());
}
};
class PositiveNegativePairOpMaker : public framework::OpProtoAndCheckerMaker {
public:
PositiveNegativePairOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Score",
"(Tensor, float) Model Score on an item (with "
"respect to QueryID). It's a 2-D tensor with shape [batch_size, "
"depth], where the column specified by the attribute \"column\" "
"is used as item score.");
AddInput("Label",
"(Tensor, float) Label of an item (with repsect to "
"QueryId). It's a 2-D tensor with shape [batch_size, 1].");
AddInput("QueryID",
"(Tensor, int64) Query ID that indicates the context. Its shape "
"should be the same as Label.");
AddInput(
"AccumulatePositivePair",
"(float) Optional. The accumulated number of positive pairs over a "
"stream of data. If provided, the output PositivePair will be "
"initialized with this number rather than 0. it won't be modified "
"in place.")
.AsDispensable();
AddInput(
"AccumulateNegativePair",
"(float) Optional. The accumulated number of negative pairs over a "
"stream of data. If provided, the output NegativePair will be "
"initialized with this number rather than 0. it won't be modified "
"in place.")
.AsDispensable();
AddInput("AccumulateNeutralPair",
"(float) Optional. The accumulated number of neutral pairs over a "
"stream of data. If provided, the output NeutralPair will be "
"initialized with this number rather than 0. it won't be modified "
"in place.")
.AsDispensable();
AddInput("Weight",
"(float) Optional. Weight of current item. If specified, its "
"shape should be the same as Label, and the meaning of the output "
"changes from numbers of pairs to the total sum of pairs' "
"weights. Weight of a pair of items is the average of their "
"weights.")
.AsDispensable();
AddOutput("PositivePair",
"(float) Number of positive pairs, i.e. the pairs of "
"items that are ranked correctly.");
AddOutput("NegativePair",
"(float) Number of negative pairs, i.e. the pairs of "
"items that are ranked incorrectly.");
AddOutput("NeutralPair",
"(float) Number of neutral pairs, i.e. the pairs of items "
"that have the same score.")
.AsDispensable();
AddAttr<int>(
"column",
"(int, default -1) The column position of Score used to rank items in "
"descending order. It must be in the range of [-rank(Score), "
"rank(Score)). "
"If `dim < 0`, the dim to reduce is `rank + dim`. "
"Noting that reducing on the first dim will make the LoD info lost.")
.SetDefault(0);
AddComment(R"DOC(
PositiveNegativePairOp can be used to evaluate Learning To Rank(LTR)
model performance.
Within some context, e.g. the "query", a LTR model generates scores
for a list of items, which gives a partial order of the items.
PositiveNegativePairOp takes a list of reference rank order
(Input("Label")) and the model generated scores (Input(Score)) as
inputs and counts the pairs that ranked correctly and incorrectly.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair,
ops::PositiveNegativePairOp,
ops::PositiveNegativePairOpMaker);
REGISTER_OP_CPU_KERNEL(
positive_negative_pair,
ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, float>,
ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <unordered_map>
#include <vector>
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/utils/Logging.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
template <typename Place, typename T>
class PositiveNegativePairKernel : public framework::OpKernel<T> {
public:
struct PredictionResult {
PredictionResult(T score, T label, T weight)
: score(score), label(label), weight(weight) {}
T score;
T label;
T weight;
};
void Compute(const framework::ExecutionContext& context) const override {
auto score_t = context.Input<Tensor>("Score");
auto label_t = context.Input<Tensor>("Label");
auto query_t = context.Input<Tensor>("QueryID");
auto acc_positive_t = context.Input<Tensor>("AccumulatePositivePair");
auto acc_negative_t = context.Input<Tensor>("AccumulateNegativePair");
auto acc_neutral_t = context.Input<Tensor>("AccumulateNeutralPair");
auto positive_t = context.Output<Tensor>("PositivePair");
auto negative_t = context.Output<Tensor>("NegativePair");
auto neutral_t = context.Output<Tensor>("NeutralPair");
auto weight_t = context.Input<Tensor>("Weight");
auto score = score_t->data<T>();
auto label = label_t->data<T>();
auto query = query_t->data<int64_t>();
const T* weight = nullptr;
if (weight_t != nullptr) {
weight = weight_t->data<T>();
}
T* positive = positive_t->mutable_data<T>(context.GetPlace());
T* negative = negative_t->mutable_data<T>(context.GetPlace());
T* neutral = neutral_t->mutable_data<T>(context.GetPlace());
auto score_dim = score_t->dims();
auto batch_size = score_dim[0];
auto width = score_dim[1];
auto column = context.Attr<int32_t>("column");
if (column < 0) {
column += width;
}
// construct document instances for each query: Query => List[<score#0,
// label#0, weight#0>, ...]
std::unordered_map<int64_t, std::vector<PredictionResult>> predictions;
for (auto i = 0; i < batch_size; ++i) {
if (predictions.find(query[i]) == predictions.end()) {
predictions.emplace(
std::make_pair(query[i], std::vector<PredictionResult>()));
}
predictions[query[i]].emplace_back(score[i * width + column], label[i],
weight_t != nullptr ? weight[i] : 1.0);
}
// for each query, accumulate pair counts
T pos = 0, neg = 0, neu = 0;
if (acc_positive_t != nullptr && acc_negative_t != nullptr &&
acc_neutral_t != nullptr) {
pos = acc_positive_t->data<T>()[0];
neg = acc_negative_t->data<T>()[0];
neu = acc_neutral_t->data<T>()[0];
}
auto evaluate_one_list = [&pos, &neg,
&neu](std::vector<PredictionResult> vec) {
for (auto ite1 = vec.begin(); ite1 != vec.end(); ++ite1) {
for (auto ite2 = ite1 + 1; ite2 != vec.end(); ++ite2) {
if (ite1->label == ite2->label) { // labels are equal, ignore.
continue;
}
T w = (ite1->weight + ite2->weight) * 0.5;
if (ite1->score == ite2->score) {
neu += w;
}
(ite1->score - ite2->score) * (ite1->label - ite2->label) > 0.0
? pos += w
: neg += w;
}
}
};
for (auto prediction : predictions) {
evaluate_one_list(prediction.second);
}
*positive = pos;
*negative = neg;
*neutral = neu;
}
};
} // namespace operators
} // namespace paddle
...@@ -92,76 +92,78 @@ class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -92,76 +92,78 @@ class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("MaxProbs", AddInput("MaxProbs",
"(Tensor, default Tensor<float>), a 2-D tensor with shape N x 1, " "(Tensor, default Tensor<float>) A 2-D tensor with shape N x 1, "
"where N is the batch size. Each row contains the max probability " "where N is the batch size. Each row contains the max probability "
"of an instance which computed by the previous top_k (k=1) " "of an instance which computed by the previous top_k (k=1) "
"operator."); "operator.");
AddInput("Indices", AddInput("Indices",
"(Tensor, default Tensor<int>), a 2-D tensor with shape N x 1, " "(Tensor, default Tensor<int>) A 2-D tensor with shape N x 1, "
"where N is the batch size. Each row contains the corresponding " "where N is the batch size. Each row contains the corresponding "
"index which computed by the previous top_k (k=1) operator."); "index which computed by the previous top_k (k=1) operator.");
AddInput("Labels", AddInput("Labels",
"(Tensor, default Tensor<int>), a 2-D tensor with shape N x 1, " "(Tensor, default Tensor<int>) A 2-D tensor with shape N x 1, "
"where N is the batch size. Each element is a label and the " "where N is the batch size. Each element is a label and the "
"value should be in [0, class_number - 1]."); "value should be in [0, class_number - 1].");
AddInput("Weights", AddInput("Weights",
"(Tensor, default Tensor<float>), a 2-D tensor with shape N x 1, " "(Tensor, default Tensor<float>) A 2-D tensor with shape N x 1, "
"where N is the batch size. This input is optional. If provided, " "where N is the batch size. This input is optional. If provided, "
"weight of instance would be considered when computing metrics.") "weight of instance would be considered when computing metrics.")
.AsDispensable(); .AsDispensable();
AddInput("StatesInfo", AddInput("StatesInfo",
"(Tensor, default Tensor<int>), a 2-D tensor with shape D x 4, " "(Tensor, default Tensor<int>) A 2-D tensor with shape D x 4, "
"where D is the number of classes. This input is optional. If " "where D is the number of classes. This input is optional. If "
"provided, current state will be accumulated to this state and " "provided, current state will be accumulated to this state and "
"the accumulation state will be as the output state.") "the accumulation state will be the output state.")
.AsDispensable(); .AsDispensable();
AddOutput("BatchMetrics", AddOutput("BatchMetrics",
"(Tensor, default Tensor<float>), a 1-D tensor with shape {6}." "(Tensor, default Tensor<float>) A 1-D tensor with shape {6}. "
"This output tensor contains metrics for current batch data." "This output tensor contains metrics for current batch data. "
"The layout is [macro average precision, macro average recall, " "The layout is [macro average precision, macro average recall, "
"macro f1 score, micro average precision, micro average recall, " "macro f1 score, micro average precision, micro average recall, "
"micro f1 score]"); "micro f1 score].");
AddOutput("AccumMetrics", AddOutput("AccumMetrics",
"(Tensor, default Tensor<float>), a 1-D tensor with shape {6}." "(Tensor, default Tensor<float>) A 1-D tensor with shape {6}. "
"This output tensor contains metrics for accumulated data." "This output tensor contains metrics for accumulated data. "
"The layout is [macro average precision, macro average recall, " "The layout is [macro average precision, macro average recall, "
"macro f1 score, micro average precision, micro average recall, " "macro f1 score, micro average precision, micro average recall, "
"micro f1 score]"); "micro f1 score].");
AddOutput("AccumStatesInfo", AddOutput("AccumStatesInfo",
"(Tensor, default Tensor<float>), a 2-D tensor with shape D x 4, " "(Tensor, default Tensor<float>) A 2-D tensor with shape D x 4, "
"where D is equal to class number. This output tensor contains " "where D is equal to class number. This output tensor contains "
"accumulated state variables used to compute metrics. The layout " "accumulated state variables used to compute metrics. The layout "
"for each class is [true positives, false positives, " "for each class is [true positives, false positives, "
"true negatives, false negatives]."); "true negatives, false negatives].");
AddAttr<int>("class_number", "Number of classes to be evaluated."); AddAttr<int>("class_number", "(int) Number of classes to be evaluated.");
AddComment(R"DOC( AddComment(R"DOC(
When given 'Input(Indices)' and 'Input(Labels)', this operator can be used Precision Recall Operator.
When given Input(Indices) and Input(Labels), this operator can be used
to compute various metrics including: to compute various metrics including:
- macro average precision 1. macro average precision
- macro average recall 2. macro average recall
- macro f1 score 3. macro f1 score
- micro average precision 4. micro average precision
- micro average recall 5. micro average recall
- micro f1 score 6. micro f1 score
To compute the above metrics, we need to do statistics for true positives, To compute the above metrics, we need to do statistics for true positives,
false positives and false negatives. Here count of true negatives is not false positives and false negatives. Here the count of true negatives is not
necessary, but counting it may provide potential usage and the cost is necessary, but counting it may provide potential usage and the cost is
trivial, so the operator also provides count of true negatives. trivial, so the operator also provides the count of true negatives.
We define state as a 2-D tensor with shape [class_number, 4]. Each row of a We define state as a 2-D tensor with shape [class_number, 4]. Each row of a
state contains statistic variables for corresponding class. Layout of each row state contains statistic variables for corresponding class. Layout of each row
is: TP(true positives), FP(false positives), TN(true negatives), is: TP(true positives), FP(false positives), TN(true negatives),
FN(false negatives). If 'Input(Weights)' provided, TP, FP, TN, FN will be FN(false negatives). If Input(Weights) is provided, TP, FP, TN, FN will be
calculated by given weight instead of instance count. calculated by given weight instead of the instance count.
This operator also supports metrics computing for cross-batch situation. To This operator also supports metrics computing for cross-batch situation. To
achieve this, 'Input(StatesInfo)' should be provided. State of current batch achieve this, Input(StatesInfo) should be provided. State of current batch
data will be accumulated to 'Input(StatesInfo)' and 'Output(AccumStatesInfo)' data will be accumulated to Input(StatesInfo) and Output(AccumStatesInfo)
is the accumulation state. is the accumulation state.
'Output(BatchMetrics)' is metrics of current batch data while Output(BatchMetrics) is metrics of current batch data while
'Output(AccumStatesInfo)' is metrics of accumulation data. Output(AccumStatesInfo) is metrics of accumulation data.
)DOC"); )DOC");
} }
......
...@@ -41,17 +41,24 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -41,17 +41,24 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of prelu operator."); AddInput("X", "The input tensor of prelu operator.");
AddInput("Alpha", "The alpha weight of PRelu operator."); AddInput("Alpha", "The alpha weight of prelu operator.");
AddOutput("Out", "The output tensor of PRelu operator."); AddOutput("Out", "The output tensor of prelu operator.");
AddComment(R"DOC(PRelu operator AddComment(R"DOC(
PRelu Operator.
The equation is: The equation is:
f(x) = alpha * x , for x < 0 $$
f(x) = x , for x >= 0 f(x) =
\begin{cases}
\alpha * x, \quad \text{if} \ x < 0 \\
x, \qquad \text{if} \ x >= 0
\end{cases}
$$
The input `X` can carry the LoD (Level of Details) information, The input `X` can carry the LoD (Level of Details) information,
or not. And the output shares the LoD with input `X`. or not. And the output shares the LoD information with input `X`.
)DOC"); )DOC");
} }
}; };
......
...@@ -83,22 +83,26 @@ class ProximalAdagradOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -83,22 +83,26 @@ class ProximalAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
"L1 regularization strength.") "L1 regularization strength.")
.SetDefault(0.0f); .SetDefault(0.0f);
AddAttr<float>("l2", AddAttr<float>("l2",
"(float, default 0.0)" "(float, default 0.0) "
"L2 regularization strength.") "L2 regularization strength.")
.SetDefault(0.0f); .SetDefault(0.0f);
AddComment(R"DOC( AddComment(R"DOC(
Proximal Adagrad Optimizer.
Optimizer that implements the proximal adagrad algorithm. Optimizer that implements the proximal adagrad algorithm:
moment = moment + grad * grad $$
prox_param = param - learning_rate * grad * (1 / sqrt(moment)) moment = moment + grad * grad \\
param = sign(prox_param) / (1 + learning_rate * l2) * prox\_param = param - learning\_rate * grad * (1 / \sqrt{moment}) \\
max { |prox_param| - learning_rate * l1 , 0 } param = sign(prox\_param) / (1 + learning\_rate * l2) *
\max(|prox\_param| - learning\_rate * l1 , 0)
$$
The paper that proposed Proximal GD: The paper that proposed Proximal GD:
(http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf) (http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf)
Here, we use the adagrad learning rate as specified here: Here, we use the adagrad learning rate as specified here:
(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
)DOC"); )DOC");
} }
}; };
......
...@@ -67,19 +67,23 @@ class ProximalGDOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -67,19 +67,23 @@ class ProximalGDOpMaker : public framework::OpProtoAndCheckerMaker {
"L1 regularization strength.") "L1 regularization strength.")
.SetDefault(0.0f); .SetDefault(0.0f);
AddAttr<float>("l2", AddAttr<float>("l2",
"(float, default 0.0)" "(float, default 0.0) "
"L2 regularization strength.") "L2 regularization strength.")
.SetDefault(0.0f); .SetDefault(0.0f);
AddComment(R"DOC( AddComment(R"DOC(
ProximalGD Operator.
Optimizer that implements the proximal gradient descent algorithm. Optimizer that implements the proximal gradient descent algorithm:
prox_param = param - learning_rate * grad $$
param = sign(prox_param) / (1 + learning_rate * l2) * prox\_param = param - learning\_rate * grad \\
max { |prox_param| - learning_rate * l1 , 0 } param = sign(prox\_param) / (1 + learning\_rate * l2) *
\max(|prox\_param| - learning\_rate * l1, 0)
$$
The paper that proposed Proximal Gradient Descent: The paper that proposed Proximal Gradient Descent:
(http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf) (http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf)
)DOC"); )DOC");
} }
}; };
......
...@@ -26,9 +26,9 @@ class RankLossOp : public framework::OperatorWithKernel { ...@@ -26,9 +26,9 @@ class RankLossOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
// input check // input check
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null"); PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null"); PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null"); PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null.");
auto label_dims = ctx->GetInputDim("Label"); auto label_dims = ctx->GetInputDim("Label");
auto left_dims = ctx->GetInputDim("Left"); auto left_dims = ctx->GetInputDim("Left");
...@@ -50,32 +50,32 @@ class RankLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -50,32 +50,32 @@ class RankLossOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Label", AddInput("Label",
"The label indicating A ranked higher than B or not, row vector."); "The label indicating A ranked higher than B or not, row vector.");
AddInput("Left", "The output of RankNet for doc A, vector."); AddInput("Left", "The output of RankNet for doc A, vector.");
AddInput("Right", "The output of RankNet for doc B, vetor"); AddInput("Right", "The output of RankNet for doc B, vetor.");
AddOutput("Out", "The output loss of RankLoss operator, vector."); AddOutput("Out", "The output loss of RankLoss operator, vector.");
AddComment(R"DOC(RankLoss operator AddComment(R"DOC(
RankLoss Operator.
Rank loss operator for RankNet[1]. RankNet is a pairwise ranking model with RankLoss operator for RankNet
(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf).
RankNet is a pairwise ranking model with
one training sample consisting of a pair of doc A and B, and the label P one training sample consisting of a pair of doc A and B, and the label P
indicating that A is ranked higher than B or not: indicating that A is ranked higher than B or not:
P = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of P = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of
the input pair. the input pair.
The RankLoss operator contains three inputs: Left (o_i), Right (o_j) and Label The RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label
(P_{i,j}), which represent the output of RankNet for two docs and the label (P_{i,j}), which represent the output of RankNet for the two docs and the label,
respectively, and yields the rank loss C_{i,j} by following the expression respectively, and yields the rank loss C_{i,j} using the following equation:
\f[ \f$$
C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\ C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}}) \\
o_{i,j} = o_i - o_j \\ o_{i,j} = o_i - o_j \\
\tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \} \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
\f] \f$$
The operator can take inputs of one sample or in batch. The operator can take inputs of one sample or in batch.
[1]. Chris Burges, Tal Shaked, Erin Renshaw, et al. Learning to
Rank using Gradient Descent.
http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf
)DOC"); )DOC");
} }
}; };
......
...@@ -509,14 +509,14 @@ class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -509,14 +509,14 @@ class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker {
AddInput(kInitialStates, "rnn initial states").AsDuplicable(); AddInput(kInitialStates, "rnn initial states").AsDuplicable();
AddInput(kParameters, AddInput(kParameters,
"Parameters are used by step block as its input. However, the " "Parameters are used by step block as its input. However, the "
"inputs is not a sequence tensor. Every time step, each operator " "input is not a sequence tensor. Every time step, each operator "
"in step block just use the parameter directly") "in step block just use the parameter directly.")
.AsDuplicable(); .AsDuplicable();
AddOutput(kOutputs, AddOutput(kOutputs,
"The output sequence of RNN. The sequence length must be same") "The output sequence of RNN. The sequence length must be same.")
.AsDuplicable(); .AsDuplicable();
AddOutput(kStepScopes, AddOutput(kStepScopes,
"StepScopes contains all local variables in each time step."); "StepScopes contain all local variables in each time step.");
AddAttr<std::vector<std::string>>(kExStates, AddAttr<std::vector<std::string>>(kExStates,
string::Sprintf( string::Sprintf(
R"DOC(The ex-state variable names. R"DOC(The ex-state variable names.
...@@ -556,10 +556,12 @@ if reverse is True ...@@ -556,10 +556,12 @@ if reverse is True
o o o o o o o o
)DOC").SetDefault(false); )DOC").SetDefault(false);
AddAttr<bool>(kIsTrain, "").SetDefault(true); AddAttr<bool>(kIsTrain, "").SetDefault(true);
AddComment(R"DOC(Static Length Recurrent Operator AddComment(R"DOC(
Static Length Recurrent Operator.
The static length recurrent operator can only operate on fixed size sequence
data, i.e. in each mini-batch, the sequence length of all inputs are the same.
The static length recurrent operator can only operate on fix sized sequence
data, i.e. in each mini-batch, the sequence length of all inputs are same.
)DOC"); )DOC");
} }
}; };
......
...@@ -80,24 +80,27 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -80,24 +80,27 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
ReduceOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) ReduceOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput("X",
"X", "(Tensor) The input tensor. Tensors with rank at most 6 are "
"(Tensor) The input tensor. Tensors with rank at most 6 are supported"); "supported.");
AddOutput("Out", "(Tensor) The result tensor."); AddOutput("Out", "(Tensor) The result tensor.");
AddAttr<int>( AddAttr<int>(
"dim", "dim",
"(int, default 1) The dimension to reduce. " "(int, default 0) The dimension to reduce. "
"Must be in the range [-rank(input), rank(input)). " "Must be in the range [-rank(input), rank(input)). "
"If `dim < 0`, the dim to reduce is `rank + dim`. " "If `dim < 0`, the dim to reduce is `rank + dim`. "
"Noting that reducing on the first dim will make the LoD info lost.") "Note that reducing on the first dim will make the LoD info lost.")
.SetDefault(0); .SetDefault(0);
AddAttr<bool>("keep_dim", AddAttr<bool>("keep_dim",
"(bool, default false) " "(bool, default false) "
"If true, retain the reduced dimension with length 1.") "If true, retain the reduced dimension with length 1.")
.SetDefault(false); .SetDefault(false);
comment_ = R"DOC( comment_ = R"DOC(
{ReduceOP} operator computes the {reduce} of input tensor along the given dimension. {ReduceOp} Operator.
The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
This operator computes the {reduce} of input tensor along the given dimension.
The result tensor has 1 fewer dimension than the input unless keep_dim is true.
)DOC"; )DOC";
AddComment(comment_); AddComment(comment_);
} }
......
...@@ -71,8 +71,11 @@ class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -71,8 +71,11 @@ class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of reshape operator."); AddInput("X", "The input tensor of reshape operator.");
AddOutput("Out", "The output tensor of reshape operator."); AddOutput("Out", "The output tensor of reshape operator.");
AddAttr<std::vector<int>>("shape", "Target shape of reshape operator."); AddAttr<std::vector<int>>("shape",
AddComment(R"DOC(Reshape operator "(vector<int>) "
"Target shape of reshape operator.");
AddComment(R"DOC(
Reshape Operator.
Reshape Input(X) into the shape specified by Attr(shape). Reshape Input(X) into the shape specified by Attr(shape).
...@@ -81,7 +84,7 @@ Given a 2-D tensor X with 2 rows and 2 columns ...@@ -81,7 +84,7 @@ Given a 2-D tensor X with 2 rows and 2 columns
[[1, 2], [3, 4]] [[1, 2], [3, 4]]
with target shape = [1, 4], the reshape operator will transform and target shape = [1, 4], the reshape operator will transform
the tensor X into a 1-D tensor: the tensor X into a 1-D tensor:
[1, 2, 3, 4] [1, 2, 3, 4]
......
...@@ -68,22 +68,22 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -68,22 +68,22 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", AddInput("Param",
"(Tensor, default Tensor<float>) " "(Tensor, default Tensor<float>) "
"Input parameter value that has to be updated"); "Input parameter value that has to be updated.");
AddInput("MeanSquare", AddInput("MeanSquare",
"(Tensor, default Tensor<float>)" "(Tensor, default Tensor<float>)"
" The mean square value that gets updated"); " The mean square value that gets updated.");
AddInput("LearningRate", AddInput("LearningRate",
"(Tensor, default Tensor<float>) " "(Tensor, default Tensor<float>) "
"The learning rate should be a tensor of size 1"); "The learning rate should be a tensor of size 1.");
AddInput("Grad", AddInput("Grad",
"(Tensor, default Tensor<float>) " "(Tensor, default Tensor<float>) "
"Input gradient of the parameter"); "Input gradient of the parameter.");
AddInput("Moment", AddInput("Moment",
"(Tensor, default Tensor<float>) The moment that gets updated"); "(Tensor, default Tensor<float>) The moment that gets updated.");
AddOutput("ParamOut", "(Tensor) Output updated parameter value"); AddOutput("ParamOut", "(Tensor) Output updated parameter value.");
AddOutput("MomentOut", "(Tensor) Output updated moment"); AddOutput("MomentOut", "(Tensor) Output updated moment.");
AddOutput("MeanSquareOut", "(Tensor) Output Mean squared updated value"); AddOutput("MeanSquareOut", "(Tensor) Output Mean squared updated value.");
AddAttr<float>("epsilon", AddAttr<float>("epsilon",
"(float, default 1e-10) Constant " "(float, default 1e-10) Constant "
...@@ -93,18 +93,19 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -93,18 +93,19 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
"(float, default 0.9) " "(float, default 0.9) "
"Discounting factor for coming gradient.") "Discounting factor for coming gradient.")
.SetDefault(0.9f); .SetDefault(0.9f);
AddAttr<float>("momentum", "(float, default 0.0) Constant value") AddAttr<float>("momentum", "(float, default 0.0) Constant value.")
.SetDefault(0.0f); .SetDefault(0.0f);
AddComment(R"DOC( AddComment(R"DOC(
Rmsprop Optimizer.
RMSprop $$
MeanSquareOut = decay * MeanSquare + (1 - decay) * Grad * Grad \\
MeanSquareOut = decay * MeanSquare + (1 - decay) * Grad * Grad
MomentOut = momentum * Moment + MomentOut = momentum * Moment +
LearningRate * Grad / sqrt(MeanSquareOut + epsilon) \frac{LearningRate * Grad}{\sqrt{MeanSquareOut + epsilon}} \\
ParamOut = Param - MomentOut ParamOut = Param - MomentOut
$$
The original slides that proposed RMSprop: Slide 29 of The original slides that proposed Rmsprop: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
)DOC"); )DOC");
......
...@@ -163,14 +163,19 @@ class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -163,14 +163,19 @@ class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker {
SaveOpProtoMaker(framework::OpProto *proto, SaveOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The tensor need to be saved"); AddInput("X", "(Tensor ) Input tensor to be saved");
AddComment(R"DOC(Save operator AddComment(R"DOC(
Save operator will serialize and write a tensor variable to disk file. Save operator
This operator will serialize and write a tensor variable to file on disk.
)DOC"); )DOC");
AddAttr<bool>("overwrite", "Overwrite the output file if exist") AddAttr<bool>("overwrite",
"(boolean, default true)"
"Overwrite the output file if exist")
.SetDefault(true); .SetDefault(true);
AddAttr<std::string>("file_path", AddAttr<std::string>("file_path",
"Variable will be saved to \"file_path\".") "(string)"
"The \"file_path\" where the variable will be saved.")
.AddCustomChecker( .AddCustomChecker(
[](const std::string &path) { return !path.empty(); }); [](const std::string &path) { return !path.empty(); });
} }
......
...@@ -40,13 +40,16 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -40,13 +40,16 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of scale operator."); AddInput("X", "(Tensor) Input tensor of scale operator.");
AddOutput("Out", "The output tensor of scale operator."); AddOutput("Out", "(Tensor) Output tensor of scale operator.");
AddComment(R"DOC(Scale operator AddComment(R"DOC(
Scale operator
The equation is: Out = scale*X $$Out = scale*X$$
)DOC"); )DOC");
AddAttr<AttrType>("scale", "The scaling factor of the scale operator.") AddAttr<AttrType>("scale",
"(float, default 0)"
"The scaling factor of the scale operator.")
.SetDefault(1.0); .SetDefault(1.0);
} }
}; };
......
...@@ -53,8 +53,10 @@ class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -53,8 +53,10 @@ class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker {
"(LodTensor)The output of seq_expand op." "(LodTensor)The output of seq_expand op."
"The lod of output will be as same as input(Y)'s lod."); "The lod of output will be as same as input(Y)'s lod.");
AddComment(R"DOC( AddComment(R"DOC(
Expand input(X) according to LOD of input(Y). Seq Expand Operator.
This operator expands input(X) according to LOD of input(Y).
Following are cases to better explain how this works:
Case 1: Case 1:
Given 2-level a LoDTensor input(X) Given 2-level a LoDTensor input(X)
......
...@@ -47,19 +47,19 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -47,19 +47,19 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X",
"(A vector of LoDTensor), the input is a vector of LoDTensor, " "(vector<LoDTensor>) Input is a vector of LoDTensor, "
"each of which is a variable-length sequence or nested sequence.") "each of which is a variable-length sequence or nested sequence.")
.AsDuplicable(); .AsDuplicable();
AddOutput("Out", AddOutput("Out",
"(A LoDTensor), the variable-length output of " "(LoDTensor), Variable-length output of "
"sequence_concat Op."); "sequence_concat Op.");
AddAttr<int>("axis", AddAttr<int>("axis",
"(int, default 0)" "(int, default 0) "
"The axis which the inputs will be joined with. " "The axis along which the inputs will be joined. "
"If axis is 0, the inputs will be joined with LoD index.") "If axis is 0, the inputs will be joined with LoD index.")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("level", AddAttr<int>("level",
"(int, default 0)" "(int, default 0) "
"The level at which the inputs will be joined. " "The level at which the inputs will be joined. "
"If the level is 0, the inputs will be joined at the nested " "If the level is 0, the inputs will be joined at the nested "
"sequence level. " "sequence level. "
...@@ -68,10 +68,13 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -68,10 +68,13 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
"The level should be less than the level number of inputs.") "The level should be less than the level number of inputs.")
.SetDefault(0); .SetDefault(0);
AddComment(R"DOC( AddComment(R"DOC(
The sequence_concat operator concatenates multiple LoDTensors. Sequence Concat Operator.
It only supports sequence (LoD Tensor with level number is 1)
or a nested sequence (LoD tensor with level number is 2) as its input. The sequence_concat operator concatenates multiple LoDTensors.
- Case1: It supports a sequence (LoD Tensor with level number is 1)
or a nested sequence (LoD tensor with level number is 2) as its input.
The following examples explain how the operator works:
- Case1:
If the axis is other than 0(here, axis is 1 and level is 1), If the axis is other than 0(here, axis is 1 and level is 1),
each input should have the same LoD information and the LoD each input should have the same LoD information and the LoD
information of the output keeps the same as the input. information of the output keeps the same as the input.
...@@ -80,7 +83,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -80,7 +83,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
LoD(x1) = {{0,2,4}, {0,1,2,3,4}}; Dims(x1) = (4,4,4) LoD(x1) = {{0,2,4}, {0,1,2,3,4}}; Dims(x1) = (4,4,4)
LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4) LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4)
- Case2: - Case2:
If the axis is 0(here, leve is 0), the inputs are concatenated along If the axis is 0(here, leve is 0), the inputs are concatenated along
time steps, the LoD information of the output need to re-compute. time steps, the LoD information of the output need to re-compute.
...@@ -88,14 +91,15 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -88,14 +91,15 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
LoD(x1) = {{0,3,5}, {0,1,2,3,5}}; Dims(x1) = (5,3,4) LoD(x1) = {{0,3,5}, {0,1,2,3,5}}; Dims(x1) = (5,3,4)
LoD(Out) = {{0,5,9}, {0,1,2,3,4,5,6,7,9}}; Dims(Out) = (9,3,4) LoD(Out) = {{0,5,9}, {0,1,2,3,4,5,6,7,9}}; Dims(Out) = (9,3,4)
- Case3: - Case3:
If the axis is 0(here, level is 1). If the axis is 0(here, level is 1).
LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4) LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4) LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4)
LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4) LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)
NOTE: The levels of all the inputs should be the same. NOTE: The levels of all the inputs should be the same.
)DOC"); )DOC");
} }
}; };
......
...@@ -105,10 +105,10 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -105,10 +105,10 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
"(LoDTensor) the input(X) is a LodTensor, which support " "(LoDTensor) the input(X) is a LodTensor, which supports "
"variable-time length input sequence. The underlying tensor in " "variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T, N), where, T is the " "this LoDTensor is a matrix with shape (T, N), where T is the "
"total time steps in this mini-batch, N is the input_hidden_size."); "total time steps in this mini-batch and N is the input_hidden_size.");
AddInput("PaddingData", AddInput("PaddingData",
"(Tensor, optional) the input(PaddingData) is an optional " "(Tensor, optional) the input(PaddingData) is an optional "
"parameter, and it is learnable. " "parameter, and it is learnable. "
...@@ -157,14 +157,16 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -157,14 +157,16 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
.GreaterThan(0); .GreaterThan(0);
AddComment(R"DOC( AddComment(R"DOC(
SequenceConvOp performs convolution operation on features of Sequence Conv Operator.
contextLength time-steps of each instance.
The convolution operation calculates the output based on the input, filter SequenceConvOp performs convolution operation on features of contextLength
and strides, paddings parameters. The size of each dimension of the time-steps of each instance. The convolution operation calculates the output
parameters is checked in the infer-shape. In order to ensure the equal based on the input, filter, strides and paddings parameters.
length of sequence before and after convolution, it is necessary to fill The size of each dimension of the parameters is checked during infer-shape.
the top and bottom of each sequence according to context_length, In order to ensure the equal length of sequence before and after convolution,
context_stride and context_start. it is necessary to fill the top and bottom of each sequence based on
context_length, context_stride and context_start.
)DOC"); )DOC");
} }
}; };
......
...@@ -27,6 +27,11 @@ class SequencePoolOp : public framework::OperatorWithKernel { ...@@ -27,6 +27,11 @@ class SequencePoolOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SequencePoolOp should not be null."); "Output(Out) of SequencePoolOp should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
if (ctx->Attrs().Get<std::string>("pooltype") == "MAX") {
PADDLE_ENFORCE(ctx->HasOutput("MaxIndex"),
"Output(MaxIndex) of SequencePoolOp should not be null.");
ctx->SetOutputDim("MaxIndex", ctx->GetInputDim("X"));
}
} }
}; };
...@@ -35,43 +40,50 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -35,43 +40,50 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
SequencePoolOpMaker(framework::OpProto* proto, SequencePoolOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor), the variable-length input of SequencePoolOp"); AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp");
AddOutput("Out", AddOutput("Out",
"(Tensor), output of SequencePoolOp, which does not contain LoD " "(Tensor) The output of SequencePoolOp does not contain LoD "
"infomation."); "infomation.");
AddOutput("MaxIndex",
"(Tensor<int>) This tensor is used for the sequence max-pooling "
"to record the max indexes.")
.AsIntermediate();
AddAttr<std::string>( AddAttr<std::string>(
"pooltype", "pooltype",
"(int, default AVERAGE) the pooling pooltype of SequencePoolOp.") "(int, default AVERAGE) the pooling pooltype of SequencePoolOp.")
.SetDefault("AVERAGE") .SetDefault("AVERAGE")
.InEnum({"AVERAGE", "SUM", "SQRT", "LAST", "FIRST", "MAX"}); .InEnum({"AVERAGE", "SUM", "SQRT", "LAST", "FIRST", "MAX"});
AddComment(R"DOC( AddComment(R"DOC(
SequencePoolOp pools features of all time-steps of each instance. Sequence Pool Operator.
It supports six pooling pooltype: The SequencePoolOp pools features of all time-steps of each instance.
- AVERAGE: Out[i] = average_{for each instance in i-th sequence}{X[i]} It supports six pooling types:
- SUM: Out[i] = sum_{for each instance in i-th sequence}{X[i]} 1. AVERAGE: Out[i] = $$avg(X_i)$$
- SQRT: Out[i] = sum_{for each instance in i-th sequence}{X[i]} 2. SUM: Out[i] = $$\sum_jX_{ij}$$
/ sqrt(i-th sequence length) 3. SQRT: Out[i] = $$\frac{\sum_jX_{ij}}{\sqrt{len(X_i)}}$$
- LAST: Out[i] = last instance in i-th sequence X[i] 4. LAST: Out[i] = last instance in i-th sequence X[i]
- FIRST: Out[i] = first instance in i-th sequence X[i] 5. FIRST: Out[i] = first instance in i-th sequence X[i]
- MAX: Out[i] = max_{for each instance in i-th sequence}{X[i]} 6. MAX: Out[i] = $$max(X_i)$$
For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps: The following example explains how this works:
For a mini-batch of 3 variable-length sentences,
Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2. containing 2, 3, and 2 time-steps:
Besides, for the sake of simplicity, we assume M=1 and N=1,
and the value of X = [[1, 3], [2, 4, 6], [5, 1]]. Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
Besides, for the sake of simplicity, we assume M=1 and N=1,
Thus, Out is a [3,1,1] Tensor without LoD infomation. and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
And for different pooltype, the value of Out is as follows:
Thus, Out is a [3,1,1] Tensor without LoD infomation.
- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 And for different pooltype, the value of Out is as follows:
- SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
- SQRT: [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2), - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
- SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
- SQRT: [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),
6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2) 6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)
- MAX: [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1) - MAX: [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)
- LAST: [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1) - LAST: [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
- FIRST: [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1) - FIRST: [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)
)DOC"); )DOC");
} }
}; };
...@@ -93,6 +105,12 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { ...@@ -93,6 +105,12 @@ class SequencePoolGradOp : public framework::OperatorWithKernel {
} }
ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
} }
protected:
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<Tensor>("X")->type());
}
}; };
} // namespace operators } // namespace operators
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence_pooling.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -34,7 +35,7 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -34,7 +35,7 @@ class SequencePoolKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X"); auto* in = context.Input<LoDTensor>("X");
auto* out = context.Output<LoDTensor>("Out"); auto* out = context.Output<Tensor>("Out");
std::string pooltype = context.Attr<std::string>("pooltype"); std::string pooltype = context.Attr<std::string>("pooltype");
auto dims = in->dims(); auto dims = in->dims();
...@@ -53,6 +54,16 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -53,6 +54,16 @@ class SequencePoolKernel : public framework::OpKernel<T> {
auto lod_level_0 = lod[0]; auto lod_level_0 = lod[0];
out->mutable_data<T>(context.GetPlace()); out->mutable_data<T>(context.GetPlace());
if (pooltype == "MAX") {
math::MaxSeqPoolFunctor<Place, T> max_pool;
auto* index = context.Output<Tensor>("MaxIndex");
index->Resize({dims});
index->mutable_data<int>(context.GetPlace());
max_pool(context.device_context(), *in, out, index);
return;
}
auto place = context.GetEigenDevice<Place>(); auto place = context.GetEigenDevice<Place>();
for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) { for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
Tensor in_t = in->Slice(static_cast<int>(lod_level_0[i]), Tensor in_t = in->Slice(static_cast<int>(lod_level_0[i]),
...@@ -69,8 +80,6 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -69,8 +80,6 @@ class SequencePoolKernel : public framework::OpKernel<T> {
} else if (pooltype == "SQRT") { } else if (pooltype == "SQRT") {
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) / out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
std::sqrt(static_cast<T>(h)); std::sqrt(static_cast<T>(h));
} else if (pooltype == "MAX") {
out_e.device(place) = in_e.maximum(Eigen::array<int, 1>({{0}}));
} else if (pooltype == "LAST") { } else if (pooltype == "LAST") {
out_e.device(place) = in_e.chip(h - 1, 0); out_e.device(place) = in_e.chip(h - 1, 0);
} else if (pooltype == "FIRST") { } else if (pooltype == "FIRST") {
...@@ -87,8 +96,8 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -87,8 +96,8 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* in = context.Input<LoDTensor>("X"); auto* in = context.Input<LoDTensor>("X");
auto* out_g = context.Input<Tensor>(framework::GradVarName("Out"));
auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X")); auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
std::string pooltype = context.Attr<std::string>("pooltype"); std::string pooltype = context.Attr<std::string>("pooltype");
auto dims = in->dims(); auto dims = in->dims();
...@@ -96,6 +105,14 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -96,6 +105,14 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
int64_t w = in->numel() / dims[0]; int64_t w = in->numel() / dims[0];
in_g->mutable_data<T>(context.GetPlace()); in_g->mutable_data<T>(context.GetPlace());
if (pooltype == "MAX") {
math::MaxSeqPoolGradFunctor<Place, T> max_pool_grad;
auto* index = context.Input<Tensor>("MaxIndex");
max_pool_grad(context.device_context(), *out_g, *index, in_g);
return;
}
if (pooltype == "LAST" || pooltype == "FIRST") { if (pooltype == "LAST" || pooltype == "FIRST") {
// set X@Grad be zero at first when pooltype is LAST/FIRST // set X@Grad be zero at first when pooltype is LAST/FIRST
math::SetConstant<Place, T> functor; math::SetConstant<Place, T> functor;
...@@ -118,20 +135,6 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -118,20 +135,6 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
} else if (pooltype == "SQRT") { } else if (pooltype == "SQRT") {
in_g_e.device(place) = in_g_e.device(place) =
(out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast); (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
} else if (pooltype == "MAX") {
auto in_t =
in->Slice(static_cast<int>(lod[i]), static_cast<int>(lod[i + 1]));
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
in_t_map(in_t.data<T>(), h, w);
int row_id;
Eigen::array<int, 2> extents{{1, 1}};
for (int col_id = 0; col_id < w; col_id++) {
in_t_map.col(col_id).maxCoeff(&row_id);
Eigen::array<int, 2> in_offsets{{row_id, col_id}};
Eigen::array<int, 2> out_offsets{{0, col_id}};
in_g_e.slice(in_offsets, extents).device(place) =
out_g_e.slice(out_offsets, extents);
}
} else if (pooltype == "LAST") { } else if (pooltype == "LAST") {
in_g_e.chip(h - 1, 0).device(place) = out_g_e; in_g_e.chip(h - 1, 0).device(place) = out_g_e;
} else if (pooltype == "FIRST") { } else if (pooltype == "FIRST") {
......
...@@ -43,20 +43,24 @@ class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -43,20 +43,24 @@ class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
"(LoDTensor) 1-D or 2-D output LoDTensor with the 2-nd dimension " "(LoDTensor) 1-D or 2-D output LoDTensor with the 2-nd dimension "
"of length 1."); "of length 1.");
AddComment(R"DOC( AddComment(R"DOC(
SequenceSoftmaxOp computes softmax activation among all time-steps for each Sequence Softmax Operator.
SequenceSoftmaxOp computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either [N, 1] or [N], where N is the sum of all sequences' input Tensor can be either [N, 1] or [N], where N is the sum of the length
lengths. of all sequences.
Equation: The algorithm works as follows:
for i-th sequence in a mini-batch: for i-th sequence in a mini-batch:
Out(X[lod[i]:lod[i+1]], :) = $$Out(X[lod[i]:lod[i+1]], :) =
exp(X[lod[i]:lod[i+1], :]) / sum(exp(X[lod[i]:lod[i+1], :])) \frac{\exp(X[lod[i]:lod[i+1], :])}
{\sum(\exp(X[lod[i]:lod[i+1], :]))}$$
For example, for a mini-batch of 3 sequences with variable-length, For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7], each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
then softmax will be computed among X[0:2, :], X[2:5, :], X[5:7, :] then softmax will be computed among X[0:2, :], X[2:5, :], X[5:7, :]
and N turns out to be 7. and N turns out to be 7.
)DOC"); )DOC");
} }
}; };
......
...@@ -45,15 +45,17 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -45,15 +45,17 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SGDOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) SGDOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "Input parameter"); AddInput("Param", "(Tensor) Input parameter");
AddInput("LearningRate", "Learning rate of SGD"); AddInput("LearningRate", "(Tensor) Learning rate of SGD");
AddInput("Grad", "Input gradient"); AddInput("Grad", "(Tensor) Input gradient");
AddOutput("ParamOut", "output parameter"); AddOutput("ParamOut", "(Tensor) Output parameter");
AddComment(R"DOC( AddComment(R"DOC(
Simplest sgd algorithm. SGD operator
param_out = param - learning_rate * grad; This operator implements one step of the stochastic gradient descent algorithm.
$$param_out = param - learning_rate * grad$$
)DOC"); )DOC");
} }
......
...@@ -107,26 +107,28 @@ class SigmoidCrossEntropyWithLogitsOpMaker ...@@ -107,26 +107,28 @@ class SigmoidCrossEntropyWithLogitsOpMaker
AddComment(R"DOC( AddComment(R"DOC(
SigmoidCrossEntropyWithLogits Operator. SigmoidCrossEntropyWithLogits Operator.
This measures the elementwise probability error in discrete classification tasks This measures the element-wise probability error in classification tasks
in which each class is independent. This can be thought of as predicting labels in which each class is independent. This can be thought of as predicting labels
for a data-point that are not mutually exclusive. For example, a news article for a data-point, where labels are not mutually exclusive.
can be about politics, technology or sports at the same time or none of these. For example, a news article can be about politics, technology or sports
at the same time or none of these.
The logistic loss is given as follows: The logistic loss is given as follows:
loss = -Labels * log(sigmoid(X)) - (1 - Labels) * log(1 - sigmoid(X)) $$loss = -Labels * \log(\sigma(X)) - (1 - Labels) * \log(1 - \sigma(X))$$
We know that sigmoid(X) = (1 / (1 + exp(-X))). By substituting this we get We know that $$\sigma(X) = (1 / (1 + \exp(-X)))$$. By substituting this we get:
loss = X - X * Labels + log(1 + exp(-X)) $$loss = X - X * Labels + \log(1 + \exp(-X))$$
For stability and to prevent overflow of exp(-X) when X < 0, For stability and to prevent overflow of $$\exp(-X)$$ when X < 0,
we can reformulate the loss as follows: we reformulate the loss as follows:
loss = max(X, 0) - X * Labels + log(1 + exp(-abs(X))) $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-|X|))$$
Both the input `X` and `Labels` can carry the LoD (Level of Details) information. Both the input `X` and `Labels` can carry the LoD (Level of Details) information.
However the output only shares the LoD with input `X`. However the output only shares the LoD with input `X`.
)DOC"); )DOC");
} }
}; };
......
...@@ -38,9 +38,10 @@ class SignOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -38,9 +38,10 @@ class SignOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor of sign operator."); AddInput("X", "(Tensor) Input tensor of sign operator.");
AddOutput("Out", "(Tensor) Output tensor of sign operator."); AddOutput("Out", "(Tensor) Output tensor of sign operator.");
AddComment(R"DOC(Sign operator AddComment(R"DOC(
Sign operator
The equation is: Out = X.sign() $$Out = X.sign()$$
)DOC"); )DOC");
} }
}; };
......
...@@ -77,14 +77,17 @@ class SmoothL1LossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -77,14 +77,17 @@ class SmoothL1LossOpMaker : public framework::OpProtoAndCheckerMaker {
"A float scalar with default value 3.0.") "A float scalar with default value 3.0.")
.SetDefault(3.0); .SetDefault(3.0);
AddComment(R"DOC( AddComment(R"DOC(
Compute smooth l1 loss for input and target. The operator take the 1st Smooth L1 Loss Operator.
dimension of input as batch size. For each instance, it will compute
smooth l1 loss element by element first and sum all losses to one value. This operator computes the smooth l1 loss for input and target.
So the output shape is [batch_size, 1]. The operator takes the first dimension of input as the batch size.
For each instance, it computes the smooth l1 loss element by element first
and then sums all the losses. So the resulting output shape
is [batch_size, 1].
The equation is: The equation is:
loss = 0.5 * (sigma * (x-y))^2 if abs(x - y) < 1 / sigma^2 loss = $$0.5 * (\sigma * (x-y))^2$$ if $$|x - y| < 1 /({\sigma}^2)$$
abs(x - y) - 0.5 / sigma^2 otherwise $$\frac{|x - y| - 0.5}{{\sigma}^2}$$ otherwise
)DOC"); )DOC");
} }
......
...@@ -44,20 +44,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -44,20 +44,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
"2-D with shape [batch_size, input_feature_dimensions]."); "2-D with shape [batch_size, input_feature_dimensions].");
AddOutput("Y", "The normalized values with the same shape as X."); AddOutput("Y", "The normalized values with the same shape as X.");
AddComment(R"DOC( AddComment(R"DOC(
The input of softmax operator is a 2-D tensor with shape N x K (N is the Softmax Operator.
The input of the softmax operator is a 2-D tensor with shape N x K (N is the
batch_size, K is the dimension of input feature). The output tensor has the batch_size, K is the dimension of input feature). The output tensor has the
same shape as the input tensor. same shape as the input tensor.
For each row of the input tensor, the softmax operator squashes the For each row of the input tensor, the softmax operator squashes the
K-dimensional vector of arbitrary real values to a K-dimensional vector of real K-dimensional vector of arbitrary real values to a K-dimensional vector of real
values in the range [0, 1] that add up to 1. Specifically, it computes the values in the range [0, 1] that add up to 1.
exponential of the given dimension and the sum of exponential values of all It computes the exponential of the given dimension and the sum of exponential
the other dimensions in the K-dimensional vector input. Then the ratio of the values of all the other dimensions in the K-dimensional vector input.
exponential of the given dimension and the sum of exponential values of all Then the ratio of the exponential of the given dimension and the sum of
the other dimensions is the output of the softmax operator. exponential values of all the other dimensions is the output of the softmax
operator.
For each row `i` and each column `j` in input X, we have: For each row `i` and each column `j` in input X, we have:
Y[i, j] = exp(X[i, j]) / sum_j(exp(X[i, j])) $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$
)DOC"); )DOC");
} }
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/softmax_with_cross_entropy_op.h" #include "paddle/operators/softmax_with_cross_entropy_op.h"
#include <paddle/function/TensorType.h> #include <paddle/function/TensorType.h>
...@@ -30,12 +30,10 @@ class SoftmaxWithCrossEntropyOpMaker ...@@ -30,12 +30,10 @@ class SoftmaxWithCrossEntropyOpMaker
"which is a 2-D tensor with shape [N x K]. N is the batch_size, " "which is a 2-D tensor with shape [N x K]. N is the batch_size, "
"and K is the class number."); "and K is the class number.");
AddInput("Label", AddInput("Label",
"(Tensor, default: Tensor<int>), The ground truth which is a 2-D " "(Tensor) The ground truth which is a 2-D tensor. If soft_label "
"tensor. " "is set to false, Label is a Tensor<int64> with shape [N x 1]. If "
"If softLabel is set to false, Label is a Tensor<int> with shape " "soft_label is set to true, Label is a Tensor<float/double> with "
"[N x 1]." "shape [N x K].");
"If softLabel is set to true, Label is a Tensor<float/double> "
"with shape [N x K].");
AddOutput( AddOutput(
"Softmax", "Softmax",
"(Tensor, default: Tensor<float>), A 2-D tensor with shape [N x K]. " "(Tensor, default: Tensor<float>), A 2-D tensor with shape [N x K]. "
...@@ -51,32 +49,34 @@ class SoftmaxWithCrossEntropyOpMaker ...@@ -51,32 +49,34 @@ class SoftmaxWithCrossEntropyOpMaker
"the given labels as soft labels.") "the given labels as soft labels.")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Cross entropy loss with softmax are used as the output layer extensively. This Softmax With Cross Entropy Operator.
Cross entropy loss with softmax is used as the output layer extensively. This
operator computes the softmax normalized values for each row of the input operator computes the softmax normalized values for each row of the input
tensor, after which cross-entropy loss is then computed. This provides a more tensor, after which cross-entropy loss is computed. This provides a more
numerically stable gradient. numerically stable gradient.
Because this operators performs a softmax on logits internally, it expects Because this operator performs a softmax on logits internally, it expects
unscaled logits. Please do not call this op with the output of softmax operator, unscaled logits. This operator should not be used with the output of
which will produce incorrect results. softmax operator since that would produce incorrect results.
When the attribute softLabel is set false, this operators expects mutually When the attribute soft_label is set false, this operators expects mutually
exclusive hard labels, each sample in a batch is in exactly one class with exclusive hard labels, each sample in a batch is in exactly one class with a
probabilities 1. Each sample in the batch with one and only one label. probability of 1.0. Each sample in the batch will have a single label.
Equation: The equation is as follows:
1) hard label (one-hot label) 1) Hard label (one-hot label, so every sample has exactly one class)
Loss_j = \f$ -\text{Logit}_{Label_j} + $$Loss_j = \f$ -\text{Logit}_{Label_j} +
\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right),
j = 1, ..., K $\f j = 1, ..., K $\f$$
2) soft label (a distribution over all classes) 2) Soft label (each sample can have a distribution over all classes)
Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - $$Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i -
\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right),
j = 1,...,K $\f j = 1,...,K $\f$$
)DOC"); )DOC");
} }
...@@ -196,6 +196,8 @@ REGISTER_OPERATOR(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, ...@@ -196,6 +196,8 @@ REGISTER_OPERATOR(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp,
REGISTER_OPERATOR(softmax_with_cross_entropy_grad, REGISTER_OPERATOR(softmax_with_cross_entropy_grad,
ops::SoftmaxWithCrossEntropyOpGrad); ops::SoftmaxWithCrossEntropyOpGrad);
REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy, REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy,
ops::SoftmaxWithCrossEntropyKernel<float>); ops::SoftmaxWithCrossEntropyKernel<float>,
ops::SoftmaxWithCrossEntropyKernel<double>);
REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad, REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad,
ops::SoftmaxWithCrossEntropyGradKernel<float>); ops::SoftmaxWithCrossEntropyGradKernel<float>,
ops::SoftmaxWithCrossEntropyGradKernel<double>);
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
...@@ -24,7 +24,7 @@ using Tensor = framework::Tensor; ...@@ -24,7 +24,7 @@ using Tensor = framework::Tensor;
namespace { namespace {
template <typename T> template <typename T>
__global__ void CrossEntropyGrad(T* logit_grad, const T* loss_grad, __global__ void CrossEntropyGrad(T* logit_grad, const T* loss_grad,
const int* labels, const int batch_size, const int64_t* labels, const int batch_size,
const int class_num) { const int class_num) {
int tid = blockIdx.x * blockDim.x + threadIdx.x; int tid = blockIdx.x * blockDim.x + threadIdx.x;
int sample_idx = tid / class_num; int sample_idx = tid / class_num;
...@@ -50,7 +50,7 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad, ...@@ -50,7 +50,7 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad,
int ids = blockIdx.x * blockDim.x + threadIdx.x; int ids = blockIdx.x * blockDim.x + threadIdx.x;
if (ids < batch_size * class_num) { if (ids < batch_size * class_num) {
int row_ids = ids / class_num; int row_ids = ids / class_num;
logit_grad[ids] = logit_grad[ids] * (loss_grad[row_ids] - labels[ids]); logit_grad[ids] = loss_grad[row_ids] * (logit_grad[ids] - labels[ids]);
} }
} }
} // namespace } // namespace
...@@ -104,7 +104,7 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> { ...@@ -104,7 +104,7 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
.stream()>>>(logit_grad_data, loss_grad_data, .stream()>>>(logit_grad_data, loss_grad_data,
label_data, batch_size, class_num); label_data, batch_size, class_num);
} else { } else {
const int* label_data = labels->data<int>(); const int64_t* label_data = labels->data<int64_t>();
CrossEntropyGrad<T><<< CrossEntropyGrad<T><<<
grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>( grid, block, 0, reinterpret_cast<const platform::CUDADeviceContext&>(
context.device_context()) context.device_context())
...@@ -119,6 +119,8 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> { ...@@ -119,6 +119,8 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy, REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy,
ops::SoftmaxWithCrossEntropyCUDAKernel<float>); ops::SoftmaxWithCrossEntropyCUDAKernel<float>,
ops::SoftmaxWithCrossEntropyCUDAKernel<double>);
REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy_grad, REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy_grad,
ops::SoftmaxWithCrossEntropyGradCUDAKernel<float>); ops::SoftmaxWithCrossEntropyGradCUDAKernel<float>,
ops::SoftmaxWithCrossEntropyGradCUDAKernel<double>);
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
...@@ -60,25 +60,25 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> { ...@@ -60,25 +60,25 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
logit_grad->ShareDataWith(*context.Input<Tensor>("Softmax")); logit_grad->ShareDataWith(*context.Input<Tensor>("Softmax"));
const int class_num = logit_grad->dims()[1]; const int class_num = logit_grad->dims()[1];
if (context.Attr<bool>("soft_label")) {
auto out_grad_mat = EigenMatrix<T>::From(*out_grad); auto out_grad_mat = EigenMatrix<T>::From(*out_grad);
auto logit_grad_mat = EigenMatrix<T>::From(*logit_grad); auto logit_grad_mat = EigenMatrix<T>::From(*logit_grad);
auto lbl_mat = EigenMatrix<T>::From(*labels);
if (context.Attr<bool>("soft_label")) {
auto lbl_mat = EigenMatrix<T>::From(*labels);
logit_grad_mat.device(context.GetEigenDevice<platform::CPUPlace>()) = logit_grad_mat.device(context.GetEigenDevice<platform::CPUPlace>()) =
logit_grad_mat * out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num)) *
(out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num)) - (logit_grad_mat - lbl_mat);
lbl_mat);
} else { } else {
logit_grad_mat.device(context.GetEigenDevice<platform::CPUPlace>()) =
logit_grad_mat *
out_grad_mat.broadcast(Eigen::DSizes<int, 2>(1, class_num));
const int batch_size = logit_grad->dims()[0]; const int batch_size = logit_grad->dims()[0];
const int* label_data = labels->data<int>(); const int64_t* label_data = labels->data<int64_t>();
const T* out_grad_data = out_grad->data<T>();
T* logit_grad_data = logit_grad->data<T>(); T* logit_grad_data = logit_grad->data<T>();
const T* out_grad_data = out_grad->data<T>();
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
int index = i * class_num + label_data[i]; logit_grad_data[i * class_num + label_data[i]] -= out_grad_data[i];
logit_grad_data[index] =
out_grad_data[i] * (logit_grad_data[index] - 1.);
} }
} }
} }
......
...@@ -67,11 +67,15 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -67,11 +67,15 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SplitOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) SplitOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensor of split operator."); AddInput("X", "(Tensor) Input tensor of the split operator.");
AddOutput("Out", "the output tensors of split operator.").AsDuplicable(); AddOutput("Out", "(Tensor) Output tensors of the split operator.")
.AsDuplicable();
AddComment(R"DOC( AddComment(R"DOC(
Split the input tensor into multiple sub-tensors. Split operator
Example:
This operator splits the input tensor into multiple sub-tensors.
Example:
Input = [[1,2], Input = [[1,2],
[3,4], [3,4],
[5,6]] [5,6]]
...@@ -83,14 +87,18 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -83,14 +87,18 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC"); )DOC");
AddAttr<std::vector<int>>("sections", AddAttr<std::vector<int>>("sections",
"the length for each" "(vector<int>) "
"output along with the specify axis.") "the length of each output along the "
"specified axis.")
.SetDefault(std::vector<int>{}); .SetDefault(std::vector<int>{});
AddAttr<int>("num", AddAttr<int>("num",
"number of the sub-tensors, it must evenly divide " "(int, default 0)"
"Number of sub-tensors. This must evenly divide "
"Input.dims()[axis]") "Input.dims()[axis]")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("axis", "The axis which the input will be splited on.") AddAttr<int>("axis",
"(int, default 0) "
"The axis which the input will be splited on.")
.SetDefault(0); .SetDefault(0);
} }
}; };
......
...@@ -59,23 +59,26 @@ class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -59,23 +59,26 @@ class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
SquaredL2DistanceOpMaker(framework::OpProto* proto, SquaredL2DistanceOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of SquaredL2DistanceOp."); AddInput("X", "(Tensor) Input of SquaredL2DistanceOp.");
AddInput("Y", "Target of SquaredL2DistanceOp."); AddInput("Y", "(Tensor) Target of SquaredL2DistanceOp.");
AddOutput("sub_result", AddOutput("sub_result",
"Buffering substraction result which " "(Tensor) Buffering subtraction result which "
"will be reused in backward.") "will be reused in backward.")
.AsIntermediate(); .AsIntermediate();
AddOutput("Out", "Squared l2 distance between input and target."); AddOutput("Out", "(Tensor) Squared l2 distance between input and target.");
AddComment(R"DOC( AddComment(R"DOC(
SquaredL2DistanceOp will cacluate the squared L2 distance for SquaredL2Distance operator
input and target. Number of distance value equals to the
first dimension of input. First dimension of target could be equal to This operator will cacluate the squared L2 distance for the input and
input or to 1. If the first dimension of target is 1, SquaredL2DistanceOp the target. Number of distance value will be equal to the first dimension
will broadcast target's first dimension to input's first dimension. of input. First dimension of the target could be equal to the input or to 1.
You can decide whether calculate the gradient of input and target. If the first dimension of target is 1, the operator will broadcast target's
first dimension to input's first dimension. During backward propagation,
Both the input X and Y can carry the LoD (Level of Details) information, the user can decide whether to calculate the gradient of the input or
or not. But the output only shares the LoD with input X. the target or both.
Both the input X and Y can carry the LoD (Level of Details) information.
However, the output only shares the LoD information with input X.
)DOC"); )DOC");
} }
}; };
......
...@@ -52,13 +52,13 @@ class SquaredL2NormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -52,13 +52,13 @@ class SquaredL2NormOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) The input of squared_l2_norm op."); AddInput("X", "(Tensor) The input of squared_l2_norm op.");
AddOutput("Out", "(Float) The output of squared_l2_norm op."); AddOutput("Out", "(Scalar) The output of squared_l2_norm op.");
AddComment(R"DOC( AddComment(R"DOC(
SquaredL2Norm Operator. SquaredL2Norm Operator.
Computes the squared L2 norm of a tensor. Computes the squared L2 norm of a tensor.
Out = sum (X ** 2) $$Out = \sum_{i} X_{i}^2$$
)DOC"); )DOC");
} }
......
...@@ -45,13 +45,15 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -45,13 +45,15 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SumOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) SumOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensors of sum operator.").AsDuplicable(); AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
AddOutput("Out", "the output tensor of sum operator."); .AsDuplicable();
AddOutput("Out", "(Tensor) The output tensor of sum operator.");
AddComment(R"DOC( AddComment(R"DOC(
Sum the input tensors. Sum operator.
All the inputs can carry the LoD (Level of Details) information, This operators sums the input tensors. All the inputs can carry the
or not. But the output only shares the LoD with the first input. LoD (Level of Details) information. However, the output only shares
the LoD information with the first input.
)DOC"); )DOC");
} }
}; };
......
...@@ -48,20 +48,20 @@ class TopkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -48,20 +48,20 @@ class TopkOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
TopkOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) TopkOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of Topk op"); AddInput("X", "(Tensor) The input of Topk op");
AddOutput("Out", "The output tensor of Topk op"); AddOutput("Out", "(Tensor) The output tensor of Topk op");
AddOutput("Indices", "The indices of Topk elements of input"); AddOutput("Indices", "(Tensor) The indices of Topk elements of input");
AddComment( AddComment(R"DOC(
R"DOC(If the input is a vector (1d tensor), Top K operator
finds the k largest entries in the vector
and outputs their values and indices as vectors.
Thus values[j] is the j-th largest entry in input,
and its index is indices[j].
For matrices, computes the top k entries in each row. )DOC"); If the input is a vector (1d tensor), this operator finds the k largest
entries in the vector and outputs their values and indices as vectors.
Thus values[j] is the j-th largest entry in input, and its index is indices[j].
For matrices, this operator computes the top k entries in each row. )DOC");
AddAttr<int>("k", AddAttr<int>("k",
"Number of top elements to look for along the last " "(int, default 1) Number of top elements to look for along "
"dimension (along each row for matrices).") "the last dimension (along each row for matrices).")
.SetDefault(1); .SetDefault(1);
} }
}; };
......
...@@ -32,7 +32,7 @@ class TransposeOp : public framework::OperatorWithKernel { ...@@ -32,7 +32,7 @@ class TransposeOp : public framework::OperatorWithKernel {
size_t axis_size = axis.size(); size_t axis_size = axis.size();
PADDLE_ENFORCE_EQ(x_rank, axis_size, PADDLE_ENFORCE_EQ(x_rank, axis_size,
"the input tensor's rank(%d) " "The input tensor's rank(%d) "
"should be equal to the axis's size(%d)", "should be equal to the axis's size(%d)",
x_rank, axis_size); x_rank, axis_size);
...@@ -64,12 +64,14 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -64,12 +64,14 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", "(Tensor)The output tensor"); AddOutput("Out", "(Tensor)The output tensor");
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"axis", "axis",
"(vector<int>)a list of values, and the size of the list should be " "(vector<int>)A list of values, and the size of the list should be "
"the same with the input tensor rank, the tensor will " "the same with the input tensor rank, the tensor will "
"permute the axes according the the values given"); "permute the axes according the the values given");
AddComment(R"DOC( AddComment(R"DOC(
The Tensor will be permuted according to the axis values given. Transpose Operator.
The op is very much like the numpy.transpose function in python
The input tensor will be permuted according to the axis values given.
The op functions similar to how numpy.transpose works in python.
For example: For example:
>> input = numpy.arange(6).reshape((2,3)) >> input = numpy.arange(6).reshape((2,3))
>> input >> input
...@@ -83,6 +85,7 @@ For example: ...@@ -83,6 +85,7 @@ For example:
[2, 5]]) [2, 5]])
So, given a input tensor of shape(N, C, H, W) and the axis is {0, 2, 3, 1}, So, given a input tensor of shape(N, C, H, W) and the axis is {0, 2, 3, 1},
the output tensor shape will be (N, H, W, C) the output tensor shape will be (N, H, W, C)
)DOC"); )DOC");
} }
}; };
......
...@@ -74,18 +74,30 @@ class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -74,18 +74,30 @@ class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
UniformRandomOpMaker(framework::OpProto* proto, UniformRandomOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "The output tensor of uniform random op"); AddOutput("Out", "(Tensor) The output tensor of uniform random op");
AddComment(R"DOC(Uniform random operator. AddComment(R"DOC(
Used to initialize tensor with uniform random generator. Uniform random operator.
This operator initializes a tensor with random values sampled from a
uniform distribution.
)DOC"); )DOC");
AddAttr<std::vector<int>>("shape", "the dimension of random tensor"); AddAttr<std::vector<int>>("shape",
AddAttr<float>("min", "Minimum value of uniform random").SetDefault(-1.0f); "(vector<int>) The shape of the output tensor");
AddAttr<float>("max", "Maximun value of uniform random").SetDefault(1.0f); AddAttr<float>("min",
"(float, default -1.0) "
"Minimum value of uniform random")
.SetDefault(-1.0f);
AddAttr<float>("max",
"(float, default 1.0) "
"Maximun value of uniform random")
.SetDefault(1.0f);
AddAttr<int>("seed", AddAttr<int>("seed",
"Random seed of uniform random. " "(int, default 0) "
"0 means generate a seed by system") "Random seed used for generating samples. "
"0 means use a seed generated by the system.")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("data_type", "output tensor data type") AddAttr<int>("data_type", "(int, default 5(FP32)) Output tensor data type")
.SetDefault(framework::DataType::FP32); .SetDefault(framework::DataType::FP32);
} }
}; };
......
...@@ -238,7 +238,9 @@ void BindVarDsec(py::module &m) { ...@@ -238,7 +238,9 @@ void BindVarDsec(py::module &m) {
.value("SELECTED_ROWS", VarDesc::SELECTED_ROWS) .value("SELECTED_ROWS", VarDesc::SELECTED_ROWS)
.value("FEED_MINIBATCH", VarDesc::FEED_MINIBATCH) .value("FEED_MINIBATCH", VarDesc::FEED_MINIBATCH)
.value("FETCH_LIST", VarDesc::FETCH_LIST) .value("FETCH_LIST", VarDesc::FETCH_LIST)
.value("STEP_SCOPES", VarDesc::STEP_SCOPES); .value("STEP_SCOPES", VarDesc::STEP_SCOPES)
.value("LOD_RANK_TABLE", VarDesc::LOD_RANK_TABLE)
.value("LOD_TENSOR_ARRAY", VarDesc::LOD_TENSOR_ARRAY);
} }
void BindOpDesc(py::module &m) { void BindOpDesc(py::module &m) {
......
...@@ -21,7 +21,9 @@ limitations under the License. */ ...@@ -21,7 +21,9 @@ limitations under the License. */
#include "paddle/framework/executor.h" #include "paddle/framework/executor.h"
#include "paddle/framework/feed_fetch_method.h" #include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/framework.pb.h" #include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/prune.h" #include "paddle/framework/prune.h"
#include "paddle/framework/selected_rows.h" #include "paddle/framework/selected_rows.h"
#include "paddle/framework/tensor_array.h" #include "paddle/framework/tensor_array.h"
...@@ -224,11 +226,17 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -224,11 +226,17 @@ All parameter, weight, gradient are variables in Paddle.
return self.GetMutable<LoDTensor>(); return self.GetMutable<LoDTensor>();
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("get_lod_rank_table",
[](Variable &self) { return self.GetMutable<LoDRankTable>(); },
py::return_value_policy::reference)
.def("get_selected_rows", .def("get_selected_rows",
[](Variable &self) -> SelectedRows * { [](Variable &self) -> SelectedRows * {
return self.GetMutable<SelectedRows>(); return self.GetMutable<SelectedRows>();
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("get_lod_tensor_array",
[](Variable &self) { return self.GetMutable<LoDTensorArray>(); },
py::return_value_policy::reference)
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
.def("get_communicator", .def("get_communicator",
[](Variable &self) -> platform::Communicator * { [](Variable &self) -> platform::Communicator * {
...@@ -492,6 +500,32 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -492,6 +500,32 @@ All parameter, weight, gradient are variables in Paddle.
BindVarDsec(m); BindVarDsec(m);
BindOpDesc(m); BindOpDesc(m);
py::class_<framework::LoDRankTable>(m, "LodRankTable")
.def("items", [](framework::LoDRankTable &table) {
std::vector<std::pair<size_t, size_t>> res;
for (auto &item : table.items()) {
res.push_back({item.index, item.length});
}
return res;
});
py::class_<LoDTensorArray>(m, "LoDTensorArray")
.def("__getitem__",
[](LoDTensorArray &self, size_t i) { return &self.at(i); },
py::return_value_policy::reference)
.def("__len__", [](LoDTensorArray &self) { return self.size(); })
.def("__setitem__",
[](LoDTensorArray &self, size_t i, const LoDTensor &t) {
PADDLE_ENFORCE_LT(i, self.size());
self[i].ShareDataWith(t);
self[i].set_lod(t.lod());
})
.def("append", [](LoDTensorArray &self, const LoDTensor &t) {
self.emplace_back();
self.back().ShareDataWith(t);
self.back().set_lod(t.lod());
});
m.def("op_support_gpu", OpSupportGPU); m.def("op_support_gpu", OpSupportGPU);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
m.def("get_cuda_device_count", platform::GetCUDADeviceCount); m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
......
...@@ -2,31 +2,33 @@ ...@@ -2,31 +2,33 @@
set -xe set -xe
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then function cmake_gen() {
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
else else
BASE_IMAGE="ubuntu:16.04" BASE_IMAGE="ubuntu:16.04"
fi fi
DOCKERFILE_GPU_ENV="" DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO="" DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so" DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi fi
mkdir -p /paddle/build mkdir -p /paddle/build
cd /paddle/build cd /paddle/build
# build script will not fail if *.deb does not exist # build script will not fail if *.deb does not exist
rm *.deb 2>/dev/null || true rm *.deb 2>/dev/null || true
# delete previous built whl packages # delete previous built whl packages
rm -rf /paddle/paddle/dist 2>/dev/null || true rm -rf /paddle/paddle/dist 2>/dev/null || true
cat <<EOF cat <<EOF
======================================== ========================================
Configuring cmake in /paddle/build ... Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
-DWITH_DOC=OFF -DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF} -DWITH_GPU=${WITH_GPU:-OFF}
...@@ -42,13 +44,13 @@ Configuring cmake in /paddle/build ... ...@@ -42,13 +44,13 @@ Configuring cmake in /paddle/build ...
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON}
-DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_TESTING=${WITH_TESTING:-ON}
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
======================================== ========================================
EOF EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because # Disable UNITTEST_USE_VIRTUALENV in docker because
# docker environment is fully controlled by this script. # docker environment is fully controlled by this script.
# See /Paddle/CMakeLists.txt, UNITTEST_USE_VIRTUALENV option. # See /Paddle/CMakeLists.txt, UNITTEST_USE_VIRTUALENV option.
cmake .. \ cmake .. \
-DCMAKE_BUILD_TYPE=Release \ -DCMAKE_BUILD_TYPE=Release \
-DWITH_DOC=OFF \ -DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
...@@ -63,34 +65,40 @@ cmake .. \ ...@@ -63,34 +65,40 @@ cmake .. \
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \
-DWITH_TESTING=${WITH_TESTING:-ON} \ -DWITH_TESTING=${WITH_TESTING:-ON} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
}
cat <<EOF function run_build() {
============================================ cat <<EOF
Building in /paddle/build ... ============================================
============================================ Building in /paddle/build ...
============================================
EOF EOF
make -j `nproc` make -j `nproc`
}
if [ ${WITH_TESTING:-ON} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then function run_test() {
cat <<EOF if [ ${WITH_TESTING:-ON} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then
======================================== cat <<EOF
Running unit tests ... ========================================
======================================== Running unit tests ...
========================================
EOF EOF
ctest --output-on-failure ctest --output-on-failure
# make install should also be test when unittest # make install should also be test when unittest
make install -j `nproc` make install -j `nproc`
pip install /usr/local/opt/paddle/share/wheels/*.whl pip install /usr/local/opt/paddle/share/wheels/*.whl
paddle version paddle version
fi fi
}
if [[ ${WITH_DOC:-OFF} == "ON" ]]; then function gen_docs() {
if [[ ${WITH_DOC:-OFF} == "ON" ]]; then
cat <<EOF cat <<EOF
======================================== ========================================
Building documentation ... Building documentation ...
In /paddle/build_doc In /paddle/build_doc
======================================== ========================================
EOF EOF
mkdir -p /paddle/build_doc mkdir -p /paddle/build_doc
pushd /paddle/build_doc pushd /paddle/build_doc
...@@ -103,14 +111,14 @@ EOF ...@@ -103,14 +111,14 @@ EOF
make -j `nproc` gen_proto_py make -j `nproc` gen_proto_py
make -j `nproc` paddle_docs paddle_docs_cn make -j `nproc` paddle_docs paddle_docs_cn
popd popd
fi fi
if [[ ${WOBOQ:-OFF} == 'ON' ]]; then if [[ ${WOBOQ:-OFF} == 'ON' ]]; then
cat <<EOF cat <<EOF
======================================== ========================================
Converting C++ source code into HTML ... Converting C++ source code into HTML ...
======================================== ========================================
EOF EOF
export WOBOQ_OUT=/paddle/build/woboq_out export WOBOQ_OUT=/paddle/build/woboq_out
mkdir -p $WOBOQ_OUT mkdir -p $WOBOQ_OUT
...@@ -121,36 +129,34 @@ EOF ...@@ -121,36 +129,34 @@ EOF
-o $WOBOQ_OUT \ -o $WOBOQ_OUT \
-p paddle:/paddle -p paddle:/paddle
/woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
fi fi
}
cat <<EOF
========================================
Generate /paddle/build/Dockerfile ...
========================================
EOF
cat > /paddle/build/Dockerfile <<EOF function gen_dockerfile() {
FROM ${BASE_IMAGE}
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> cat <<EOF
ENV HOME /root ========================================
Generate /paddle/build/Dockerfile ...
========================================
EOF EOF
if [[ -n ${APT_MIRROR} ]]; then cat > /paddle/build/Dockerfile <<EOF
cat >> /paddle/build/Dockerfile <<EOF FROM ${BASE_IMAGE}
RUN sed -i '${APT_MIRROR}' /etc/apt/sources.list MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV HOME /root
EOF EOF
fi
if [[ ${WITH_GPU} == "ON" ]]; then if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y libnccl-dev &&" NCCL_DEPS="apt-get install -y libnccl-dev &&"
else else
NCCL_DEPS="" NCCL_DEPS=""
fi fi
cat >> /paddle/build/Dockerfile <<EOF cat >> /paddle/build/Dockerfile <<EOF
ADD python/dist/*.whl / ADD python/dist/*.whl /
# run paddle version to install python packages first # run paddle version to install python packages first
RUN apt-get update &&\ RUN apt-get update &&\
${NCCL_DEPS}\ ${NCCL_DEPS}\
apt-get install -y wget python-pip && pip install -U pip && \ apt-get install -y wget python-pip && pip install -U pip && \
pip install /*.whl; apt-get install -f -y && \ pip install /*.whl; apt-get install -f -y && \
...@@ -158,14 +164,22 @@ RUN apt-get update &&\ ...@@ -158,14 +164,22 @@ RUN apt-get update &&\
rm -f /*.whl && \ rm -f /*.whl && \
paddle version && \ paddle version && \
ldconfig ldconfig
${DOCKERFILE_CUDNN_DSO} ${DOCKERFILE_CUDNN_DSO}
${DOCKERFILE_GPU_ENV} ${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/ ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/ ADD go/cmd/master/master /usr/bin/
# default command shows the paddle version and exit # default command shows the paddle version and exit
CMD ["paddle", "version"] CMD ["paddle", "version"]
EOF EOF
}
set +xe set +xe
cmake_gen
run_build
run_test
gen_docs
gen_dockerfile
printf "If you need to install PaddlePaddle in develop docker image," printf "If you need to install PaddlePaddle in develop docker image,"
printf "please make install or pip install build/python/dist/*.whl.\n" printf "please make install or pip install build/python/dist/*.whl.\n"
...@@ -17,8 +17,7 @@ limitations under the License. */ ...@@ -17,8 +17,7 @@ limitations under the License. */
#include <fenv.h> #include <fenv.h>
#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \ #if defined(__APPLE__) || defined(__OSX__)
!defined(__aarch64__)
int fegetexcept(void); int fegetexcept(void);
int feenableexcept(unsigned int excepts); int feenableexcept(unsigned int excepts);
......
...@@ -14,9 +14,13 @@ limitations under the License. */ ...@@ -14,9 +14,13 @@ limitations under the License. */
#include "paddle/utils/Excepts.h" #include "paddle/utils/Excepts.h"
#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \ #if defined(__APPLE__) || defined(__OSX__)
!defined(__aarch64__) #if defined(__arm__) || defined(__arm64__)
// TODO(liuyiqun): implement the arm version
int fegetexcept(void) { return -1; }
int feenableexcept(unsigned int excepts) { return -1; }
int fedisableexcept(unsigned int excepts) { return -1; }
#else
int fegetexcept(void) { int fegetexcept(void) {
static fenv_t fenv; static fenv_t fenv;
return fegetenv(&fenv) ? -1 : (fenv.__control & FE_ALL_EXCEPT); return fegetenv(&fenv) ? -1 : (fenv.__control & FE_ALL_EXCEPT);
...@@ -49,5 +53,5 @@ int fedisableexcept(unsigned int excepts) { ...@@ -49,5 +53,5 @@ int fedisableexcept(unsigned int excepts) {
return (fesetenv(&fenv) ? -1 : old_excepts); return (fesetenv(&fenv) ? -1 : old_excepts);
} }
#endif
#endif #endif
...@@ -18,6 +18,6 @@ limitations under the License. */ ...@@ -18,6 +18,6 @@ limitations under the License. */
TEST(StringUtil, to) { TEST(StringUtil, to) {
ASSERT_NEAR(paddle::str::to<double>("12.45"), 12.45, 1e-5); ASSERT_NEAR(paddle::str::to<double>("12.45"), 12.45, 1e-5);
ASSERT_DEATH(paddle::str::to<double>("12.45x23"), ".*"); ASSERT_DEATH_IF_SUPPORTED(paddle::str::to<double>("12.45x23"), ".*");
ASSERT_DEATH(paddle::str::to<int>(""), ".*"); ASSERT_DEATH_IF_SUPPORTED(paddle::str::to<int>(""), ".*");
} }
...@@ -44,6 +44,7 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/pad ...@@ -44,6 +44,7 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/pad
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND touch stub.cc
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
......
...@@ -2775,9 +2775,15 @@ class NCELayer(LayerBase): ...@@ -2775,9 +2775,15 @@ class NCELayer(LayerBase):
@config_layer('addto') @config_layer('addto')
class AddToLayer(LayerBase): class AddToLayer(LayerBase):
layer_type = 'addto'
def __init__(self, name, inputs, bias=True, **xargs): def __init__(self, name, inputs, bias=True, **xargs):
use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
if self.layer_type == "mkldnn_addto":
config_assert(use_mkldnn, "mkldnn_addto only support MKLDNN")
self.layer_type = 'mkldnn_addto' if use_mkldnn else 'addto'
super(AddToLayer, self).__init__( super(AddToLayer, self).__init__(
name, 'addto', 0, inputs=inputs, **xargs) name, self.layer_type, 0, inputs=inputs, **xargs)
config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer') config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer')
if len(self.inputs) > 1: if len(self.inputs) > 1:
...@@ -2796,6 +2802,11 @@ class AddToLayer(LayerBase): ...@@ -2796,6 +2802,11 @@ class AddToLayer(LayerBase):
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('mkldnn_addto')
class MKLDNNAddtoLayer(AddToLayer):
layer_type = 'mkldnn_addto'
@config_layer('agent') @config_layer('agent')
class AgentLayer(LayerBase): class AgentLayer(LayerBase):
def __init__(self, name, size, device=None): def __init__(self, name, size, device=None):
......
...@@ -143,6 +143,7 @@ __all__ = [ ...@@ -143,6 +143,7 @@ __all__ = [
'scale_shift_layer', 'scale_shift_layer',
'img_conv3d_layer', 'img_conv3d_layer',
'resize_layer', 'resize_layer',
'sub_seq_layer',
] ]
...@@ -252,6 +253,7 @@ class LayerType(object): ...@@ -252,6 +253,7 @@ class LayerType(object):
SCALE_SHIFT_LAYER = 'scale_shift' SCALE_SHIFT_LAYER = 'scale_shift'
RESIZE = 'resize' RESIZE = 'resize'
SUB_SEQ_LAYER = 'subseq'
@staticmethod @staticmethod
def is_layer_type(type_name): def is_layer_type(type_name):
...@@ -6980,3 +6982,58 @@ def resize_layer(input, size, name=None): ...@@ -6980,3 +6982,58 @@ def resize_layer(input, size, name=None):
""" """
Layer(name=name, type=LayerType.RESIZE, inputs=Input(input.name), size=size) Layer(name=name, type=LayerType.RESIZE, inputs=Input(input.name), size=size)
return LayerOutput(name, LayerType.RESIZE, parents=[input], size=input.size) return LayerOutput(name, LayerType.RESIZE, parents=[input], size=input.size)
@wrap_act_default(act=LinearActivation())
@wrap_name_default('sub_seq')
def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None):
"""
sub_seq_layer will return sub-sequences from the input sequences. For each
sequence in the input sequence layer, sub_seq_layer will slice it by given
offset and size. Please notice that, number of offset value and size value
both are equal to the number of sequence in the input layer.
.. code-block:: python
sub_seq = sub_seq_layer(input=input_seq, offsets=offsets, sizes=sizes)
:param name: The name of this layer. It is optional.
:type name: basestring
:param input: The input of this layer, which should be sequence.
:type input: LayerOutput
:param offsets: offset indices to slice the input sequence, which should be
sequence type.
:type offsets: LayerOutput
:param sizes: sizes of the sub-sequences, which should be sequence type.
:type sizes: LayerOutput
:param act: Layer activation, default is LinearActivation
:type act: BaseActivation.
:param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to
True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), (
'The first input of sub_seq_layer layer must be a PaddlePaddle layer.')
assert isinstance(offsets, LayerOutput), (
'The offset indices for sub_seq_layer, '
'must be a PaddlePaddle layer.')
assert isinstance(sizes, LayerOutput), (
'The sizes of sub-sequences, must be a PaddlePaddle layer.')
Layer(
name=name,
type=LayerType.SUB_SEQ_LAYER,
inputs=[input.name, offsets.name, sizes.name],
active_type=act.name,
bias=ParamAttr.to_bias(bias_attr))
return LayerOutput(
name,
LayerType.SUB_SEQ_LAYER,
parents=[input, offsets, sizes],
size=input.size)
...@@ -116,7 +116,7 @@ class AdamOptimizer(BaseSGDOptimizer): ...@@ -116,7 +116,7 @@ class AdamOptimizer(BaseSGDOptimizer):
m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\ m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\ v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
w & = w - \\frac{\\eta}{\\sqrt{v(w,t) + \\epsilon}} w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
:param beta1: the :math:`\\beta_1` in equation. :param beta1: the :math:`\\beta_1` in equation.
:type beta1: float :type beta1: float
......
...@@ -23,23 +23,23 @@ from paddle.v2.topology import Topology ...@@ -23,23 +23,23 @@ from paddle.v2.topology import Topology
def merge_v2_model(net, param_file, output_file): def merge_v2_model(net, param_file, output_file):
'''Integrate the model config and model parameters into one file. '''Merge the model config and parameters into one file.
The model configuration file describes the model structure which The model configuration file describes the model structure which
ends with .py. The parameters file stores the parameters of the model ends with .py. The parameters file stores the parameters of the model
which ends with .tar.gz. which ends with .tar.gz.
@param net The output layer of the network. @param net The output layer of the network for inference.
@param param_file Path of the model parameters(.tar.gz) which is stored by v2 api. @param param_file Path of the parameters (.tar.gz) which is stored by v2 api.
@param output_file Path of the merged file which will be generated. @param output_file Path of the merged file which will be generated.
Usage: Usage:
from paddle.util.merge_model import merge_v2_model from paddle.utils.merge_model import merge_v2_model
# import your network configuration # import your network configuration
from mobilenet import mobile_net from example_net import net_conf
net = mobile_net(3*224*224, 102) net = net_conf(is_predict=True)
param_file = './param_pass_00000.tar.gz' param_file = './param_pass_00000.tar.gz'
output_file = './output.paddle' output_file = './output.paddle'
...@@ -48,7 +48,7 @@ def merge_v2_model(net, param_file, output_file): ...@@ -48,7 +48,7 @@ def merge_v2_model(net, param_file, output_file):
''' '''
assert isinstance(net, LayerOutput), \ assert isinstance(net, LayerOutput), \
"The net should be the output of the network" "The net should be the output of the network for inference"
assert os.path.exists(param_file), \ assert os.path.exists(param_file), \
"The model parameters file %s does not exists " % (param_file) "The model parameters file %s does not exists " % (param_file)
......
...@@ -19,8 +19,20 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None): ...@@ -19,8 +19,20 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
:rtype: list[Variable] :rtype: list[Variable]
""" """
assert isinstance(loss, framework.Variable) assert isinstance(loss, framework.Variable)
param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
set()) if no_grad_set is None:
program = loss.block.program
assert isinstance(program, framework.Program)
no_grad_set = list()
for block in program.blocks:
assert isinstance(block, framework.Block)
for var in block.vars.itervalues():
assert isinstance(var, framework.Variable)
if var.stop_gradient:
no_grad_set.append(var.name)
no_grad_set = set(no_grad_set)
param_grad_map = loss.block.program.append_backward(loss, no_grad_set)
if parameter_list is not None: if parameter_list is not None:
parameters = parameter_list parameters = parameter_list
else: else:
......
...@@ -21,6 +21,7 @@ class Variable(object): ...@@ -21,6 +21,7 @@ class Variable(object):
dtype=None, dtype=None,
lod_level=None, lod_level=None,
persistable=None, persistable=None,
stop_gradient=False,
**kwargs): **kwargs):
self.block = block self.block = block
...@@ -89,6 +90,7 @@ class Variable(object): ...@@ -89,6 +90,7 @@ class Variable(object):
self.block.vars[name] = self self.block.vars[name] = self
self.op = None self.op = None
self.stop_gradient = stop_gradient
def __str__(self): def __str__(self):
protostr = self.desc.serialize_to_string() protostr = self.desc.serialize_to_string()
...@@ -101,6 +103,10 @@ class Variable(object): ...@@ -101,6 +103,10 @@ class Variable(object):
def persistable(self): def persistable(self):
return self.desc.persistable() return self.desc.persistable()
@persistable.setter
def persistable(self, p):
self.desc.set_persistable(p)
@property @property
def name(self): def name(self):
return self.desc.name() return self.desc.name()
...@@ -546,5 +552,5 @@ class Parameter(Variable): ...@@ -546,5 +552,5 @@ class Parameter(Variable):
# program is a global instance. # program is a global instance.
g_program = Program() g_main_program = Program()
g_init_program = Program() g_startup_program = Program()
import os import os
import cPickle as pickle import cPickle as pickle
from paddle.v2.framework.framework import Program, Parameter, g_program, \ from paddle.v2.framework.framework import Program, Parameter, g_main_program, \
Variable Variable
__all__ = [ __all__ = [
...@@ -29,13 +29,13 @@ def _clone_var_in_block_(block, var): ...@@ -29,13 +29,13 @@ def _clone_var_in_block_(block, var):
persistable=True) persistable=True)
def save_vars(executor, dirname, program=None, vars=None, predicate=None): def save_vars(executor, dirname, main_program=None, vars=None, predicate=None):
""" """
Save variables to directory by executor. Save variables to directory by executor.
:param executor: executor that save variable :param executor: executor that save variable
:param dirname: directory path :param dirname: directory path
:param program: program. If vars is None, then filter all variables in this :param main_program: program. If vars is None, then filter all variables in this
program which fit `predicate`. Default g_program. program which fit `predicate`. Default g_program.
:param predicate: The Predicate describes a callable that returns a variable :param predicate: The Predicate describes a callable that returns a variable
as a bool. If it returns true, the variables will be saved. as a bool. If it returns true, the variables will be saved.
...@@ -44,15 +44,15 @@ def save_vars(executor, dirname, program=None, vars=None, predicate=None): ...@@ -44,15 +44,15 @@ def save_vars(executor, dirname, program=None, vars=None, predicate=None):
:return: None :return: None
""" """
if vars is None: if vars is None:
if program is None: if main_program is None:
program = g_program main_program = g_main_program
if not isinstance(program, Program): if not isinstance(main_program, Program):
raise TypeError("program should be as Program type or None") raise TypeError("program should be as Program type or None")
save_vars( save_vars(
executor, executor,
dirname=dirname, dirname=dirname,
vars=filter(predicate, program.list_vars())) vars=filter(predicate, main_program.list_vars()))
else: else:
save_program = Program() save_program = Program()
save_block = save_program.global_block() save_block = save_program.global_block()
...@@ -66,37 +66,37 @@ def save_vars(executor, dirname, program=None, vars=None, predicate=None): ...@@ -66,37 +66,37 @@ def save_vars(executor, dirname, program=None, vars=None, predicate=None):
executor.run(save_program) executor.run(save_program)
def save_params(executor, dirname, program=None): def save_params(executor, dirname, main_program=None):
""" """
Save all parameters to directory with executor. Save all parameters to directory with executor.
""" """
save_vars( save_vars(
executor, executor,
dirname=dirname, dirname=dirname,
program=program, main_program=main_program,
vars=None, vars=None,
predicate=is_parameter) predicate=is_parameter)
def save_persistables(executor, dirname, program=None): def save_persistables(executor, dirname, main_program=None):
""" """
Save all persistables to directory with executor. Save all persistables to directory with executor.
""" """
save_vars( save_vars(
executor, executor,
dirname=dirname, dirname=dirname,
program=program, main_program=main_program,
vars=None, vars=None,
predicate=is_persistable) predicate=is_persistable)
def load_vars(executor, dirname, program=None, vars=None, predicate=None): def load_vars(executor, dirname, main_program=None, vars=None, predicate=None):
""" """
Load variables from directory by executor. Load variables from directory by executor.
:param executor: executor that save variable :param executor: executor that save variable
:param dirname: directory path :param dirname: directory path
:param program: program. If vars is None, then filter all variables in this :param main_program: program. If vars is None, then filter all variables in this
program which fit `predicate`. Default g_program. program which fit `predicate`. Default g_program.
:param predicate: The Predicate describes a callable that returns a variable :param predicate: The Predicate describes a callable that returns a variable
as a bool. If it returns true, the variables will be loaded. as a bool. If it returns true, the variables will be loaded.
...@@ -105,15 +105,15 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None): ...@@ -105,15 +105,15 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None):
:return: None :return: None
""" """
if vars is None: if vars is None:
if program is None: if main_program is None:
program = g_program main_program = g_main_program
if not isinstance(program, Program): if not isinstance(main_program, Program):
raise TypeError("program's type should be Program") raise TypeError("program's type should be Program")
load_vars( load_vars(
executor, executor,
dirname=dirname, dirname=dirname,
vars=filter(predicate, program.list_vars())) vars=filter(predicate, main_program.list_vars()))
else: else:
load_prog = Program() load_prog = Program()
load_block = load_prog.global_block() load_block = load_prog.global_block()
...@@ -129,27 +129,33 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None): ...@@ -129,27 +129,33 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None):
executor.run(load_prog) executor.run(load_prog)
def load_params(executor, dirname, program=None): def load_params(executor, dirname, main_program=None):
""" """
load all parameters from directory by executor. load all parameters from directory by executor.
""" """
load_vars( load_vars(
executor, dirname=dirname, program=program, predicate=is_parameter) executor,
dirname=dirname,
main_program=main_program,
predicate=is_parameter)
def load_persistables(executor, dirname, program=None): def load_persistables(executor, dirname, main_program=None):
""" """
load all persistables from directory by executor. load all persistables from directory by executor.
""" """
load_vars( load_vars(
executor, dirname=dirname, program=program, predicate=is_persistable) executor,
dirname=dirname,
main_program=main_program,
predicate=is_persistable)
def save_inference_model(dirname, def save_inference_model(dirname,
feeded_var_names, feeded_var_names,
target_vars, target_vars,
executor, executor,
program=None): main_program=None):
""" """
Build a model especially for inference, Build a model especially for inference,
and save it to directory by the executor. and save it to directory by the executor.
...@@ -158,20 +164,20 @@ def save_inference_model(dirname, ...@@ -158,20 +164,20 @@ def save_inference_model(dirname,
:param feeded_var_names: Names of variables that need to be feeded data during inference :param feeded_var_names: Names of variables that need to be feeded data during inference
:param target_vars: Variables from which we can get inference results. :param target_vars: Variables from which we can get inference results.
:param executor: executor that save inference model :param executor: executor that save inference model
:param program: original program, which will be pruned to build the inference model. :param main_program: original program, which will be pruned to build the inference model.
Default g_program. Default g_program.
:return: None :return: None
""" """
if program is None: if main_program is None:
program = g_program main_program = g_main_program
if not isinstance(target_vars, list): if not isinstance(target_vars, list):
target_vars = [target_vars] target_vars = [target_vars]
if not os.path.isdir(dirname): if not os.path.isdir(dirname):
os.makedirs(dirname) os.makedirs(dirname)
pruned_program = program.prune(target_vars) pruned_program = main_program.prune(target_vars)
fetch_var_names = [v.name for v in target_vars] fetch_var_names = [v.name for v in target_vars]
model_file_name = dirname + "/__model__" model_file_name = dirname + "/__model__"
...@@ -182,10 +188,10 @@ def save_inference_model(dirname, ...@@ -182,10 +188,10 @@ def save_inference_model(dirname,
"fetch_var_names": fetch_var_names "fetch_var_names": fetch_var_names
}, f, -1) }, f, -1)
save_params(executor, dirname, program) save_params(executor, dirname, main_program)
def load_persistables_if_exist(executor, dirname, program=None): def load_persistables_if_exist(executor, dirname, main_program=None):
filenames = next(os.walk(dirname))[2] filenames = next(os.walk(dirname))[2]
filenames = set(filenames) filenames = set(filenames)
...@@ -198,7 +204,7 @@ def load_persistables_if_exist(executor, dirname, program=None): ...@@ -198,7 +204,7 @@ def load_persistables_if_exist(executor, dirname, program=None):
load_vars( load_vars(
executor, executor,
dirname, dirname,
program=program, main_program=main_program,
vars=None, vars=None,
predicate=_is_presistable_and_exist_) predicate=_is_presistable_and_exist_)
......
import copy import copy
import itertools import itertools
from paddle.v2.framework.framework import Variable, g_program, \ from paddle.v2.framework.framework import Variable, g_main_program, \
g_init_program, unique_name, Program g_startup_program, unique_name, Program
from paddle.v2.framework.initializer import ConstantInitializer, \ from paddle.v2.framework.initializer import ConstantInitializer, \
UniformInitializer UniformInitializer
...@@ -20,23 +20,23 @@ class LayerHelper(object): ...@@ -20,23 +20,23 @@ class LayerHelper(object):
return self.kwargs['name'] return self.kwargs['name']
@property @property
def program(self): def main_program(self):
prog = self.kwargs.get('program', None) prog = self.kwargs.get('main_program', None)
if prog is None: if prog is None:
return g_program return g_main_program
else: else:
return prog return prog
@property @property
def init_program(self): def startup_program(self):
prog = self.kwargs.get('init_program', None) prog = self.kwargs.get('startup_program', None)
if prog is None: if prog is None:
return g_init_program return g_startup_program
else: else:
return prog return prog
def append_op(self, *args, **kwargs): def append_op(self, *args, **kwargs):
return self.program.current_block().append_op(*args, **kwargs) return self.main_program.current_block().append_op(*args, **kwargs)
def multiple_input(self, input_param_name='input'): def multiple_input(self, input_param_name='input'):
inputs = self.kwargs.get(input_param_name, []) inputs = self.kwargs.get(input_param_name, [])
...@@ -112,32 +112,35 @@ class LayerHelper(object): ...@@ -112,32 +112,35 @@ class LayerHelper(object):
raise ValueError("Data Type mismatch") raise ValueError("Data Type mismatch")
return dtype return dtype
def create_parameter(self, attr, shape, dtype, suffix='w'): def create_parameter(self, attr, shape, dtype, suffix='w',
initializer=None):
# Deepcopy the attr so that parameters can be shared in program # Deepcopy the attr so that parameters can be shared in program
attr_copy = copy.deepcopy(attr) attr_copy = copy.deepcopy(attr)
if initializer is not None:
attr_copy['initializer'] = initializer
if attr_copy['name'] is None: if attr_copy['name'] is None:
attr_copy['name'] = unique_name(".".join([self.name, suffix])) attr_copy['name'] = unique_name(".".join([self.name, suffix]))
self.init_program.global_block().create_parameter( self.startup_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr_copy) dtype=dtype, shape=shape, **attr_copy)
return self.program.global_block().create_parameter( return self.main_program.global_block().create_parameter(
name=attr_copy['name'], dtype=dtype, shape=shape) name=attr_copy['name'], dtype=dtype, shape=shape)
def create_tmp_variable(self, dtype): def create_tmp_variable(self, dtype):
return self.program.current_block().create_var( return self.main_program.current_block().create_var(
name=unique_name(".".join([self.name, 'tmp'])), name=unique_name(".".join([self.name, 'tmp'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
def create_variable(self, *args, **kwargs): def create_variable(self, *args, **kwargs):
return self.program.current_block().create_var(*args, **kwargs) return self.main_program.current_block().create_var(*args, **kwargs)
def create_global_variable(self, persistable=False, *args, **kwargs): def create_global_variable(self, persistable=False, *args, **kwargs):
return self.program.global_block().create_var( return self.main_program.global_block().create_var(
*args, persistable=persistable, **kwargs) *args, persistable=persistable, **kwargs)
def set_variable_initializer(self, var, initializer): def set_variable_initializer(self, var, initializer):
assert isinstance(var, Variable) assert isinstance(var, Variable)
self.init_program.global_block().create_var( self.startup_program.global_block().create_var(
name=var.name, name=var.name,
type=var.type, type=var.type,
dtype=var.data_type, dtype=var.data_type,
......
from paddle.v2.framework.layer_helper import LayerHelper, unique_name
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \ from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, Operator
Operator from paddle.v2.framework.initializer import ConstantInitializer, NormalInitializer
from paddle.v2.framework.initializer import ConstantInitializer from paddle.v2.framework.layer_helper import LayerHelper, unique_name
import re import re
__all__ = [ __all__ = [
...@@ -19,8 +18,8 @@ def fc(input, ...@@ -19,8 +18,8 @@ def fc(input,
name=None, name=None,
act=None, act=None,
num_flatten_dims=1, num_flatten_dims=1,
program=None, main_program=None,
init_program=None): startup_program=None):
# create helper # create helper
helper = LayerHelper('fc', **locals()) helper = LayerHelper('fc', **locals())
...@@ -65,8 +64,8 @@ def embedding(input, ...@@ -65,8 +64,8 @@ def embedding(input,
data_type='float32', data_type='float32',
is_sparse=False, is_sparse=False,
param_attr=None, param_attr=None,
program=None, main_program=None,
init_program=None): startup_program=None):
helper = LayerHelper('embedding', **locals()) helper = LayerHelper('embedding', **locals())
w = helper.create_parameter( w = helper.create_parameter(
attr=helper.param_attr, shape=size, dtype=data_type) attr=helper.param_attr, shape=size, dtype=data_type)
...@@ -85,8 +84,8 @@ def data(name, ...@@ -85,8 +84,8 @@ def data(name,
data_type='float32', data_type='float32',
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
append_batch_size=True, append_batch_size=True,
program=None, main_program=None,
init_program=None): startup_program=None):
helper = LayerHelper('data', **locals()) helper = LayerHelper('data', **locals())
shape = list(shape) shape = list(shape)
for i in xrange(len(shape)): for i in xrange(len(shape)):
...@@ -100,7 +99,7 @@ def data(name, ...@@ -100,7 +99,7 @@ def data(name,
shape = [-1] + shape # append batch size as -1 shape = [-1] + shape # append batch size as -1
return helper.create_global_variable( return helper.create_global_variable(
name=name, shape=shape, dtype=data_type, type=type) name=name, shape=shape, dtype=data_type, type=type, stop_gradient=True)
def _convert_(name): def _convert_(name):
...@@ -179,7 +178,7 @@ _create_op_func_('sigmoid') ...@@ -179,7 +178,7 @@ _create_op_func_('sigmoid')
_create_op_func_('scale') _create_op_func_('scale')
def cast(x, data_type, program=None): def cast(x, data_type, main_program=None):
helper = LayerHelper('cast', **locals()) helper = LayerHelper('cast', **locals())
out = helper.create_tmp_variable(dtype=data_type) out = helper.create_tmp_variable(dtype=data_type)
helper.append_op( helper.append_op(
...@@ -191,7 +190,7 @@ def cast(x, data_type, program=None): ...@@ -191,7 +190,7 @@ def cast(x, data_type, program=None):
return out return out
def concat(input, axis, program=None, init_program=None): def concat(input, axis, main_program=None, startup_program=None):
helper = LayerHelper('concat', **locals()) helper = LayerHelper('concat', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op( helper.append_op(
...@@ -202,7 +201,7 @@ def concat(input, axis, program=None, init_program=None): ...@@ -202,7 +201,7 @@ def concat(input, axis, program=None, init_program=None):
return out return out
def sums(input, program=None, init_program=None): def sums(input, main_program=None, startup_program=None):
helper = LayerHelper('sum', **locals()) helper = LayerHelper('sum', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out})
...@@ -282,8 +281,8 @@ def sequence_conv(input, ...@@ -282,8 +281,8 @@ def sequence_conv(input,
padding=None, padding=None,
bias_attr=None, bias_attr=None,
param_attr=None, param_attr=None,
program=None, main_program=None,
init_program=None): startup_program=None):
# FIXME(dzh) : want to unify the argument of python layer # FIXME(dzh) : want to unify the argument of python layer
# function. So we ignore some unecessary attributes. # function. So we ignore some unecessary attributes.
# such as, padding_trainable, context_start. # such as, padding_trainable, context_start.
...@@ -322,8 +321,8 @@ def conv2d(input, ...@@ -322,8 +321,8 @@ def conv2d(input,
padding=None, padding=None,
bias_attr=None, bias_attr=None,
param_attr=None, param_attr=None,
program=None, main_program=None,
init_program=None): startup_program=None):
helper = LayerHelper('conv2d', **locals()) helper = LayerHelper('conv2d', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -344,8 +343,13 @@ def conv2d(input, ...@@ -344,8 +343,13 @@ def conv2d(input,
input_shape = input.shape input_shape = input.shape
filter_shape = [num_filters, num_filter_channels] + filter_size filter_shape = [num_filters, num_filter_channels] + filter_size
std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
filter = helper.create_parameter( filter = helper.create_parameter(
attr=helper.param_attr, shape=filter_shape, dtype=dtype) attr=helper.param_attr,
shape=filter_shape,
dtype=dtype,
initializer=NormalInitializer(0.0, std, 0))
pre_bias = helper.create_tmp_variable(dtype) pre_bias = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
...@@ -368,11 +372,13 @@ def sequence_pool(input, pool_type, **kwargs): ...@@ -368,11 +372,13 @@ def sequence_pool(input, pool_type, **kwargs):
helper = LayerHelper('sequence_pool', input=input, **kwargs) helper = LayerHelper('sequence_pool', input=input, **kwargs)
dtype = helper.input_dtype() dtype = helper.input_dtype()
pool_out = helper.create_tmp_variable(dtype) pool_out = helper.create_tmp_variable(dtype)
max_index = helper.create_tmp_variable(dtype)
helper.append_op( helper.append_op(
type="sequence_pool", type="sequence_pool",
inputs={"X": [input]}, inputs={"X": input},
outputs={"Out": [pool_out]}, outputs={"Out": pool_out,
"MaxIndex": max_index},
attrs={"pooltype": pool_type.upper()}) attrs={"pooltype": pool_type.upper()})
return pool_out return pool_out
...@@ -384,8 +390,8 @@ def pool2d(input, ...@@ -384,8 +390,8 @@ def pool2d(input,
pool_stride=[1, 1], pool_stride=[1, 1],
pool_padding=[0, 0], pool_padding=[0, 0],
global_pooling=False, global_pooling=False,
program=None, main_program=None,
init_program=None): startup_program=None):
if pool_type not in ["max", "avg"]: if pool_type not in ["max", "avg"]:
raise ValueError( raise ValueError(
"Unknown pool_type: '%s'. It can only be 'max' or 'avg'.", "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
...@@ -420,12 +426,12 @@ def batch_norm(input, ...@@ -420,12 +426,12 @@ def batch_norm(input,
act=None, act=None,
is_test=False, is_test=False,
momentum=0.9, momentum=0.9,
epsilon=1e05, epsilon=1e-05,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
data_layout='NCHW', data_layout='NCHW',
program=None, main_program=None,
init_program=None): startup_program=None):
helper = LayerHelper('batch_norm', **locals()) helper = LayerHelper('batch_norm', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -438,27 +444,29 @@ def batch_norm(input, ...@@ -438,27 +444,29 @@ def batch_norm(input,
else: else:
raise ValueError("unsupported data layout:" + data_layout) raise ValueError("unsupported data layout:" + data_layout)
def create_persistable_var(dtype, shape, initializer=None):
name = unique_name(".".join([helper.name, "xxxx"]))
var = init_program.global_block().create_var(
dtype=dtype, shape=shape, name=name, persistable=True)
if initializer is not None:
initializer(var, var.block)
return program.global_block().create_var(
name=name, dtype=dtype, shape=shape, persistable=True)
param_shape = [channel_num] param_shape = [channel_num]
# create parameter # create parameter
scale = helper.create_parameter( scale = helper.create_parameter(
attr=helper.param_attr, shape=param_shape, dtype=dtype) attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
initializer=ConstantInitializer(1.0))
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.param_attr, shape=param_shape, dtype=dtype) attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
initializer=ConstantInitializer(0.0))
mean = helper.create_global_variable(
dtype=input.data_type, shape=param_shape, persistable=True)
helper.set_variable_initializer(
var=mean, initializer=ConstantInitializer(0.0))
# create input variance = helper.create_global_variable(
mean = create_persistable_var(dtype, param_shape, ConstantInitializer(0.0)) dtype=input.data_type, shape=param_shape, persistable=True)
variance = create_persistable_var(dtype, param_shape, helper.set_variable_initializer(
ConstantInitializer(1.0)) var=variance, initializer=ConstantInitializer(1.0))
# create output # create output
# mean and mean_out share the same memory # mean and mean_out share the same memory
...@@ -499,16 +507,16 @@ class BlockGuard(object): ...@@ -499,16 +507,16 @@ class BlockGuard(object):
keyword. keyword.
""" """
def __init__(self, program): def __init__(self, main_program):
if not isinstance(program, Program): if not isinstance(main_program, Program):
raise TypeError("BlockGuard takes a program") raise TypeError("BlockGuard takes a program")
self.program = program self.main_program = main_program
def __enter__(self): def __enter__(self):
self.program.create_block() self.main_program.create_block()
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
self.program.rollback() self.main_program.rollback()
if exc_type is not None: if exc_type is not None:
return False # re-raise exception return False # re-raise exception
return True return True
...@@ -518,7 +526,7 @@ class StaticRNNGuard(BlockGuard): ...@@ -518,7 +526,7 @@ class StaticRNNGuard(BlockGuard):
def __init__(self, rnn): def __init__(self, rnn):
if not isinstance(rnn, StaticRNN): if not isinstance(rnn, StaticRNN):
raise TypeError("StaticRNNGuard takes an StaticRNN") raise TypeError("StaticRNNGuard takes an StaticRNN")
super(StaticRNNGuard, self).__init__(rnn.helper.program) super(StaticRNNGuard, self).__init__(rnn.helper.main_program)
self.rnn = rnn self.rnn = rnn
def __enter__(self): def __enter__(self):
...@@ -554,8 +562,9 @@ class StaticRNN(object): ...@@ -554,8 +562,9 @@ class StaticRNN(object):
IN_RNN_BLOCK = 1 IN_RNN_BLOCK = 1
AFTER_RNN_BLOCK = 2 AFTER_RNN_BLOCK = 2
def __init__(self, name=None, program=None): def __init__(self, name=None, main_program=None):
self.helper = LayerHelper("static_rnn", name=name, program=program) self.helper = LayerHelper(
"static_rnn", name=name, main_program=main_program)
self.memories = {} # memory map, from pre_mem.name --> MemoryLink self.memories = {} # memory map, from pre_mem.name --> MemoryLink
self.inputs = [] # input variable list in current block self.inputs = [] # input variable list in current block
self.outputs = [] # output variable list in parent block self.outputs = [] # output variable list in parent block
...@@ -647,7 +656,7 @@ class StaticRNN(object): ...@@ -647,7 +656,7 @@ class StaticRNN(object):
self.memories[mem.name].mem = var self.memories[mem.name].mem = var
def parent_block(self): def parent_block(self):
prog = self.helper.program prog = self.helper.main_program
parent_idx = prog.current_block().parent_idx parent_idx = prog.current_block().parent_idx
assert parent_idx >= 0 assert parent_idx >= 0
parent_block = prog.block(parent_idx) parent_block = prog.block(parent_idx)
...@@ -664,8 +673,8 @@ class StaticRNN(object): ...@@ -664,8 +673,8 @@ class StaticRNN(object):
return self.outputs return self.outputs
def complete_rnn_op(self): def complete_rnn_op(self):
program = self.helper.program main_program = self.helper.main_program
rnn_block = program.current_block() rnn_block = main_program.current_block()
parent_block = self.parent_block() parent_block = self.parent_block()
local_inputs = set() local_inputs = set()
...@@ -729,3 +738,16 @@ class StaticRNN(object): ...@@ -729,3 +738,16 @@ class StaticRNN(object):
'states': memories, 'states': memories,
'step_block': rnn_block 'step_block': rnn_block
}) })
def lod_rank_table(x, level=0, main_program=None):
helper = LayerHelper("lod_rank_table", **locals())
table = helper.create_variable(
type=core.VarDesc.VarType.LOD_RANK_TABLE,
name=unique_name("lod_rank_table"))
helper.append_op(
type='lod_rank_table',
inputs={'X': x},
outputs={'Out': table},
attrs={'level': level})
return table
...@@ -80,7 +80,7 @@ def parse_graph(program, graph, var_dict, **kwargs): ...@@ -80,7 +80,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
graph.edge(**draw_edge(var_dict, op, e, arg)) graph.edge(**draw_edge(var_dict, op, e, arg))
def draw_graph(init_program, program, **kwargs): def draw_graph(startup_program, main_program, **kwargs):
if kwargs.has_key("graph_attr"): if kwargs.has_key("graph_attr"):
GRAPH_STYLE.update(kwargs[graph_attr]) GRAPH_STYLE.update(kwargs[graph_attr])
if kwargs.has_key("node_attr"): if kwargs.has_key("node_attr"):
...@@ -101,8 +101,8 @@ def draw_graph(init_program, program, **kwargs): ...@@ -101,8 +101,8 @@ def draw_graph(init_program, program, **kwargs):
**kwargs) **kwargs)
var_dict = {} var_dict = {}
parse_graph(init_program, g, var_dict) parse_graph(startup_program, g, var_dict)
parse_graph(program, g, var_dict) parse_graph(main_program, g, var_dict)
if filename != None: if filename != None:
g.save() g.save()
......
...@@ -10,23 +10,23 @@ def simple_img_conv_pool(input, ...@@ -10,23 +10,23 @@ def simple_img_conv_pool(input,
pool_stride, pool_stride,
act, act,
pool_type='max', pool_type='max',
program=None, main_program=None,
init_program=None): startup_program=None):
conv_out = layers.conv2d( conv_out = layers.conv2d(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
act=act, act=act,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
pool_out = layers.pool2d( pool_out = layers.pool2d(
input=conv_out, input=conv_out,
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return pool_out return pool_out
...@@ -40,8 +40,8 @@ def img_conv_group(input, ...@@ -40,8 +40,8 @@ def img_conv_group(input,
conv_batchnorm_drop_rate=None, conv_batchnorm_drop_rate=None,
pool_stride=1, pool_stride=1,
pool_type=None, pool_type=None,
program=None, main_program=None,
init_program=None): startup_program=None):
""" """
Image Convolution Group, Used for vgg net. Image Convolution Group, Used for vgg net.
""" """
...@@ -71,30 +71,30 @@ def img_conv_group(input, ...@@ -71,30 +71,30 @@ def img_conv_group(input,
filter_size=conv_filter_size[i], filter_size=conv_filter_size[i],
padding=conv_padding[i], padding=conv_padding[i],
act=local_conv_act, act=local_conv_act,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
if conv_with_batchnorm[i]: if conv_with_batchnorm[i]:
tmp = layers.batch_norm( tmp = layers.batch_norm(
input=tmp, input=tmp,
act=conv_act, act=conv_act,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
drop_rate = conv_batchnorm_drop_rate[i] drop_rate = conv_batchnorm_drop_rate[i]
if abs(drop_rate) > 1e-5: if abs(drop_rate) > 1e-5:
tmp = layers.dropout( tmp = layers.dropout(
x=tmp, x=tmp,
dropout_prob=drop_rate, dropout_prob=drop_rate,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
pool_out = layers.pool2d( pool_out = layers.pool2d(
input=tmp, input=tmp,
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return pool_out return pool_out
...@@ -103,19 +103,19 @@ def sequence_conv_pool(input, ...@@ -103,19 +103,19 @@ def sequence_conv_pool(input,
filter_size, filter_size,
act="sigmoid", act="sigmoid",
pool_type="max", pool_type="max",
program=None, main_program=None,
init_program=None): startup_program=None):
conv_out = layers.sequence_conv( conv_out = layers.sequence_conv(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
act=act, act=act,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
pool_out = layers.sequence_pool( pool_out = layers.sequence_pool(
input=conv_out, input=conv_out,
pool_type=pool_type, pool_type=pool_type,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return pool_out return pool_out
...@@ -132,7 +132,7 @@ class Optimizer(object): ...@@ -132,7 +132,7 @@ class Optimizer(object):
def create_optimization_pass(self, def create_optimization_pass(self,
parameters_and_grads, parameters_and_grads,
loss, loss,
init_program=None): startup_program=None):
"""Add optimization operators to update gradients to variables. """Add optimization operators to update gradients to variables.
Args: Args:
...@@ -144,7 +144,7 @@ class Optimizer(object): ...@@ -144,7 +144,7 @@ class Optimizer(object):
optimization. This will include parameter update ops, global step optimization. This will include parameter update ops, global step
update ops and any other custom ops required by subclasses to manage update ops and any other custom ops required by subclasses to manage
their internal state. their internal state.
:param init_program: :param startup_program:
""" """
# This is a default implementation of create_optimization_pass that # This is a default implementation of create_optimization_pass that
# can be shared by most optimizers. This implementation assumes that # can be shared by most optimizers. This implementation assumes that
...@@ -156,7 +156,9 @@ class Optimizer(object): ...@@ -156,7 +156,9 @@ class Optimizer(object):
# Create any accumulators # Create any accumulators
program = loss.block.program program = loss.block.program
self.helper = LayerHelper( self.helper = LayerHelper(
self.__class__.__name__, program=program, init_program=init_program) self.__class__.__name__,
main_program=program,
startup_program=startup_program)
self._create_accumulators(loss.block, self._create_accumulators(loss.block,
[p[0] for p in parameters_and_grads]) [p[0] for p in parameters_and_grads])
# Create any necessary tensors # Create any necessary tensors
...@@ -185,7 +187,7 @@ class Optimizer(object): ...@@ -185,7 +187,7 @@ class Optimizer(object):
def minimize(self, def minimize(self,
loss, loss,
init_program=None, startup_program=None,
parameter_list=None, parameter_list=None,
no_grad_set=None): no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`. """Add operations to minimize `loss` by updating `parameter_list`.
...@@ -198,7 +200,7 @@ class Optimizer(object): ...@@ -198,7 +200,7 @@ class Optimizer(object):
# Add regularization if any # Add regularization if any
params_grads = append_regularization_ops(params_grads) params_grads = append_regularization_ops(params_grads)
optimize_ops = self.create_optimization_pass(params_grads, loss, optimize_ops = self.create_optimization_pass(params_grads, loss,
init_program) startup_program)
return optimize_ops return optimize_ops
......
...@@ -61,25 +61,23 @@ class TestConv2dOp(OpTest): ...@@ -61,25 +61,23 @@ class TestConv2dOp(OpTest):
def test_check_grad(self): def test_check_grad(self):
self.check_grad( self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.05) set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def test_check_grad_no_filter(self): def test_check_grad_no_filter(self):
self.check_grad( self.check_grad(
['Input'], ['Input'],
'Output', 'Output',
max_relative_error=0.05, max_relative_error=0.02,
no_grad_set=set(['Filter'])) no_grad_set=set(['Filter']))
def test_check_grad_no_input(self): def test_check_grad_no_input(self):
self.check_grad( self.check_grad(
['Filter'], ['Filter'],
'Output', 'Output',
max_relative_error=0.05, max_relative_error=0.02,
no_grad_set=set(['Input'])) no_grad_set=set(['Input']))
def init_test_case(self): def init_test_case(self):
# self.groups = 1
# self.op_type = "conv2d"
self.pad = [0, 0] self.pad = [0, 0]
self.stride = [1, 1] self.stride = [1, 1]
self.dilations = [1, 1] self.dilations = [1, 1]
...@@ -103,6 +101,9 @@ class TestWithGroup(TestConv2dOp): ...@@ -103,6 +101,9 @@ class TestWithGroup(TestConv2dOp):
self.op_type = "conv2d" self.op_type = "conv2d"
#----------------Conv2dCudnn----------------
class TestCudnn(TestConv2dOp): class TestCudnn(TestConv2dOp):
def init_group(self): def init_group(self):
self.groups = 1 self.groups = 1
......
...@@ -58,36 +58,37 @@ class TestConv2dTransposeOp(OpTest): ...@@ -58,36 +58,37 @@ class TestConv2dTransposeOp(OpTest):
print 'check output here for', self.op_type print 'check output here for', self.op_type
self.check_output() self.check_output()
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose"
def test_check_grad_no_input(self): def test_check_grad_no_input(self):
self.check_grad( self.check_grad(
['Filter'], ['Filter'],
'Output', 'Output',
max_relative_error=0.05, max_relative_error=0.02,
no_grad_set=set(['Input'])) no_grad_set=set(['Input']))
def test_check_grad_no_filter(self): def test_check_grad_no_filter(self):
self.check_grad( self.check_grad(
['Input'], ['Input'],
'Output', 'Output',
max_relative_error=0.05, max_relative_error=0.02,
no_grad_set=set(['Filter'])) no_grad_set=set(['Filter']))
def test_check_grad(self): def test_check_grad(self):
self.check_grad( self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.05) set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose"
# ------------ test_cudnn ------------
class TestCudnn(TestConv2dTransposeOp): class TestCudnn(TestConv2dTransposeOp):
def init_op_type(self): def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn" self.op_type = "conv2d_transpose_cudnn"
......
import unittest
import numpy as np
from op_test import OpTest
def conv3d_forward_naive(input, filter, group, conv_param):
in_n, in_c, in_d, in_h, in_w = input.shape
out_c, f_c, f_d, f_h, f_w = filter.shape
assert f_c * group == in_c
assert np.mod(out_c, group) == 0
sub_out_c = out_c / group
stride, pad = conv_param['stride'], conv_param['pad']
out_d = 1 + (in_d + 2 * pad[0] - f_h) / stride[0]
out_h = 1 + (in_h + 2 * pad[1] - f_h) / stride[1]
out_w = 1 + (in_w + 2 * pad[2] - f_w) / stride[2]
out = np.zeros((in_n, out_c, out_d, out_h, out_w))
input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], ),
(pad[2], )),
mode='constant',
constant_values=0)
for d in range(out_d):
for i in range(out_h):
for j in range(out_w):
for g in range(group):
input_pad_masked = \
input_pad[:, g * f_c:(g + 1) * f_c,
d * stride[0]:d * stride[0] + f_d,
i * stride[1]:i * stride[1] + f_h,
j * stride[2]:j * stride[2] + f_w]
f_sub = filter[g * sub_out_c:(g + 1) *
sub_out_c, :, :, :, :]
for k in range(sub_out_c):
out[:, g * sub_out_c + k, d, i, j] = \
np.sum(input_pad_masked * f_sub[k, :, :, :, :],
axis=(1, 2, 3, 4))
return out
class TestConv3dOp(OpTest):
def setUp(self):
self.init_group()
self.init_op_type()
self.init_test_case()
conv3d_param = {'stride': self.stride, 'pad': self.pad}
input = np.random.random(self.input_size).astype("float32")
filter = np.random.random(self.filter_size).astype("float32")
output = conv3d_forward_naive(input, filter, self.groups,
conv3d_param).astype("float32")
self.inputs = {'Input': input, 'Filter': filter}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups
}
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
def test_check_grad_no_filter(self):
self.check_grad(
['Input'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.03,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] / self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_group(self):
self.groups = 1
def init_op_type(self):
self.op_type = "conv3d"
class TestCase1(TestConv3dOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.input_size = [2, 3, 4, 4, 4] # NCDHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] / self.groups
self.filter_size = [6, f_c, 3, 3, 3]
def init_group(self):
self.groups = 1
def init_op_type(self):
self.op_type = "conv3d"
class TestWithGroup1(TestConv3dOp):
def init_group(self):
self.groups = 3
def init_op_type(self):
self.op_type = "conv3d"
class TestWithGroup2(TestCase1):
def init_group(self):
self.groups = 3
def init_op_type(self):
self.op_type = "conv3d"
if __name__ == '__main__':
unittest.main()
import unittest
import numpy as np
from op_test import OpTest
def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
# [2, 3, 5, 5, 5]
in_n, in_c, in_d, in_h, in_w = input_.shape
# [3, 6, 3, 3, 3]
f_c, out_c, f_d, f_h, f_w = filter_.shape
assert in_c == f_c
stride, pad = conv3dtranspose_param['stride'], conv3dtranspose_param['pad']
out_d = (in_d - 1) * stride[0] + f_d
out_h = (in_h - 1) * stride[1] + f_h
out_w = (in_w - 1) * stride[2] + f_w
out = np.zeros((in_n, out_c, out_d, out_h, out_w))
for n in range(in_n):
for d in range(in_d):
for i in range(in_h):
for j in range(in_w):
input_masked = input_[n, :, d, i, j] # (c)
input_masked = np.reshape(input_masked, (in_c, 1, 1, 1))
input_masked = np.tile(input_masked, (1, f_d, f_h, f_w))
for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :, :],
axis=0)
d1, d2 = d * stride[0], d * stride[0] + f_d
i1, i2 = i * stride[1], i * stride[1] + f_h
j1, j2 = j * stride[2], j * stride[2] + f_w
out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out
return out
class TestConv3dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.init_op_type()
# [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7]
self.init_test_case()
conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad}
input_ = np.random.random(self.input_size).astype("float32")
filter_ = np.random.random(self.filter_size).astype("float32")
output = conv3dtranspose_forward_naive(
input_, filter_, conv3dtranspose_param).astype("float32")
# print 'deconv output py', output, output.shape
self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
# 'dilations': self.dilations
}
self.outputs = {'Output': output}
def test_check_output(self):
print 'check output here'
self.check_output()
def test_check_grad(self):
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def test_check_grad_no_filter(self):
self.check_grad(
['Input'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
self.check_grad(
['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
def init_test_case(self):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.input_size = [2, 3, 5, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose"
if __name__ == '__main__':
unittest.main()
import unittest
import random
import numpy as np
from op_test import OpTest
class CRFDecoding(object):
def __init__(self, emission_weights, transition_weights,
seq_start_positions):
assert (emission_weights.shape[0] == seq_start_positions[-1])
self.tag_num = emission_weights.shape[1]
self.seq_num = len(seq_start_positions) - 1
self.seq_start_positions = seq_start_positions
self.x = emission_weights
self.a = transition_weights[0, :]
self.b = transition_weights[1, :]
self.w = transition_weights[2:, :]
self.track = np.zeros(
(seq_start_positions[-1], self.tag_num), dtype="int32")
self.decoded_path = np.zeros(
(seq_start_positions[-1], 1), dtype="int32")
def _decode_one_sequence(self, decoded_path, x):
seq_len, tag_num = x.shape
alpha = np.zeros((seq_len, tag_num), dtype="float64")
track = np.zeros((seq_len, tag_num), dtype="int32")
for i in range(tag_num):
alpha[0, i] = self.a[i] + x[0, i]
for k in range(1, seq_len):
for i in range(tag_num):
max_score = -np.finfo("float64").max
max_idx = 0
for j in range(tag_num):
score = alpha[k - 1, j] + self.w[j, i]
if score > max_score:
max_score = score
max_idx = j
alpha[k, i] = max_score + x[k, i]
track[k, i] = max_idx
max_score = -np.finfo("float64").max
max_idx = 0
for i in range(tag_num):
score = alpha[seq_len - 1, i] + self.b[i]
if score > max_score:
max_score = score
max_idx = i
decoded_path[-1] = max_idx
for i in range(seq_len - 1, 0, -1):
decoded_path[i - 1] = max_idx = track[i, max_idx]
def decode(self):
for i in range(self.seq_num):
start = self.seq_start_positions[i]
end = self.seq_start_positions[i + 1]
self._decode_one_sequence(self.decoded_path[start:end, :],
self.x[start:end, :])
return self.decoded_path
class TestCRFDecodingOp1(OpTest):
"""
Compare the dynamic program with random generated parameters and inputs
with grouth truth not being given.
"""
def set_test_data(self):
SEQ_NUM = 3
TAG_NUM = 17
MAX_SEQ_LEN = 10
lod = [[0]]
for i in range(SEQ_NUM):
lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN))
emission = np.random.uniform(-1, 1,
[lod[-1][-1], TAG_NUM]).astype("float64")
transition = np.random.uniform(-0.5, 0.5,
[TAG_NUM + 2, TAG_NUM]).astype("float64")
self.inputs = {
"Emission": (emission, lod),
"Transition": transition,
}
decoder = CRFDecoding(emission, transition, lod[0])
decoded_path = decoder.decode()
self.outputs = {"ViterbiPath": decoded_path}
def setUp(self):
self.op_type = "crf_decoding"
self.set_test_data()
def test_check_output(self):
self.check_output()
class TestCRFDecodingOp2(OpTest):
"""
Compare the dynamic program with brute force computation with
ground truth being given.
"""
def setUp(self):
self.op_type = "crf_decoding"
TAG_NUM = 5
lod = [[0, 1, 3, 6, 10]]
transition = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
TAG_NUM + 2,
axis=0)
emission = np.repeat(
np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
lod[-1][-1],
axis=0)
labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
predicted_labels = np.ones(
(lod[-1][-1], 1), dtype="int32") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int32")
self.inputs = {
"Emission": (emission, lod),
"Transition": transition,
"Label": (labels, lod)
}
self.outputs = {"ViterbiPath": expected_output}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
from paddle.v2.framework.layers import mul, data from paddle.v2.framework.layers import mul, data
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_program from paddle.v2.framework.framework import g_main_program
import numpy import numpy
...@@ -23,7 +23,7 @@ class TestExecutor(unittest.TestCase): ...@@ -23,7 +23,7 @@ class TestExecutor(unittest.TestCase):
tensor_b = core.LoDTensor() tensor_b = core.LoDTensor()
tensor_b.set(b_np, place) tensor_b.set(b_np, place)
exe = Executor(place) exe = Executor(place)
outs = exe.run(g_program, outs = exe.run(g_main_program,
feed={'a': tensor_a, feed={'a': tensor_a,
'b': tensor_b}, 'b': tensor_b},
fetch_list=[out]) fetch_list=[out])
......
...@@ -3,40 +3,44 @@ import paddle.v2.framework.layers as layers ...@@ -3,40 +3,44 @@ import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program from paddle.v2.framework.framework import Program, g_main_program
from paddle.v2.framework.io import save_persistables, load_persistables from paddle.v2.framework.io import save_persistables, load_persistables
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
init_program = Program() startup_program = Program()
program = Program() main_program = Program()
x = layers.data( x = layers.data(
name='x', name='x',
shape=[13], shape=[13],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
y_predict = layers.fc(input=x, y_predict = layers.fc(input=x,
size=1, size=1,
act=None, act=None,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
y = layers.data( y = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, label=y, program=program, init_program=init_program) input=y_predict,
avg_cost = layers.mean(x=cost, program=program, init_program=init_program) label=y,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, init_program) opts = sgd_optimizer.minimize(avg_cost, startup_program)
BATCH_SIZE = 20 BATCH_SIZE = 20
...@@ -48,12 +52,12 @@ train_reader = paddle.batch( ...@@ -48,12 +52,12 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[]) exe.run(startup_program, feed={}, fetch_list=[])
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", program=program) save_persistables(exe, "./fit_a_line.model/", main_program=main_program)
load_persistables(exe, "./fit_a_line.model/", program=program) load_persistables(exe, "./fit_a_line.model/", main_program=main_program)
for data in train_reader(): for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32") x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("float32")
...@@ -65,7 +69,7 @@ for pass_id in range(PASS_NUM): ...@@ -65,7 +69,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
# print tensor_y.get_dims() # print tensor_y.get_dims()
outs = exe.run(program, outs = exe.run(main_program,
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost])
......
import unittest
import numpy as np
import math
from op_test import OpTest
from test_lstm_op import identity, sigmoid, tanh, relu
class TestGRUOp(OpTest):
batch_size = 9
frame_size = 5
activate = {
'identity': identity,
'sigmoid': sigmoid,
'tanh': tanh,
'relu': relu
}
@staticmethod
def seq_to_batch(lod, is_reverse):
idx_in_seq_list = []
seq_starts = lod[0]
seq_lens = []
for i in range(len(seq_starts) - 1):
seq_lens.append(seq_starts[i + 1] - seq_starts[i])
sorted_seqs = sorted(
range(len(seq_lens)), lambda x, y: seq_lens[y] - seq_lens[x])
num_batch = seq_lens[sorted_seqs[0]]
for batch_idx in range(num_batch):
idx_in_seq = []
for i in range(len(seq_lens)):
if seq_lens[sorted_seqs[i]] <= batch_idx:
break
idx = (seq_starts[sorted_seqs[i] + 1] - 1 - batch_idx
) if is_reverse else (
seq_starts[sorted_seqs[i]] + batch_idx)
idx_in_seq.append(idx)
idx_in_seq_list.append(idx_in_seq)
return idx_in_seq_list
def gru_step(self, x, h_p, w, b):
batch_size = x.shape[0]
frame_size = w.shape[0]
g = x + np.tile(b, (batch_size, 1))
w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape(
(frame_size, frame_size * 2))
u_r = self.activate[self.attrs['gate_activation']](np.dot(
h_p, w_u_r) + g[:, :frame_size * 2])
u = u_r[:, :frame_size]
r = u_r[:, frame_size:frame_size * 2]
r_h_p = r * h_p
w_c = w.flatten()[frame_size * frame_size * 2:].reshape(
(frame_size, frame_size))
c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
g[:, frame_size * 2:])
g = np.hstack((u_r, c))
h = u * c + (1 - u) * h_p
return g, r_h_p, h
def gru(self):
input, lod = self.inputs['Input']
w = self.inputs['Weight']
b = self.inputs['Bias'] if self.inputs.has_key('Bias') else np.zeros(
(1, self.frame_size * 3))
batch_gate = self.outputs['BatchGate']
batch_reset_hidden_prev = self.outputs['BatchResetHiddenPrev']
batch_hidden = self.outputs['BatchHidden']
hidden = self.outputs['Hidden']
idx_in_seq_list = self.idx_in_seq_list
h_p = self.inputs['H0'] if self.inputs.has_key('H0') else np.zeros(
(len(idx_in_seq_list[0]), self.frame_size))
num_batch = len(idx_in_seq_list)
end_idx = 0
for batch_idx in range(num_batch):
x = input[idx_in_seq_list[batch_idx]]
g, r_h_p, h = self.gru_step(x, h_p, w, b)
if batch_idx < (num_batch - 1):
h_p = h[:len(idx_in_seq_list[batch_idx + 1])]
start_idx = end_idx
end_idx = start_idx + len(idx_in_seq_list[batch_idx])
batch_gate[start_idx:end_idx] = g
batch_reset_hidden_prev[start_idx:end_idx] = r_h_p
batch_hidden[start_idx:end_idx] = h
hidden[idx_in_seq_list[batch_idx]] = h
return batch_gate, batch_reset_hidden_prev, hidden
def set_data(self):
lod = [[0, 2, 6, self.batch_size]]
self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse)
batch_size = self.batch_size
frame_size = self.frame_size
input = np.random.rand(batch_size, frame_size * 3).astype('float64')
h0 = np.random.rand(len(self.idx_in_seq_list[0]),
frame_size).astype('float64')
weight = np.random.rand(frame_size, frame_size * 3).astype('float64')
bias = np.random.rand(1, frame_size * 3).astype('float64')
self.inputs = {
'Input': (input, lod),
'H0': h0,
'Weight': weight,
'Bias': bias
}
self.outputs = {
'BatchGate': np.zeros(
(batch_size, frame_size * 3), dtype='float64'),
'BatchResetHiddenPrev': np.zeros(
(batch_size, frame_size), dtype='float64'),
'BatchHidden': np.zeros(
(batch_size, frame_size), dtype='float64'),
'Hidden': np.zeros(
(batch_size, frame_size), dtype='float64')
}
def set_confs(self):
self.is_reverse = False
self.attrs = {
'activation': 'tanh',
'gate_activation': 'sigmoid',
'is_reverse': self.is_reverse
}
def setUp(self):
self.op_type = "gru"
self.set_confs()
self.set_data()
self.gru()
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['Input', 'H0', 'Weight', 'Bias'], ['Hidden'])
class TestGRUOpNoInitial(TestGRUOp):
def set_data(self):
super(TestGRUOpNoInitial, self).set_data()
self.inputs.pop('H0')
def test_check_grad(self):
self.check_grad(['Input', 'Weight', 'Bias'], ['Hidden'])
class TestGRUOpReverse(TestGRUOp):
def set_confs(self):
self.is_reverse = True
self.attrs = {
'activation': 'identity',
'gate_activation': 'sigmoid',
'is_reverse': self.is_reverse
}
if __name__ == "__main__":
unittest.main()
...@@ -21,7 +21,8 @@ class TestHuberLossOp(OpTest): ...@@ -21,7 +21,8 @@ class TestHuberLossOp(OpTest):
'Y': np.random.uniform(0, 1., (samples_num, 1)).astype('float32'), 'Y': np.random.uniform(0, 1., (samples_num, 1)).astype('float32'),
} }
residual = self.inputs['Y'] - self.inputs['X'] residual = self.inputs['Y'] - self.inputs['X']
loss = np.vectorize(huber_loss_forward)(residual, delta) loss = np.vectorize(huber_loss_forward)(residual,
delta).astype('float32')
self.attrs = {'delta': delta} self.attrs = {'delta': delta}
self.outputs = { self.outputs = {
'Residual': residual, 'Residual': residual,
...@@ -43,6 +44,5 @@ class TestHuberLossOp(OpTest): ...@@ -43,6 +44,5 @@ class TestHuberLossOp(OpTest):
['X'], 'Out', max_relative_error=0.008, no_grad_set=set('residual')) ['X'], 'Out', max_relative_error=0.008, no_grad_set=set('residual'))
# TODO(typhoonzero): should add this back till we fix it if __name__ == '__main__':
#if __name__ == '__main__': unittest.main()
# unittest.main()
...@@ -9,8 +9,8 @@ def conv_block(input, ...@@ -9,8 +9,8 @@ def conv_block(input,
num_filter, num_filter,
groups, groups,
dropouts, dropouts,
program=None, main_program=None,
init_program=None): startup_program=None):
return nets.img_conv_group( return nets.img_conv_group(
input=input, input=input,
pool_size=2, pool_size=2,
...@@ -21,77 +21,81 @@ def conv_block(input, ...@@ -21,77 +21,81 @@ def conv_block(input,
conv_with_batchnorm=True, conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts, conv_batchnorm_drop_rate=dropouts,
pool_type='max', pool_type='max',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
class TestLayer(unittest.TestCase): class TestLayer(unittest.TestCase):
def test_batch_norm_layer(self): def test_batch_norm_layer(self):
program = Program() main_program = Program()
init_program = Program() startup_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[3, 48, 48], shape=[3, 48, 48],
data_type='float32', data_type='float32',
program=program) main_program=main_program)
layers.batch_norm( layers.batch_norm(
input=images, program=program, init_program=init_program) input=images,
main_program=main_program,
startup_program=startup_program)
# print str(program) # print str(main_program)
def test_dropout_layer(self): def test_dropout_layer(self):
program = Program() main_program = Program()
init_program = Program() startup_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[3, 48, 48], shape=[3, 48, 48],
data_type='float32', data_type='float32',
program=program) main_program=main_program)
layers.dropout( layers.dropout(
x=images, x=images,
dropout_prob=0.5, dropout_prob=0.5,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
# print str(program) # print str(main_program)
def test_img_conv_group(self): def test_img_conv_group(self):
program = Program() main_program = Program()
init_program = Program() startup_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[3, 48, 48], shape=[3, 48, 48],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
conv1 = conv_block(images, 64, 2, [0.3, 0], program, init_program) conv1 = conv_block(images, 64, 2, [0.3, 0], main_program,
conv2 = conv_block(conv1, 256, 3, [0.4, 0.4, 0], program, init_program) startup_program)
conv2 = conv_block(conv1, 256, 3, [0.4, 0.4, 0], main_program,
startup_program)
# print str(program) # print str(main_program)
def test_elementwise_add_with_act(self): def test_elementwise_add_with_act(self):
program = Program() main_program = Program()
init_program = Program() startup_program = Program()
image1 = layers.data( image1 = layers.data(
name='pixel1', name='pixel1',
shape=[3, 48, 48], shape=[3, 48, 48],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
image2 = layers.data( image2 = layers.data(
name='pixel2', name='pixel2',
shape=[3, 48, 48], shape=[3, 48, 48],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
out = layers.elementwise_add( out = layers.elementwise_add(
x=image1, x=image1,
y=image2, y=image2,
act='relu', act='relu',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
# print(program) # print(main_program)
if __name__ == '__main__': if __name__ == '__main__':
......
import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.core as core
import paddle.v2.framework.layers as layers import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_startup_program, g_main_program
from paddle.v2.framework.initializer import XavierInitializer
import numpy as np
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def resnet_cifar10(input, depth=32, program=None, init_program=None):
def conv_bn_layer(input, def conv_bn_layer(input,
ch_out, ch_out,
filter_size, filter_size,
stride, stride,
padding, padding,
act='relu', act='relu',
program=None, main_program=None,
init_program=None): startup_program=None):
tmp = layers.conv2d( tmp = layers.conv2d(
input=input, input=input,
filter_size=filter_size, filter_size=filter_size,
...@@ -27,10 +26,13 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -27,10 +26,13 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
padding=padding, padding=padding,
act=None, act=None,
bias_attr=False, bias_attr=False,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return layers.batch_norm( return layers.batch_norm(
input=tmp, act=act, program=program, init_program=init_program) input=tmp,
act=act,
main_program=main_program,
startup_program=startup_program)
def shortcut(input, ch_in, ch_out, stride, program, init_program): def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out: if ch_in != ch_out:
...@@ -43,16 +45,16 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -43,16 +45,16 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
ch_in, ch_in,
ch_out, ch_out,
stride, stride,
program=program, main_program=main_program,
init_program=init_program): startup_program=startup_program):
tmp = conv_bn_layer( tmp = conv_bn_layer(
input, input,
ch_out, ch_out,
3, 3,
stride, stride,
1, 1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
tmp = conv_bn_layer( tmp = conv_bn_layer(
tmp, tmp,
ch_out, ch_out,
...@@ -60,21 +62,22 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -60,21 +62,22 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
1, 1,
1, 1,
act=None, act=None,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, program, init_program) short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
return layers.elementwise_add( return layers.elementwise_add(
x=tmp, x=tmp,
y=short, y=short,
act='relu', act='relu',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program, def layer_warp(block_func, input, ch_in, ch_out, count, stride, program,
init_program): startup_program):
tmp = block_func(input, ch_in, ch_out, stride, program, init_program) tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
for i in range(1, count): for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, init_program) tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program)
return tmp return tmp
assert (depth - 2) % 6 == 0 assert (depth - 2) % 6 == 0
...@@ -85,8 +88,8 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -85,8 +88,8 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1, padding=1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
res1 = layer_warp( res1 = layer_warp(
basicblock, basicblock,
conv1, conv1,
...@@ -94,8 +97,8 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -94,8 +97,8 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
16, 16,
n, n,
1, 1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
res2 = layer_warp( res2 = layer_warp(
basicblock, basicblock,
res1, res1,
...@@ -103,8 +106,8 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -103,8 +106,8 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
32, 32,
n, n,
2, 2,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
res3 = layer_warp( res3 = layer_warp(
basicblock, basicblock,
res2, res2,
...@@ -112,25 +115,25 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None): ...@@ -112,25 +115,25 @@ def resnet_cifar10(input, depth=32, program=None, init_program=None):
64, 64,
n, n,
2, 2,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
pool = layers.pool2d( pool = layers.pool2d(
input=res3, input=res3,
pool_size=8, pool_size=8,
pool_type='avg', pool_type='avg',
pool_stride=1, pool_stride=1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return pool return pool
def vgg16_bn_drop(input, program, init_program): def vgg16_bn_drop(input, main_program=None, startup_program=None):
def conv_block(input, def conv_block(input,
num_filter, num_filter,
groups, groups,
dropouts, dropouts,
program=None, main_program=None,
init_program=None): startup_program=None):
return nets.img_conv_group( return nets.img_conv_group(
input=input, input=input,
pool_size=2, pool_size=2,
...@@ -141,74 +144,75 @@ def vgg16_bn_drop(input, program, init_program): ...@@ -141,74 +144,75 @@ def vgg16_bn_drop(input, program, init_program):
conv_with_batchnorm=True, conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts, conv_batchnorm_drop_rate=dropouts,
pool_type='max', pool_type='max',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0], program, init_program) conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program)
conv2 = conv_block(conv1, 128, 2, [0.4, 0], program, init_program) conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program)
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], program, init_program) conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program,
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], program, init_program) startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], program, init_program) conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
drop = layers.dropout( drop = layers.dropout(
x=conv5, dropout_prob=0.5, program=program, init_program=init_program) x=conv5,
dropout_prob=0.5,
main_program=main_program,
startup_program=startup_program)
fc1 = layers.fc(input=drop, fc1 = layers.fc(input=drop,
size=512, size=512,
act=None, act=None,
program=program, param_attr={"initializer": XavierInitializer()},
init_program=init_program) main_program=main_program,
startup_program=startup_program)
reshape1 = layers.reshape( reshape1 = layers.reshape(
x=fc1, x=fc1,
shape=list(fc1.shape + (1, 1)), shape=list(fc1.shape + (1, 1)),
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
bn = layers.batch_norm( bn = layers.batch_norm(
input=reshape1, act='relu', program=program, init_program=init_program) input=reshape1,
act='relu',
main_program=main_program,
startup_program=startup_program)
drop2 = layers.dropout( drop2 = layers.dropout(
x=bn, dropout_prob=0.5, program=program, init_program=init_program) x=bn,
dropout_prob=0.5,
main_program=main_program,
startup_program=startup_program)
fc2 = layers.fc(input=drop2, fc2 = layers.fc(input=drop2,
size=512, size=512,
act=None, act=None,
program=program, param_attr={"initializer": XavierInitializer()},
init_program=init_program) main_program=main_program,
startup_program=startup_program)
return fc2 return fc2
init_program = Program()
program = Program()
classdim = 10 classdim = 10
data_shape = [3, 32, 32] data_shape = [3, 32, 32]
images = layers.data( images = layers.data(name='pixel', shape=data_shape, data_type='float32')
name='pixel', shape=data_shape, data_type='float32', program=program) label = layers.data(name='label', shape=[1], data_type='int64')
label = layers.data(
name='label',
shape=[1],
data_type='int64',
program=program,
init_program=init_program)
# Add neural network config # Add neural network config
# option 1. resnet # option 1. resnet
net = resnet_cifar10(images, 32, program, init_program) # net = resnet_cifar10(images, 32)
# option 2. vgg # option 2. vgg
# net = vgg16_bn_drop(images, program, init_program) net = vgg16_bn_drop(images)
# print(program) # print(program)
predict = layers.fc(input=net, predict = layers.fc(input=net, size=classdim, act='softmax')
size=classdim, cost = layers.cross_entropy(input=predict, label=label)
act='softmax', avg_cost = layers.mean(x=cost)
program=program, accuracy = layers.accuracy(input=predict, label=label)
init_program=init_program)
cost = layers.cross_entropy(
input=predict, label=label, program=program, init_program=init_program)
avg_cost = layers.mean(x=cost, program=program, init_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) # optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, init_program) optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)
BATCH_SIZE = 128 BATCH_SIZE = 128
PASS_NUM = 1 PASS_NUM = 1
...@@ -221,7 +225,7 @@ train_reader = paddle.batch( ...@@ -221,7 +225,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[]) exe.run(g_startup_program, feed={}, fetch_list=[])
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
batch_id = 0 batch_id = 0
...@@ -239,14 +243,15 @@ for pass_id in range(PASS_NUM): ...@@ -239,14 +243,15 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(program, outs = exe.run(g_main_program,
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost, accuracy])
loss = np.array(outs[0]) loss = np.array(outs[0])
acc = np.array(outs[1])
print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
" loss:" + str(loss)) " loss:" + str(loss) + " acc:" + str(acc))
batch_id = batch_id + 1 batch_id = batch_id + 1
if batch_id > 1: if batch_id > 1:
......
...@@ -3,7 +3,7 @@ import paddle.v2.framework.layers as layers ...@@ -3,7 +3,7 @@ import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program from paddle.v2.framework.framework import Program, g_main_program
from paddle.v2.framework.io import save_inference_model, load_inference_model from paddle.v2.framework.io import save_inference_model, load_inference_model
import paddle.v2.framework.executor as executor import paddle.v2.framework.executor as executor
import unittest import unittest
...@@ -20,28 +20,28 @@ class TestBook(unittest.TestCase): ...@@ -20,28 +20,28 @@ class TestBook(unittest.TestCase):
name='x', name='x',
shape=[2], shape=[2],
data_type='float32', data_type='float32',
program=program, main_program=program,
init_program=init_program) startup_program=init_program)
y = layers.data( y = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='float32', data_type='float32',
program=program, main_program=program,
init_program=init_program) startup_program=init_program)
y_predict = layers.fc(input=x, y_predict = layers.fc(input=x,
size=1, size=1,
act=None, act=None,
program=program, main_program=program,
init_program=init_program) startup_program=init_program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, input=y_predict,
label=y, label=y,
program=program, main_program=program,
init_program=init_program) startup_program=init_program)
avg_cost = layers.mean( avg_cost = layers.mean(
x=cost, program=program, init_program=init_program) x=cost, main_program=program, startup_program=init_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, init_program) opts = sgd_optimizer.minimize(avg_cost, init_program)
......
import paddle.v2.framework.layers as layers import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.framework.nets as nets
from paddle.v2.framework.framework import Program, g_program from paddle.v2.framework.framework import Program, g_main_program
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import unittest import unittest
...@@ -9,15 +9,15 @@ class TestBook(unittest.TestCase): ...@@ -9,15 +9,15 @@ class TestBook(unittest.TestCase):
def test_fit_a_line(self): def test_fit_a_line(self):
program = Program() program = Program()
x = layers.data( x = layers.data(
name='x', shape=[13], data_type='float32', program=program) name='x', shape=[13], data_type='float32', main_program=program)
y_predict = layers.fc(input=x, size=1, act=None, program=program) y_predict = layers.fc(input=x, size=1, act=None, main_program=program)
y = layers.data( y = layers.data(
name='y', shape=[1], data_type='float32', program=program) name='y', shape=[1], data_type='float32', main_program=program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, label=y, program=program) input=y_predict, label=y, main_program=program)
avg_cost = layers.mean(x=cost, program=program) avg_cost = layers.mean(x=cost, main_program=program)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
program.append_backward(avg_cost) program.append_backward(avg_cost)
print str(program) print str(program)
...@@ -27,26 +27,42 @@ class TestBook(unittest.TestCase): ...@@ -27,26 +27,42 @@ class TestBook(unittest.TestCase):
# Change g_program, so the rest layers use `g_program` # Change g_program, so the rest layers use `g_program`
images = layers.data( images = layers.data(
name='pixel', shape=[784], data_type='float32', program=program) name='pixel',
shape=[784],
data_type='float32',
main_program=program)
label = layers.data( label = layers.data(
name='label', shape=[1], data_type='int32', program=program) name='label', shape=[1], data_type='int32', main_program=program)
hidden1 = layers.fc(input=images, size=128, act='relu', program=program) hidden1 = layers.fc(input=images,
hidden2 = layers.fc(input=hidden1, size=64, act='relu', program=program) size=128,
act='relu',
main_program=program)
hidden2 = layers.fc(input=hidden1,
size=64,
act='relu',
main_program=program)
predict = layers.fc(input=hidden2, predict = layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
program=program) main_program=program)
cost = layers.cross_entropy(input=predict, label=label, program=program) cost = layers.cross_entropy(
avg_cost = layers.mean(x=cost, program=program) input=predict, label=label, main_program=program)
avg_cost = layers.mean(x=cost, main_program=program)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
print str(program) print str(program)
def test_simple_conv2d(self): def test_simple_conv2d(self):
program = Program() program = Program()
images = layers.data( images = layers.data(
name='pixel', shape=[3, 48, 48], data_type='int32', program=program) name='pixel',
shape=[3, 48, 48],
data_type='int32',
main_program=program)
layers.conv2d( layers.conv2d(
input=images, num_filters=3, filter_size=[4, 4], program=program) input=images,
num_filters=3,
filter_size=[4, 4],
main_program=program)
print str(program) print str(program)
...@@ -57,9 +73,9 @@ class TestBook(unittest.TestCase): ...@@ -57,9 +73,9 @@ class TestBook(unittest.TestCase):
name='pixel', name='pixel',
shape=[1, 28, 28], shape=[1, 28, 28],
data_type='float32', data_type='float32',
program=program) main_program=program)
label = layers.data( label = layers.data(
name='label', shape=[1], data_type='int32', program=program) name='label', shape=[1], data_type='int32', main_program=program)
conv_pool_1 = nets.simple_img_conv_pool( conv_pool_1 = nets.simple_img_conv_pool(
input=images, input=images,
filter_size=5, filter_size=5,
...@@ -67,7 +83,7 @@ class TestBook(unittest.TestCase): ...@@ -67,7 +83,7 @@ class TestBook(unittest.TestCase):
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu",
program=program) main_program=program)
conv_pool_2 = nets.simple_img_conv_pool( conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
...@@ -75,14 +91,15 @@ class TestBook(unittest.TestCase): ...@@ -75,14 +91,15 @@ class TestBook(unittest.TestCase):
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu",
program=program) main_program=program)
predict = layers.fc(input=conv_pool_2, predict = layers.fc(input=conv_pool_2,
size=10, size=10,
act="softmax", act="softmax",
program=program) main_program=program)
cost = layers.cross_entropy(input=predict, label=label, program=program) cost = layers.cross_entropy(
avg_cost = layers.mean(x=cost, program=program) input=predict, label=label, main_program=program)
avg_cost = layers.mean(x=cost, main_program=program)
program.append_backward(avg_cost) program.append_backward(avg_cost)
...@@ -93,58 +110,58 @@ class TestBook(unittest.TestCase): ...@@ -93,58 +110,58 @@ class TestBook(unittest.TestCase):
dict_size = 10000 dict_size = 10000
embed_size = 32 embed_size = 32
first_word = layers.data( first_word = layers.data(
name='firstw', shape=[1], data_type='int64', program=program) name='firstw', shape=[1], data_type='int64', main_program=program)
second_word = layers.data( second_word = layers.data(
name='secondw', shape=[1], data_type='int64', program=program) name='secondw', shape=[1], data_type='int64', main_program=program)
third_word = layers.data( third_word = layers.data(
name='thirdw', shape=[1], data_type='int64', program=program) name='thirdw', shape=[1], data_type='int64', main_program=program)
forth_word = layers.data( forth_word = layers.data(
name='forthw', shape=[1], data_type='int64', program=program) name='forthw', shape=[1], data_type='int64', main_program=program)
next_word = layers.data( next_word = layers.data(
name='nextw', shape=[1], data_type='int64', program=program) name='nextw', shape=[1], data_type='int64', main_program=program)
embed_first = layers.embedding( embed_first = layers.embedding(
input=first_word, input=first_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
data_type='float32', data_type='float32',
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program) main_program=program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
data_type='float32', data_type='float32',
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program) main_program=program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
data_type='float32', data_type='float32',
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program) main_program=program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
data_type='float32', data_type='float32',
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program) main_program=program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], input=[embed_first, embed_second, embed_third, embed_forth],
axis=1, axis=1,
program=program) main_program=program)
hidden1 = layers.fc(input=concat_embed, hidden1 = layers.fc(input=concat_embed,
size=256, size=256,
act='sigmoid', act='sigmoid',
program=program) main_program=program)
predict_word = layers.fc(input=hidden1, predict_word = layers.fc(input=hidden1,
size=dict_size, size=dict_size,
act='softmax', act='softmax',
program=program) main_program=program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict_word, label=next_word, program=program) input=predict_word, label=next_word, main_program=program)
avg_cost = layers.mean(x=cost, program=program) avg_cost = layers.mean(x=cost, main_program=program)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
print str(program) print str(program)
......
from paddle.v2.framework.layers import lod_rank_table, data
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_main_program
import paddle.v2.framework.core as core
import numpy
import unittest
class TestLoDRankTable(unittest.TestCase):
def test_lod_rank_table(self):
x = data(name='x', shape=[100])
cpu = core.CPUPlace()
rank_table = lod_rank_table(x=x, level=1)
rank_table.persistable = True
exe = Executor(cpu)
scope = core.Scope()
tensor = core.LoDTensor()
tensor.set(numpy.random.random(size=(17, 100)), cpu)
tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]])
exe.run(g_main_program, scope=scope, feed={'x': tensor})
var = scope.find_var(rank_table.name)
table = var.get_lod_rank_table()
self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items())
if __name__ == '__main__':
unittest.main()
import unittest
import paddle.v2.framework.core as core
import numpy
class TestLoDTensorArray(unittest.TestCase):
def test_get_set(self):
scope = core.Scope()
arr = scope.var('tmp_lod_tensor_array')
tensor_array = arr.get_lod_tensor_array()
self.assertEqual(0, len(tensor_array))
cpu = core.CPUPlace()
for i in xrange(10):
t = core.LoDTensor()
t.set(numpy.array([i], dtype='float32'), cpu)
t.set_lod([[0, 1]])
tensor_array.append(t)
self.assertEqual(10, len(tensor_array))
for i in xrange(10):
t = tensor_array[i]
self.assertEqual(numpy.array(t), numpy.array([i], dtype='float32'))
self.assertEqual([[0, 1]], t.lod())
t = core.LoDTensor()
t.set(numpy.array([i + 10], dtype='float32'), cpu)
t.set_lod([[0, 2]])
tensor_array[i] = t
t = tensor_array[i]
self.assertEqual(
numpy.array(t), numpy.array(
[i + 10], dtype='float32'))
self.assertEqual([[0, 2]], t.lod())
if __name__ == '__main__':
unittest.main()
import unittest import unittest
from paddle.v2.framework.framework import Variable, Program, g_program from paddle.v2.framework.framework import Variable, Program, g_main_program
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
class TestOperator(unittest.TestCase): class TestOperator(unittest.TestCase):
def test_error_type(self): def test_error_type(self):
block = g_program.create_block() block = g_main_program.create_block()
try: try:
block.append_op() block.append_op()
self.assertFail() self.assertFail()
......
import unittest import unittest
from paddle.v2.framework.framework import g_program from paddle.v2.framework.framework import g_main_program
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
class TestParameter(unittest.TestCase): class TestParameter(unittest.TestCase):
def test_param(self): def test_param(self):
b = g_program.create_block() b = g_main_program.create_block()
param = b.create_parameter( param = b.create_parameter(
name='fc.w', name='fc.w',
shape=[784, 100], shape=[784, 100],
......
import unittest
import itertools
import numpy as np
from op_test import OpTest
def py_pnpair_op(score, label, query, column=-1, weight=None):
# group by query id
predictions = {}
batch_size = label.shape[0]
if weight is None:
weight = np.ones(shape=(batch_size, 1)).astype('float32')
for s, l, q, w in zip(score, label, query, weight):
s, l, q, w = s[column], l[0], q[0], w[0]
if q not in predictions:
predictions[q] = []
predictions[q].append((s, l, w))
# accumulate statistics
pos, neg, neu = 0, 0, 0
for _, ranks in predictions.items():
for e1, e2 in itertools.combinations(ranks, 2):
s1, s2, l1, l2, w1, w2 = e1[0], e2[0], e1[1], e2[1], e1[2], e2[2]
w = (w1 + w2) * 0.5
if l1 == l2:
continue
if s1 == s2:
neu += w
elif (s1 - s2) * (l1 - l2) > 0:
pos += w
else:
neg += w
return np.array(pos).astype('float32'), np.array(neg).astype(
'float32'), np.array(neu).astype('float32')
class TestPositiveNegativePairOp(OpTest):
def setUp(self):
self.op_type = 'positive_negative_pair'
batch_size = 20
max_query_id = 5
score = np.random.normal(size=(batch_size, 1)).astype('float32')
label = np.random.normal(size=(batch_size, 1)).astype('float32')
query = np.array(
[np.random.randint(max_query_id) for i in range(batch_size)])
query = np.reshape(query, newshape=(batch_size, 1)).astype('int64')
pos, neg, neu = py_pnpair_op(score, label, query)
self.inputs = {'Score': score, 'Label': label, 'QueryID': query}
self.attrs = {'column': -1}
self.outputs = {
'PositivePair': pos,
'NegativePair': neg,
'NeutralPair': neu
}
def test_check_output(self):
self.check_output()
class TestPositiveNegativePairOpAccumulateWeight(OpTest):
def setUp(self):
self.op_type = 'positive_negative_pair'
batch_size = 20
max_query_id = 5
max_random_num = 2 << 15
score_dim = 2
score = np.random.normal(size=(batch_size, 2)).astype('float32')
label = np.random.normal(size=(batch_size, 1)).astype('float32')
weight = np.random.normal(size=(batch_size, 1)).astype('float32')
query = np.array(
[np.random.randint(max_query_id) for i in range(batch_size)])
query = np.reshape(query, newshape=(batch_size, 1)).astype('int64')
acc_pos = np.reshape(
np.random.randint(max_random_num), newshape=(1)).astype('float32')
acc_neg = np.reshape(
np.random.randint(max_random_num), newshape=(1)).astype('float32')
acc_neu = np.reshape(
np.random.randint(max_random_num), newshape=(1)).astype('float32')
column = np.random.randint(score_dim)
pos, neg, neu = py_pnpair_op(
score, label, query, column=column, weight=weight)
self.inputs = {
'Score': score,
'Label': label,
'QueryID': query,
'AccumulatePositivePair': acc_pos,
'AccumulateNegativePair': acc_neg,
'AccumulateNeutralPair': acc_neu,
'Weight': weight
}
self.attrs = {'column': column}
self.outputs = {
'PositivePair': pos + acc_pos,
'NegativePair': neg + acc_neg,
'NeutralPair': neu + acc_neu
}
def test_check_output(self):
self.check_output()
if __name__ == '__main__':
unittest.main()
...@@ -2,35 +2,35 @@ import unittest ...@@ -2,35 +2,35 @@ import unittest
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
from paddle.v2.framework.framework import Program from paddle.v2.framework.framework import Program
from paddle.v2.framework.framework import g_program from paddle.v2.framework.framework import g_main_program
class TestProgram(unittest.TestCase): class TestProgram(unittest.TestCase):
def test_program(self): def test_program(self):
b = g_program.current_block() b = g_main_program.current_block()
self.assertEqual(-1, b.parent_idx) self.assertEqual(-1, b.parent_idx)
self.assertEqual(0, b.idx) self.assertEqual(0, b.idx)
b = g_program.create_block() b = g_main_program.create_block()
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
b = g_program.create_block() b = g_main_program.create_block()
self.assertEqual(2, b.idx) self.assertEqual(2, b.idx)
self.assertEqual(1, b.parent_idx) self.assertEqual(1, b.parent_idx)
g_program.rollback() g_main_program.rollback()
b = g_program.current_block() b = g_main_program.current_block()
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
b = g_program.create_block() b = g_main_program.create_block()
self.assertEqual(3, b.idx) self.assertEqual(3, b.idx)
self.assertEqual(1, b.parent_idx) self.assertEqual(1, b.parent_idx)
g_program.rollback() g_main_program.rollback()
b = g_program.current_block() b = g_main_program.current_block()
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
......
...@@ -4,26 +4,26 @@ import paddle.v2.framework.nets as nets ...@@ -4,26 +4,26 @@ import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program from paddle.v2.framework.framework import Program, g_main_program
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
init_program = Program() startup_program = Program()
program = Program() main_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[1, 28, 28], shape=[1, 28, 28],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
label = layers.data( label = layers.data(
name='label', name='label',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
conv_pool_1 = nets.simple_img_conv_pool( conv_pool_1 = nets.simple_img_conv_pool(
input=images, input=images,
filter_size=5, filter_size=5,
...@@ -31,8 +31,8 @@ conv_pool_1 = nets.simple_img_conv_pool( ...@@ -31,8 +31,8 @@ conv_pool_1 = nets.simple_img_conv_pool(
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
conv_pool_2 = nets.simple_img_conv_pool( conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
...@@ -40,24 +40,30 @@ conv_pool_2 = nets.simple_img_conv_pool( ...@@ -40,24 +40,30 @@ conv_pool_2 = nets.simple_img_conv_pool(
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
predict = layers.fc(input=conv_pool_2, predict = layers.fc(input=conv_pool_2,
size=10, size=10,
act="softmax", act="softmax",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict, label=label, program=program, init_program=init_program) input=predict,
avg_cost = layers.mean(x=cost, program=program) label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
accuracy = layers.accuracy( accuracy = layers.accuracy(
input=predict, label=label, program=program, init_program=init_program) input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0, # optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9) # momentum=0.9)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, init_program) opts = optimizer.minimize(avg_cost, startup_program)
BATCH_SIZE = 50 BATCH_SIZE = 50
PASS_NUM = 3 PASS_NUM = 3
...@@ -69,7 +75,7 @@ train_reader = paddle.batch( ...@@ -69,7 +75,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[]) exe.run(startup_program, feed={}, fetch_list=[])
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
count = 0 count = 0
...@@ -84,7 +90,7 @@ for pass_id in range(PASS_NUM): ...@@ -84,7 +90,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(program, outs = exe.run(main_program,
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
...@@ -11,14 +11,14 @@ from paddle.v2.framework.initializer import UniformInitializer ...@@ -11,14 +11,14 @@ from paddle.v2.framework.initializer import UniformInitializer
import numpy as np import numpy as np
BATCH_SIZE = 128 BATCH_SIZE = 128
init_program = Program() startup_program = Program()
program = Program() main_program = Program()
image = layers.data( image = layers.data(
name='x', name='x',
shape=[784], shape=[784],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
param_attr = { param_attr = {
'name': None, 'name': None,
...@@ -30,36 +30,45 @@ param_attr = { ...@@ -30,36 +30,45 @@ param_attr = {
hidden1 = layers.fc(input=image, hidden1 = layers.fc(input=image,
size=128, size=128,
act='relu', act='relu',
program=program, main_program=main_program,
init_program=init_program, startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
hidden2 = layers.fc(input=hidden1, hidden2 = layers.fc(input=hidden1,
size=64, size=64,
act='relu', act='relu',
program=program, main_program=main_program,
init_program=init_program, startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
predict = layers.fc(input=hidden2, predict = layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
program=program, main_program=main_program,
init_program=init_program, startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
label = layers.data( label = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict, label=label, program=program, init_program=init_program) input=predict,
avg_cost = layers.mean(x=cost, program=program, init_program=init_program) label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, init_program) opts = optimizer.minimize(avg_cost, startup_program)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -69,7 +78,7 @@ train_reader = paddle.batch( ...@@ -69,7 +78,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[]) exe.run(startup_program, feed={}, fetch_list=[])
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
...@@ -84,12 +93,12 @@ for pass_id in range(PASS_NUM): ...@@ -84,12 +93,12 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(program, outs = exe.run(main_program,
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost, accuracy])
out = np.array(outs[0]) out = np.array(outs[0])
acc = np.array(outs[1])
if out[0] < 5.0: if out[0] < 5.0:
exit(0) # if avg cost less than 5.0, we think our code is good. exit(0) # if avg cost less than 5.0, we think our code is good.
exit(1) exit(1)
...@@ -4,13 +4,13 @@ import paddle.v2.framework.nets as nets ...@@ -4,13 +4,13 @@ import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program from paddle.v2.framework.framework import Program, g_main_program
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
init_program = Program() startup_program = Program()
program = Program() main_program = Program()
is_sparse = True is_sparse = True
use_gpu = False use_gpu = False
BATCH_SIZE = 256 BATCH_SIZE = 256
...@@ -26,8 +26,8 @@ def get_usr_combined_features(): ...@@ -26,8 +26,8 @@ def get_usr_combined_features():
name='user_id', name='user_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_emb = layers.embedding( usr_emb = layers.embedding(
input=uid, input=uid,
...@@ -35,13 +35,13 @@ def get_usr_combined_features(): ...@@ -35,13 +35,13 @@ def get_usr_combined_features():
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'}, param_attr={'name': 'user_table'},
is_sparse=is_sparse, is_sparse=is_sparse,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_fc = layers.fc(input=usr_emb, usr_fc = layers.fc(input=usr_emb,
size=32, size=32,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
...@@ -49,75 +49,75 @@ def get_usr_combined_features(): ...@@ -49,75 +49,75 @@ def get_usr_combined_features():
name='gender_id', name='gender_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_gender_emb = layers.embedding( usr_gender_emb = layers.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'}, param_attr={'name': 'gender_table'},
is_sparse=is_sparse, is_sparse=is_sparse,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_gender_fc = layers.fc(input=usr_gender_emb, usr_gender_fc = layers.fc(input=usr_gender_emb,
size=16, size=16,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data( usr_age_id = layers.data(
name='age_id', name='age_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_age_emb = layers.embedding( usr_age_emb = layers.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=is_sparse, is_sparse=is_sparse,
param_attr={'name': 'age_table'}, param_attr={'name': 'age_table'},
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_age_fc = layers.fc(input=usr_age_emb, usr_age_fc = layers.fc(input=usr_age_emb,
size=16, size=16,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data( usr_job_id = layers.data(
name='job_id', name='job_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_job_emb = layers.embedding( usr_job_emb = layers.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'}, param_attr={'name': 'job_table'},
is_sparse=is_sparse, is_sparse=is_sparse,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_job_fc = layers.fc(input=usr_job_emb, usr_job_fc = layers.fc(input=usr_job_emb,
size=16, size=16,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
axis=1, axis=1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
usr_combined_features = layers.fc(input=concat_embed, usr_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return usr_combined_features return usr_combined_features
...@@ -130,8 +130,8 @@ def get_mov_combined_features(): ...@@ -130,8 +130,8 @@ def get_mov_combined_features():
name='movie_id', name='movie_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
mov_emb = layers.embedding( mov_emb = layers.embedding(
input=mov_id, input=mov_id,
...@@ -139,13 +139,13 @@ def get_mov_combined_features(): ...@@ -139,13 +139,13 @@ def get_mov_combined_features():
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'}, param_attr={'name': 'movie_table'},
is_sparse=is_sparse, is_sparse=is_sparse,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
mov_fc = layers.fc(input=mov_emb, mov_fc = layers.fc(input=mov_emb,
size=32, size=32,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
...@@ -153,21 +153,21 @@ def get_mov_combined_features(): ...@@ -153,21 +153,21 @@ def get_mov_combined_features():
name='category_id', name='category_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
mov_categories_emb = layers.embedding( mov_categories_emb = layers.embedding(
input=category_id, input=category_id,
size=[CATEGORY_DICT_SIZE, 32], size=[CATEGORY_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=is_sparse,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb, input=mov_categories_emb,
pool_type="sum", pool_type="sum",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
...@@ -175,15 +175,15 @@ def get_mov_combined_features(): ...@@ -175,15 +175,15 @@ def get_mov_combined_features():
name='movie_title', name='movie_title',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
mov_title_emb = layers.embedding( mov_title_emb = layers.embedding(
input=mov_title_id, input=mov_title_id,
size=[MOV_TITLE_DICT_SIZE, 32], size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=is_sparse,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb, input=mov_title_emb,
...@@ -191,21 +191,21 @@ def get_mov_combined_features(): ...@@ -191,21 +191,21 @@ def get_mov_combined_features():
filter_size=3, filter_size=3,
act="tanh", act="tanh",
pool_type="sum", pool_type="sum",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv], input=[mov_fc, mov_categories_hidden, mov_title_conv],
axis=1, axis=1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
# FIXME(dzh) : need tanh operator # FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed, mov_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh",
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
return mov_combined_features return mov_combined_features
...@@ -218,24 +218,26 @@ def model(): ...@@ -218,24 +218,26 @@ def model():
inference = layers.cos_sim( inference = layers.cos_sim(
X=usr_combined_features, X=usr_combined_features,
Y=mov_combined_features, Y=mov_combined_features,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
label = layers.data( label = layers.data(
name='score', name='score',
shape=[1], shape=[1],
data_type='float32', data_type='float32',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
square_cost = layers.square_error_cost( square_cost = layers.square_error_cost(
input=inference, input=inference,
label=label, label=label,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
avg_cost = layers.mean( avg_cost = layers.mean(
x=square_cost, program=program, init_program=init_program) x=square_cost,
main_program=main_program,
startup_program=startup_program)
return avg_cost return avg_cost
...@@ -243,8 +245,8 @@ def model(): ...@@ -243,8 +245,8 @@ def model():
def main(): def main():
cost = model() cost = model()
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost, init_program=init_program) opts = sgd_optimizer.minimize(cost, startup_program=startup_program)
block = program.block(0) block = main_program.block(0)
if use_gpu: if use_gpu:
place = core.GPUPlace(0) place = core.GPUPlace(0)
...@@ -252,7 +254,7 @@ def main(): ...@@ -252,7 +254,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[]) exe.run(startup_program, feed={}, fetch_list=[])
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -301,7 +303,7 @@ def main(): ...@@ -301,7 +303,7 @@ def main():
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
outs = exe.run(program, outs = exe.run(main_program,
feed=func_feed(feeding, data), feed=func_feed(feeding, data),
fetch_list=[cost]) fetch_list=[cost])
out = np.array(outs[0]) out = np.array(outs[0])
......
...@@ -99,17 +99,17 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -99,17 +99,17 @@ class RecurrentOpTest1(unittest.TestCase):
batch_size = 1 batch_size = 1
sent_len = 1 sent_len = 1
def init_program(self): def setup_program(self):
self.program = Program() self.main_program = Program()
self.init_program = Program() self.startup_program = Program()
self.p_info = { self.p_info = {
"program": self.program, "main_program": self.main_program,
"init_program": self.init_program "startup_program": self.startup_program
} }
self.place = core.CPUPlace() self.place = core.CPUPlace()
def setUp(self): def setUp(self):
self.init_program() self.setup_program()
self.data_field = {"x", "h_boot"} self.data_field = {"x", "h_boot"}
self.input_shape = (self.sent_len, self.batch_size, self.input_dim) self.input_shape = (self.sent_len, self.batch_size, self.input_dim)
...@@ -125,13 +125,15 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -125,13 +125,15 @@ class RecurrentOpTest1(unittest.TestCase):
name='x', name='x',
append_batch_size=False, append_batch_size=False,
**self.p_info) **self.p_info)
x.stop_gradient = False
h_boot = data( h_boot = data(
shape=[self.input_dim], shape=[self.input_dim],
data_type='float32', data_type='float32',
name='h_boot', name='h_boot',
**self.p_info) **self.p_info)
h_boot.stop_gradient = False
rnn = StaticRNN(program=self.program) rnn = StaticRNN(main_program=self.main_program)
with rnn.step(): with rnn.step():
h_pre = rnn.memory(init=h_boot) h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x) x_t = rnn.step_input(x)
...@@ -153,7 +155,7 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -153,7 +155,7 @@ class RecurrentOpTest1(unittest.TestCase):
for x in self.data_field for x in self.data_field
} }
exe = Executor(self.place) exe = Executor(self.place)
out = exe.run(self.program, out = exe.run(self.main_program,
feed=self.feed_map, feed=self.feed_map,
fetch_list=[self.output]) fetch_list=[self.output])
...@@ -165,12 +167,14 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -165,12 +167,14 @@ class RecurrentOpTest1(unittest.TestCase):
for x in self.data_field for x in self.data_field
} }
fetch_list = [ fetch_list = [
self.program.global_block().var(x + "@GRAD") self.main_program.global_block().var(x + "@GRAD")
for x in self.data_field for x in self.data_field
] ]
exe = Executor(self.place) exe = Executor(self.place)
return exe.run(self.program, feed=self.feed_map, fetch_list=fetch_list) return exe.run(self.main_program,
feed=self.feed_map,
fetch_list=fetch_list)
def test_backward(self): def test_backward(self):
self.check_forward() self.check_forward()
...@@ -237,7 +241,7 @@ class RecurrentOpTest2(RecurrentOpTest1): ...@@ -237,7 +241,7 @@ class RecurrentOpTest2(RecurrentOpTest1):
sent_len = 2 sent_len = 2
def setUp(self): def setUp(self):
self.init_program() self.setup_program()
self.data_field = {"x", "h_boot", "W", "U"} self.data_field = {"x", "h_boot", "W", "U"}
...@@ -254,13 +258,15 @@ class RecurrentOpTest2(RecurrentOpTest1): ...@@ -254,13 +258,15 @@ class RecurrentOpTest2(RecurrentOpTest1):
name='x', name='x',
append_batch_size=False, append_batch_size=False,
**self.p_info) **self.p_info)
x.stop_gradient = False
h_boot = data( h_boot = data(
shape=[self.input_dim], shape=[self.input_dim],
data_type='float32', data_type='float32',
name='h_boot', name='h_boot',
**self.p_info) **self.p_info)
h_boot.stop_gradient = False
rnn = StaticRNN(program=self.program) rnn = StaticRNN(main_program=self.main_program)
with rnn.step(): with rnn.step():
h_pre = rnn.memory(init=h_boot) h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x) x_t = rnn.step_input(x)
...@@ -333,7 +339,7 @@ class RecurrentOpTest3(RecurrentOpTest1): ...@@ -333,7 +339,7 @@ class RecurrentOpTest3(RecurrentOpTest1):
sent_len = 2 sent_len = 2
def setUp(self): def setUp(self):
self.init_program() self.setup_program()
self.data_field = {"x", "h_boot1", "h_boot2"} self.data_field = {"x", "h_boot1", "h_boot2"}
...@@ -351,20 +357,23 @@ class RecurrentOpTest3(RecurrentOpTest1): ...@@ -351,20 +357,23 @@ class RecurrentOpTest3(RecurrentOpTest1):
name='x', name='x',
append_batch_size=False, append_batch_size=False,
**self.p_info) **self.p_info)
x.stop_gradient = False
h_boot1 = data( h_boot1 = data(
shape=[self.batch_size, self.input_dim], shape=[self.batch_size, self.input_dim],
data_type='float32', data_type='float32',
name='h_boot1', name='h_boot1',
append_batch_size=False, append_batch_size=False,
**self.p_info) **self.p_info)
h_boot1.stop_gradient = False
h_boot2 = data( h_boot2 = data(
shape=[self.batch_size, self.input_dim], shape=[self.batch_size, self.input_dim],
data_type='float32', data_type='float32',
name='h_boot2', name='h_boot2',
append_batch_size=False, append_batch_size=False,
**self.p_info) **self.p_info)
h_boot2.stop_gradient = False
rnn = StaticRNN(program=self.program) rnn = StaticRNN(main_program=self.main_program)
with rnn.step(): with rnn.step():
h_pre1 = rnn.memory(init=h_boot1) h_pre1 = rnn.memory(init=h_boot1)
h_pre2 = rnn.memory(init=h_boot2) h_pre2 = rnn.memory(init=h_boot2)
......
...@@ -29,6 +29,9 @@ class TestSeqAvgPool(OpTest): ...@@ -29,6 +29,9 @@ class TestSeqAvgPool(OpTest):
self.check_output() self.check_output()
def test_check_grad(self): def test_check_grad(self):
# Remove MaxIndex after check_grad is refined.
self.outputs['MaxIndex'] = \
np.zeros(self.outputs['Out'].shape).astype('int32')
self.check_grad(["X"], "Out") self.check_grad(["X"], "Out")
...@@ -85,31 +88,53 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): ...@@ -85,31 +88,53 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D):
out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17)) out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17))
def test_check_grad(self): def test_check_grad(self):
# Remove MaxIndex after check_grad is refined.
self.outputs['MaxIndex'] = \
np.zeros(self.outputs['Out'].shape).astype('int32')
self.check_grad(["X"], "Out", max_relative_error=0.06) self.check_grad(["X"], "Out", max_relative_error=0.06)
class TestSeqMaxPool(TestSeqAvgPool): class TestSeqMaxPool(TestSeqAvgPool):
def set_data(self):
self.op_type = 'sequence_pool'
x = np.random.uniform(0.1, 1, [13, 23]).astype('float32')
lod = [[0, 4, 5, 8, 13]]
for i in range(4):
l = lod[0][i + 1] - lod[0][i]
x[lod[0][i] + np.random.randint(l), :] += 2.0
self.inputs = {'X': (x, lod)}
out = np.zeros((4, 23)).astype('float32')
self.outputs = {'Out': out}
return x, lod, out
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'pooltype': "MAX"} self.attrs = {'pooltype': "MAX"}
for i in range(4): for i in range(4):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[lod[0][i]:lod[0][i + 1], :]
out[i] = np.amax(sub_x, axis=0) out[i] = np.amax(sub_x, axis=0)
def test_check_grad(self):
# Remove MaxPool2D from gradient check to confirm the success of CI.
return
class TestSeqMaxPool2D(TestSeqAvgPool2D): class TestSeqMaxPool2D(TestSeqAvgPool2D):
def set_data(self):
self.op_type = 'sequence_pool'
x = np.random.uniform(0.1, 1, [13, 3, 11]).astype('float32')
lod = [[0, 4, 5, 8, 13]]
self.inputs = {'X': (x, lod)}
for i in range(4):
l = lod[0][i + 1] - lod[0][i]
x[lod[0][i] + np.random.randint(l), :] += 1.0
out = np.zeros((4, 3, 11)).astype('float32')
self.outputs = {'Out': out}
return x, lod, out
def compute(self, x, lod, out): def compute(self, x, lod, out):
self.attrs = {'pooltype': "MAX"} self.attrs = {'pooltype': "MAX"}
for i in range(4): for i in range(4):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 11))
out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17)) out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11))
def test_check_grad(self):
# Remove MaxPool2D from gradient check to confirm the success of CI.
return
class TestSeqLastPool(TestSeqAvgPool): class TestSeqLastPool(TestSeqAvgPool):
......
...@@ -12,30 +12,30 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): ...@@ -12,30 +12,30 @@ class TestSoftmaxWithCrossEntropyOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "softmax_with_cross_entropy" self.op_type = "softmax_with_cross_entropy"
batch_size = 3 batch_size = 2
class_num = 37 class_num = 37
logits = np.random.uniform(0.1, 1.0, logits = np.random.uniform(0.1, 1.0,
[batch_size, class_num]).astype("float32") [batch_size, class_num]).astype("float64")
softmax = np.apply_along_axis(stable_softmax, 1, logits) softmax = np.apply_along_axis(stable_softmax, 1, logits)
labels = np.random.randint(0, class_num, [batch_size, 1], dtype="int32") labels = np.random.randint(0, class_num, [batch_size, 1], dtype="int64")
cross_entropy = np.asmatrix( cross_entropy = np.asmatrix(
[[-np.log(softmax[i][labels[i][0]])] [[-np.log(softmax[i][labels[i][0]])]
for i in range(softmax.shape[0])], for i in range(softmax.shape[0])],
dtype="float32") dtype="float64")
self.inputs = {"Logits": logits, "Label": labels} self.inputs = {"Logits": logits, "Label": labels}
self.outputs = { self.outputs = {
"Softmax": softmax.astype('float32'), "Softmax": softmax.astype("float64"),
"Loss": cross_entropy.astype('float32') "Loss": cross_entropy.astype("float64")
} }
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
def test_check_grad(self): def test_check_grad(self):
self.check_grad(["Logits"], "Loss", max_relative_error=0.05) self.check_grad(["Logits"], "Loss")
class TestSoftmaxWithCrossEntropyOp2(OpTest): class TestSoftmaxWithCrossEntropyOp2(OpTest):
...@@ -49,19 +49,19 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest): ...@@ -49,19 +49,19 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest):
class_num = 37 class_num = 37
logits = np.random.uniform(0.1, 1.0, logits = np.random.uniform(0.1, 1.0,
[batch_size, class_num]).astype("float32") [batch_size, class_num]).astype("float64")
softmax = np.apply_along_axis(stable_softmax, 1, logits) softmax = np.apply_along_axis(stable_softmax, 1, logits)
labels = np.random.uniform(0.1, 1.0, labels = np.random.uniform(0.1, 1.0,
[batch_size, class_num]).astype("float32") [batch_size, class_num]).astype("float64")
labels /= np.sum(labels, axis=1, keepdims=True) labels /= np.sum(labels, axis=1, keepdims=True)
cross_entropy = (-labels * np.log(softmax)).sum( cross_entropy = (-labels * np.log(softmax)).sum(
axis=1, keepdims=True).astype("float32") axis=1, keepdims=True).astype("float64")
self.inputs = {"Logits": logits, "Label": labels} self.inputs = {"Logits": logits, "Label": labels}
self.outputs = { self.outputs = {
"Softmax": softmax.astype('float32'), "Softmax": softmax.astype("float64"),
"Loss": cross_entropy.astype('float32') "Loss": cross_entropy.astype("float64")
} }
self.attrs = {"soft_label": True} self.attrs = {"soft_label": True}
...@@ -69,9 +69,8 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest): ...@@ -69,9 +69,8 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest):
self.check_output() self.check_output()
def test_check_grad(self): def test_check_grad(self):
self.check_grad(["Logits"], "Loss", max_relative_error=0.05) self.check_grad(["Logits"], "Loss")
if __name__ == "__main__": if __name__ == "__main__":
exit(0) # FIXME: xe has bug
unittest.main() unittest.main()
...@@ -4,7 +4,7 @@ import paddle.v2.framework.nets as nets ...@@ -4,7 +4,7 @@ import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program, g_init_program from paddle.v2.framework.framework import Program, g_main_program, g_startup_program
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -70,7 +70,7 @@ def main(): ...@@ -70,7 +70,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_init_program) exe.run(g_startup_program)
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -82,7 +82,7 @@ def main(): ...@@ -82,7 +82,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_program, outs = exe.run(g_main_program,
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import unittest import unittest
from paddle.v2.framework.framework import Variable, g_program, Program from paddle.v2.framework.framework import Variable, g_main_program, Program
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import numpy as np import numpy as np
...@@ -18,7 +18,7 @@ class TestVariable(unittest.TestCase): ...@@ -18,7 +18,7 @@ class TestVariable(unittest.TestCase):
self.assertRaises(ValueError, lambda: convert("int8")) self.assertRaises(ValueError, lambda: convert("int8"))
def test_var(self): def test_var(self):
b = g_program.current_block() b = g_main_program.current_block()
w = b.create_var( w = b.create_var(
dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") dtype="float64", shape=[784, 100], lod_level=0, name="fc.w")
self.assertNotEqual(str(w), "") self.assertNotEqual(str(w), "")
......
...@@ -3,13 +3,13 @@ import paddle.v2.framework.layers as layers ...@@ -3,13 +3,13 @@ import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_program from paddle.v2.framework.framework import Program, g_main_program
from paddle.v2.framework.executor import Executor from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
init_program = Program() startup_program = Program()
program = Program() main_program = Program()
embed_size = 32 embed_size = 32
hidden_size = 256 hidden_size = 256
...@@ -24,32 +24,32 @@ first_word = layers.data( ...@@ -24,32 +24,32 @@ first_word = layers.data(
name='firstw', name='firstw',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
second_word = layers.data( second_word = layers.data(
name='secondw', name='secondw',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
third_word = layers.data( third_word = layers.data(
name='thirdw', name='thirdw',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
forth_word = layers.data( forth_word = layers.data(
name='forthw', name='forthw',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
next_word = layers.data( next_word = layers.data(
name='nextw', name='nextw',
shape=[1], shape=[1],
data_type='int64', data_type='int64',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
embed_first = layers.embedding( embed_first = layers.embedding(
input=first_word, input=first_word,
...@@ -57,16 +57,16 @@ embed_first = layers.embedding( ...@@ -57,16 +57,16 @@ embed_first = layers.embedding(
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=is_sparse,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=is_sparse,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
...@@ -74,42 +74,43 @@ embed_third = layers.embedding( ...@@ -74,42 +74,43 @@ embed_third = layers.embedding(
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=is_sparse,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, embed_size],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=is_sparse,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'},
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], input=[embed_first, embed_second, embed_third, embed_forth],
axis=1, axis=1,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
hidden1 = layers.fc(input=concat_embed, hidden1 = layers.fc(input=concat_embed,
size=hidden_size, size=hidden_size,
act='sigmoid', act='sigmoid',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
predict_word = layers.fc(input=hidden1, predict_word = layers.fc(input=hidden1,
size=dict_size, size=dict_size,
act='softmax', act='softmax',
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict_word, input=predict_word,
label=next_word, label=next_word,
program=program, main_program=main_program,
init_program=init_program) startup_program=startup_program)
avg_cost = layers.mean(x=cost, program=program, init_program=init_program) avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, init_program) opts = sgd_optimizer.minimize(avg_cost, startup_program)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size) paddle.dataset.imikolov.train(word_dict, N), batch_size)
...@@ -117,7 +118,7 @@ train_reader = paddle.batch( ...@@ -117,7 +118,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(init_program, feed={}, fetch_list=[]) exe.run(startup_program, feed={}, fetch_list=[])
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
...@@ -145,7 +146,7 @@ for pass_id in range(PASS_NUM): ...@@ -145,7 +146,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor() next_tensor = core.LoDTensor()
next_tensor.set(next_data, place) next_tensor.set(next_data, place)
outs = exe.run(program, outs = exe.run(main_program,
feed={ feed={
'firstw': first_tensor, 'firstw': first_tensor,
'secondw': second_tensor, 'secondw': second_tensor,
......
...@@ -11,11 +11,6 @@ ...@@ -11,11 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Optimizers(update equation) for SGD method.
TODO(yuyang18): Complete comments.
"""
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers import paddle.trainer_config_helpers.optimizers as v1_optimizers
...@@ -101,32 +96,37 @@ class Optimizer(object): ...@@ -101,32 +96,37 @@ class Optimizer(object):
class Momentum(Optimizer): class Momentum(Optimizer):
""" """
SGD Optimizer. Momentum Optimizer.
SGD is an optimization method, trying to find a neural network that
minimize the "cost/error" of it by iteration. In paddle's implementation
SGD Optimizer is synchronized, which means all gradients will be wait to
calculate and reduced into one gradient, then do optimize operation.
The neural network consider the learning problem of minimizing an objective When sparse=False, the momentum update formula is as follows:
function, that has the form of a sum
.. math:: .. math::
Q(w) = \\sum_{i}^{n} Q_i(w) v_{t} &= k * v_{t-1} - \\gamma_t / (g_{t} + \\lambda w_{t-1}) \\\\
w_{t} &= w_{t-1} + v_{t} \\\\
The value of function Q sometimes is the cost of neural network (Mean where, :math:`k` is momentum, :math:`\\lambda` is decay rate,
Square Error between prediction and label for example). The function Q is :math:`\\gamma_t` is learning rate at the t'th iteration.
parametrised by w, the weight/bias of neural network. And weights is what to :math:`w_{t}` is the weight as the t'th iteration.
be learned. The i is the i-th observation in (trainning) data. And the :math:`v_{t}` is the history momentum variable.
So, the SGD method will optimize the weight by When sparse=True, the update scheme:
.. math:: .. math::
w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w) \\alpha_t &= \\alpha_{t-1} / k \\\\
\\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\
u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\
v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\
\\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t
where :math:`k` is momentum, :math:`\\lambda` is decay rate,
:math:`\\gamma_t` is learning rate at the t'th iteration.
where :math:`\\eta` is learning rate. And :math:`n` is batch size. :param momentum: the momentum factor.
:type momentum: float
:param sparse: with sparse support or not, False by default.
:type sparse: bool
""" """
def __init__(self, momentum=None, sparse=False, **kwargs): def __init__(self, momentum=None, sparse=False, **kwargs):
...@@ -146,7 +146,7 @@ class Adam(Optimizer): ...@@ -146,7 +146,7 @@ class Adam(Optimizer):
m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\ m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\ v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
w & = w - \\frac{\\eta}{\\sqrt{v(w,t) + \\epsilon}} w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
:param beta1: the :math:`\\beta_1` in equation. :param beta1: the :math:`\\beta_1` in equation.
:type beta1: float :type beta1: float
......
from setuptools import setup, Distribution from setuptools import setup, Distribution, Extension
class BinaryDistribution(Distribution): class BinaryDistribution(Distribution):
def has_ext_modules(foo): def has_ext_modules(foo):
return True return True
...@@ -41,6 +41,7 @@ setup(name='paddlepaddle', ...@@ -41,6 +41,7 @@ setup(name='paddlepaddle',
description='Parallel Distributed Deep Learning', description='Parallel Distributed Deep Learning',
install_requires=setup_requires, install_requires=setup_requires,
packages=packages, packages=packages,
ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={ package_data={
'paddle.v2.master': ['libpaddle_master.so'], 'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so'], 'paddle.v2.framework': ['core.so'],
...@@ -54,6 +55,5 @@ setup(name='paddlepaddle', ...@@ -54,6 +55,5 @@ setup(name='paddlepaddle',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
}, },
scripts=paddle_bins, scripts=paddle_bins,
distclass=BinaryDistribution,
data_files=[(paddle_rt_lib_dir, paddle_rt_libs)] data_files=[(paddle_rt_lib_dir, paddle_rt_libs)]
) )
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册