Commit acc32788 authored by chengduoZH

remove conflict

@@ -126,7 +126,7 @@ include(external/swig)    # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/any)     # download libn::any
include(external/eigen)   # download eigen3
include(external/pybind11)    # download pybind11
include(external/nccl)
include(cudnn)            # set cudnn libraries, must before configure
......
@@ -79,9 +79,8 @@ if(NOT DEFINED IOS_ARCH)
    # FIXME(liuyiqun): support "armv7;armv7s;arm64" future
    set(IOS_ARCH "arm64")
  elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
-    set(IOS_ARCH "i386;x86_64")
-  elseif(IOS_PLATFORM STREQUAL "WATCHOS")
-    set(IOS_ARCH armv7k)
    # FIXME(liuyiqun): support "i386;x86_64" future
    set(IOS_ARCH "x86_64")
  endif()
endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_GPU)
return()
endif()
include(ExternalProject)
set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
-INCLUDE(ExternalProject)
-SET(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
-INCLUDE_DIRECTORIES(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_PYTHON)
  return()
endif()
include(ExternalProject)
set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
include_directories(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
ExternalProject_Add(
    extern_pybind
@@ -17,14 +35,12 @@ ExternalProject_Add(
    TEST_COMMAND      ""
)
-if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c)
-  file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
  file(WRITE ${dummyfile} "const char * dummy_pybind = \"${dummyfile}\";")
  add_library(pybind STATIC ${dummyfile})
else()
  add_library(pybind INTERFACE)
endif()
add_dependencies(pybind extern_pybind)
-LIST(APPEND external_project_dependencies pybind)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
# This file is used to check all support levels of AVX on your machine
# so that PaddlePaddle can unleash the vectorization power of multicore.

-INCLUDE(CheckCXXSourceRuns)
-INCLUDE(CheckCXXSourceCompiles)
include(CheckCXXSourceRuns)
include(CheckCXXSourceCompiles)

-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  set(MMX_FLAG "-mmmx")
  set(SSE2_FLAG "-msse2")
  set(SSE3_FLAG "-msse3")
-  SET(AVX_FLAG "-mavx")
-  SET(AVX2_FLAG "-mavx2")
  set(AVX_FLAG "-mavx")
  set(AVX2_FLAG "-mavx2")
-ELSEIF(MSVC)
elseif(MSVC)
  set(MMX_FLAG "/arch:MMX")
  set(SSE2_FLAG "/arch:SSE2")
  set(SSE3_FLAG "/arch:SSE3")
  SET(AVX_FLAG "/arch:AVX")
  SET(AVX2_FLAG "/arch:AVX2")
-ENDIF()
endif()

set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})

# Check MMX
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
set(MMX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <mmintrin.h>
int main()
@@ -32,6 +33,7 @@ int main()
# Check SSE2
set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
set(SSE2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <emmintrin.h>
int main()
@@ -42,6 +44,7 @@ int main()
# Check SSE3
set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
set(SSE3_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <pmmintrin.h>
int main()
@@ -55,6 +58,7 @@ int main()
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
@@ -67,6 +71,7 @@ int main()
# Check AVX 2
set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
......
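The bodies of the probe programs are elided in this diff. As a minimal sketch (an illustration, not the exact snippet used in the repository), the kind of source CHECK_CXX_SOURCE_RUNS compiles and runs for the AVX check looks like this; if the binary executes without an illegal-instruction fault and exits with 0, the corresponding *_FOUND variable is set:

    #include <immintrin.h>

    int main() {
      __m256 a = _mm256_set1_ps(1.0f);   // broadcast 1.0 into all 8 lanes
      __m256 b = _mm256_set1_ps(2.0f);
      __m256 c = _mm256_add_ps(a, b);    // executes a real AVX instruction
      float out[8];
      _mm256_storeu_ps(out, c);
      return out[0] == 3.0f ? 0 : 1;     // exit code 0 signals AVX support
    }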
@@ -145,7 +145,7 @@ Whenever PaddlePaddle releases a new version, a matching production image is also published.
Jupyter Notebook is an open-source web application for creating and sharing interactive documents that contain code, formulas, charts, and text; the documents can be read in a web browser.

-PaddlePaddle Book is an interactive Jupyter Nodebook built for users and developers.
PaddlePaddle Book is an interactive Jupyter Notebook built for users and developers.
If you want to dig deeper into deep learning, PaddlePaddle Book is your best choice.

We provide a Docker image that can run PaddlePaddle Book directly; simply run:
......
@@ -29,32 +29,32 @@ add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER}
add_dependencies(paddle_capi paddle_proto)

# TODO: paddle_capi_whole will be removed.
set(PADDLE_CAPI_LAYERS_LIBS
    paddle_function
    paddle_gserver)
if(MOBILE_INFERENCE)
-  set(PADDLE_CAPI_INFER_LIBS
  set(PADDLE_CAPI_ENGINE_LIBS
      paddle_utils
      paddle_parameter
      paddle_math
      paddle_cuda
-      paddle_function
-      paddle_gserver
      paddle_proto)
else()
-  set(PADDLE_CAPI_INFER_LIBS
  set(PADDLE_CAPI_ENGINE_LIBS
      paddle_utils
      paddle_parameter
      paddle_math
      paddle_cuda
-      paddle_function
-      paddle_gserver
      paddle_proto
      paddle_pserver
      paddle_network)
endif()
set(PADDLE_CAPI_INFER_LIBS ${PADDLE_CAPI_LAYERS_LIBS} ${PADDLE_CAPI_ENGINE_LIBS})

cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS})

# Link the static library for inference
-cc_library(paddle_capi_engine DEPS paddle_capi paddle_utils paddle_parameter paddle_math paddle_cuda paddle_proto)
cc_library(paddle_capi_engine DEPS paddle_capi ${PADDLE_CAPI_ENGINE_LIBS})
-cc_library(paddle_capi_layers DEPS paddle_function paddle_gserver)
cc_library(paddle_capi_layers DEPS ${PADDLE_CAPI_LAYERS_LIBS})

# Link the shared library for inference
if(NOT IOS)
......
@@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
@@ -73,6 +74,8 @@ static void CreateTensor(Variable* var, VarDesc::VarType var_type) {
    var->GetMutable<std::vector<framework::Scope>>();
  } else if (var_type == VarDesc::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == VarDesc::LOD_TENSOR_ARRAY) {
    var->GetMutable<LoDTensorArray>();
  } else {
    PADDLE_THROW(
        "Variable type %d is not in "
......
@@ -109,6 +109,11 @@ message LoDTensorDesc {
  optional int32 lod_level = 2 [ default = 0 ];
}

message LoDTensorArrayDesc {
  required TensorDesc tensor = 1;
  optional int32 lod_level = 2 [ default = 0 ];
}

message VarDesc {
  enum VarType {
    LOD_TENSOR = 1;
@@ -117,11 +122,13 @@ message VarDesc {
    FETCH_LIST = 4;
    STEP_SCOPES = 5;
    LOD_RANK_TABLE = 6;
    LOD_TENSOR_ARRAY = 7;
  }
  required string name = 1;
  required VarType type = 2;
  optional LoDTensorDesc lod_tensor = 3;
  optional TensorDesc selected_rows = 4;
  optional LoDTensorArrayDesc tensor_array = 6;
  optional bool persistable = 5 [ default = false ];
}
......
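Note that tensor_array takes field number 6 even though it is declared before persistable (5); protobuf field numbers need not follow declaration order. A hedged sketch of how the generated C++ API would describe a tensor-array variable (standard protobuf-generated accessors; the header path and FP32 enum value are assumptions based on this proto file):

    #include "paddle/framework/framework.pb.h"

    void DescribeArrayVar() {
      paddle::framework::VarDesc desc;
      desc.set_name("dynamic_rnn_states");
      desc.set_type(paddle::framework::VarDesc::LOD_TENSOR_ARRAY);
      // tensor_array carries its own TensorDesc plus a lod_level,
      // mirroring LoDTensorDesc above.
      desc.mutable_tensor_array()->set_lod_level(1);
      desc.mutable_tensor_array()->mutable_tensor()->set_data_type(
          paddle::framework::DataType::FP32);
    }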
@@ -33,10 +33,15 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
    item.length = vec[i + 1] - vec[i];
    items_.emplace_back(item);
  }
-  std::sort(items_.begin(), items_.end(),
-            [](const TableItem& a, const TableItem& b) {
-              return a.length > b.length;
-            });
  // NOTE(yuyang18):
  //
  // The time complexity of stable_sort is O(N*log(N)) if additional memory is
  // available. It is easy to debug and unit test when using `stable_sort`
  // instead of `sort`. Also, the items of a rank table will not be too large.
  std::stable_sort(items_.begin(), items_.end(),
                   [](const TableItem& a, const TableItem& b) {
                     return a.length > b.length;
                   });
}

}  // namespace framework
......
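To make the NOTE concrete, here is a standalone sketch (not Paddle code) of what stability buys: items with equal length keep their original relative order, so the rank table is deterministic across runs and platforms.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct TableItem { size_t index; size_t length; };

    int main() {
      std::vector<TableItem> items = {{0, 3}, {1, 5}, {2, 3}, {3, 5}};
      std::stable_sort(items.begin(), items.end(),
                       [](const TableItem& a, const TableItem& b) {
                         return a.length > b.length;  // descending by length
                       });
      // Ties are broken by original position: prints 1 3 0 2.
      for (const auto& it : items) std::printf("%zu ", it.index);
      return 0;
    }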
@@ -135,5 +135,43 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
  PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty.");
  ShareDataWith(Slice(begin, end));
}
void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
std::vector<std::vector<size_t>>* lod_length,
size_t* start_offset) {
lod_length->clear();
PADDLE_ENFORCE(start_idx < lod.size() - 1,
"start_idx should be >= 0 and < lod.size() - 1.");
PADDLE_ENFORCE(end_idx < lod.size(),
"end_idx should be >= 0 and < lod.size().");
PADDLE_ENFORCE_LE(start_idx, end_idx,
"start_idx should be less than end_idx.");
for (size_t level_idx = 0; level_idx < lod.size(); ++level_idx) {
std::vector<size_t> level_lens;
for (size_t i = start_idx; i < end_idx; ++i) {
level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
}
lod_length->emplace_back(level_lens);
start_idx = lod[level_idx][start_idx];
end_idx = lod[level_idx][end_idx];
}
*start_offset = start_idx;
}
void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length) {
PADDLE_ENFORCE_EQ(
lod->size(), lod_length.size(),
"The lod_length should has the same size with the appended lod.");
for (size_t i = 0; i < lod->size(); ++i) {
auto& level = (*lod)[i];
if (level.empty()) {
level.push_back(0);
}
for (size_t len : lod_length[i]) {
level.push_back(level.back() + len);
}
}
}
}  // namespace framework
}  // namespace paddle
@@ -181,5 +181,11 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
  return tensor;
}
void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
std::vector<std::vector<size_t>>* lod_length,
size_t* start_offset);
void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length);
}  // namespace framework
}  // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/framework/lod_tensor.h"
namespace paddle {
namespace framework {
using LoDTensorArray = std::vector<LoDTensor>;
}  // namespace framework
} // namespace paddle
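Since the alias is nothing more than std::vector<LoDTensor>, a dependency-free analog (illustrative types, not Paddle's) shows how such an array accumulates one tensor per step, each element carrying its own LoD:

    #include <cstddef>
    #include <vector>

    struct LoDTensorLike {
      std::vector<std::vector<size_t>> lod;  // level-of-detail offsets
      std::vector<float> data;
    };
    using LoDTensorArray = std::vector<LoDTensorLike>;

    int main() {
      LoDTensorArray steps;
      steps.push_back({{{0, 2, 4}}, {1.f, 2.f, 3.f, 4.f}});  // time step 0
      steps.push_back({{{0, 1, 2}}, {5.f, 6.f}});            // time step 1
      return steps.size() == 2 ? 0 : 1;
    }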
@@ -144,5 +144,47 @@ TEST(LodExpand, test) {
  }
}
TEST(LoD, GetFineGrainedLoDLength) {
LoD lod;
lod.push_back(std::vector<size_t>{0, 2, 4, 5});
lod.push_back(std::vector<size_t>{0, 1, 6, 8, 10, 11});
lod.push_back(
std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26, 29});
std::vector<std::vector<size_t>> lod_length;
size_t start_offset;
paddle::framework::GetFineGrainedLoDLength(lod, 1, 2, &lod_length,
&start_offset);
std::vector<std::vector<size_t>> expected;
expected.push_back(std::vector<size_t>{2});
expected.push_back(std::vector<size_t>{2, 2});
expected.push_back(std::vector<size_t>{2, 3, 4, 2});
EXPECT_EQ(lod_length, expected);
EXPECT_EQ(start_offset, 15UL);
}
TEST(LoD, AppendLoD) {
std::vector<std::vector<size_t>> lod_lens;
lod_lens.push_back(std::vector<size_t>{2});
lod_lens.push_back(std::vector<size_t>{2, 2});
lod_lens.push_back(std::vector<size_t>{2, 3, 4, 2});
LoD origin;
origin.push_back(std::vector<size_t>{0, 2});
origin.push_back(std::vector<size_t>{0, 1, 6});
origin.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15});
paddle::framework::AppendLoD(&origin, lod_lens);
LoD expected;
expected.push_back(std::vector<size_t>{0, 2, 4});
expected.push_back(std::vector<size_t>{0, 1, 6, 8, 10});
expected.push_back(
std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20, 24, 26});
EXPECT_EQ(origin, expected);
}
}  // namespace framework
}  // namespace paddle
@@ -37,13 +37,27 @@ std::vector<int64_t> VarDescBind::Shape() const {
DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); }

void VarDescBind::SetLoDLevel(int32_t lod_level) {
-  PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
-  desc_.mutable_lod_tensor()->set_lod_level(lod_level);
  switch (desc_.type()) {
    case VarDesc::LOD_TENSOR:
      desc_.mutable_lod_tensor()->set_lod_level(lod_level);
      break;
    case VarDesc::LOD_TENSOR_ARRAY:
      desc_.mutable_tensor_array()->set_lod_level(lod_level);
      break;
    default:
      PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type());
  }
}

int32_t VarDescBind::GetLodLevel() const {
-  PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
-  return desc_.lod_tensor().lod_level();
  switch (desc_.type()) {
    case VarDesc::LOD_TENSOR:
      return desc_.lod_tensor().lod_level();
    case VarDesc::LOD_TENSOR_ARRAY:
      return desc_.tensor_array().lod_level();
    default:
      PADDLE_THROW("Tensor type=%d does not support LoDLevel", desc_.type());
  }
}

const TensorDesc &VarDescBind::tensor_desc() const {
@@ -53,6 +67,8 @@ const TensorDesc &VarDescBind::tensor_desc() const {
      return desc_.selected_rows();
    case VarDesc::LOD_TENSOR:
      return desc_.lod_tensor().tensor();
    case VarDesc::LOD_TENSOR_ARRAY:
      return desc_.tensor_array().tensor();
    default:
      PADDLE_THROW("Unexpected branch.");
  }
@@ -66,6 +82,8 @@ TensorDesc *VarDescBind::mutable_tensor_desc() {
      return desc_.mutable_selected_rows();
    case VarDesc::LOD_TENSOR:
      return desc_.mutable_lod_tensor()->mutable_tensor();
    case VarDesc::LOD_TENSOR_ARRAY:
      return desc_.mutable_tensor_array()->mutable_tensor();
    default:
      PADDLE_THROW("Unexpected branch.");
  }
......
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MKLDNNAddtoLayer.h"
using namespace mkldnn; // NOLINT
namespace paddle {
REGISTER_LAYER(mkldnn_addto, MKLDNNAddtoLayer);
bool MKLDNNAddtoLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
if (!MKLDNNLayer::init(layerMap, parameterMap)) {
return false;
}
layerSize_ = getSize();
for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(layerSize_, inputLayers_[i]->getSize()) << "input size must equal";
}
if (biasParameter_.get() != NULL) {
biases_ =
std::unique_ptr<Weight>(new Weight(1, layerSize_, biasParameter_, 0));
}
return true;
}
void MKLDNNAddtoLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed";
reshapeInput(bs, ih, iw);
ic = inputLayers_[0]->getSize() / ih / iw;
CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize());
CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw);
for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize());
CHECK_EQ(layerSize_, inputLayers_[i]->getSize());
}
oc = ic;
oh = ih;
ow = iw;
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
if (biases_) {
LOG(FATAL) << "not implemented yet";
}
resetFwdBuffers(inVals_, out);
in = inVals_[0];
std::shared_ptr<sum::primitive_desc> fwdPD;
resetFwdPD(fwdPD, inVals_, out);
resetFwdPipeline(pipeline, fwdPD, inVals_, out);
}
void MKLDNNAddtoLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
resetBwdBuffers(inGrads_, out);
in = inGrads_[0];
// backward only needs to share the output grad with each input grad
for (size_t i = 0; i < inGrads_.size(); i++) {
if (inGrads_[i] != nullptr) {
inGrads_[i] = out;
inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData());
}
}
}
void MKLDNNAddtoLayer::updateWeights(const UpdateCallback& callback) {
if (biases_ && biases_->getWGrad()) {
biases_->getParameterPtr()->incUpdate(callback);
}
}
void MKLDNNAddtoLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
inputs.resize(inputLayers_.size());
for (size_t i = 0; i < inputs.size(); i++) {
resetInValue(inputs[i], nullptr, i);
CHECK(inputs[i]);
inputs[i]->downSpatial();
}
for (size_t i = 1; i < inputs.size(); i++) {
CHECK_PRIMITIVE_DESC_EQ(inputs[i], inputs[0]->getPrimitiveDesc());
}
resetOutValue(out, inputs[0]->getPrimitiveDesc());
}
void MKLDNNAddtoLayer::resetFwdPD(std::shared_ptr<sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr out) {
std::vector<double> scales(inputs.size(), 1.0);
std::vector<memory::primitive_desc> srcPDs;
for (size_t i = 0; i < inputs.size(); i++) {
srcPDs.push_back(inputs[i]->getPrimitiveDesc());
}
CHECK(out);
pd.reset(new sum::primitive_desc(out->getMemoryDesc(), scales, srcPDs));
CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc());
}
void MKLDNNAddtoLayer::resetFwdPipeline(
std::vector<primitive>& pipeline,
std::shared_ptr<sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
std::vector<primitive::at> srcs;
for (size_t i = 0; i < inputs.size(); i++) {
srcs.push_back(*(inputs[i]));
}
fwd_.reset(new sum(*pd, srcs, *out));
pipeline.push_back(*fwd_);
}
void MKLDNNAddtoLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out) {
CHECK(outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc());
CHECK(out);
inputs.resize(inputLayers_.size());
for (size_t i = 0; i < inputs.size(); i++) {
resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i);
CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc());
}
}
} // namespace paddle
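For readers unfamiliar with the addto layer: the mkldnn sum primitive built in resetFwdPD computes a plain elementwise sum over equally-sized inputs (all scales are 1.0). A standalone sketch of that math, leaving out the optional bias which the code above does not support yet:

    #include <cstddef>
    #include <vector>

    std::vector<float> addto(const std::vector<std::vector<float>>& inputs) {
      std::vector<float> out(inputs[0].size(), 0.0f);
      for (const auto& in : inputs)
        for (size_t i = 0; i < in.size(); ++i) out[i] += in[i];  // scale 1.0
      return out;
    }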
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "MKLDNNLayer.h"
#include "mkldnn.hpp"
namespace paddle {
/**
* @brief A subclass of MKLDNNLayer: the Addto layer.
*
* The config file api is mkldnn_addto
*/
class MKLDNNAddtoLayer : public MKLDNNLayer {
protected:
std::vector<MKLDNNMatrixPtr> inVals_;
std::vector<MKLDNNMatrixPtr> inGrads_;
// layer size == ic * ih * iw == oc * oh * ow, and cannot be changed
size_t layerSize_;
// TODO(TJ): this part has not been optimized by MKL-DNN
std::unique_ptr<Weight> biases_;
public:
explicit MKLDNNAddtoLayer(const LayerConfig& config) : MKLDNNLayer(config) {}
~MKLDNNAddtoLayer() {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override;
void updateWeights(const UpdateCallback& callback) override;
void printValueFormat() override {
for (size_t i = 0; i < inVals_.size(); ++i) {
VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>";
}
if (outVal_) {
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
}
if (extOutVal_) {
VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
}
}
void printGradFormat() override {
if (extOutGrad_) {
VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
}
if (outGrad_) {
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
}
for (size_t i = 0; i < inGrads_.size(); ++i) {
VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<";
}
}
protected:
/**
* Forward functions: reset buffers(inputs, output, bias),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<mkldnn::sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr out);
void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
std::shared_ptr<mkldnn::sum::primitive_desc>& pd,
std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(inputs, output, bias)
*/
void resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& out);
};
} // namespace paddle
@@ -77,7 +77,7 @@ void MKLDNNLayer::forward(PassType passType) {
    needResetBwd_ = true;
  }

-  if (inputLayers_[0]->getType() == "data") {
  if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) {
    // Update input value data when input layer is "data" type,
    // since the input value data address might be changed.
    CHECK(extInVal_);
@@ -171,14 +171,16 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn,
}

void MKLDNNLayer::resetInValue(
-    MKLDNNMatrixPtr& in, const std::shared_ptr<memory::primitive_desc>& intPD) {
    MKLDNNMatrixPtr& in,
    const std::shared_ptr<memory::primitive_desc>& intPD,
    size_t inputIdx) {
  cvtInVal_ = nullptr;
  extInVal_ = nullptr;
  in = nullptr;
  CHECK_GT(bs_ * ic_ * ih_ * iw_, 0);
  auto extPD = MKLDNNMatrix::createPrimitiveDesc(
      {bs_, ic_, ih_, iw_}, format::nchw, engine_);
-  const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
  const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue();
  in = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
  CHECK_EQ(inputIsOnlyMKLDNN(), in != nullptr);
  if (in == nullptr || in->getFormat() == format::nc) {
@@ -216,11 +218,12 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
}

void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
-                              memory::primitive_desc intPD) {
                              memory::primitive_desc intPD,
                              size_t inputIdx) {
  cvtInGrad_ = nullptr;
  extInGrad_ = nullptr;
  in = nullptr;
-  LayerPtr& input = inputLayers_[0];
  LayerPtr& input = inputLayers_[inputIdx];
  if (input->getOutputGrad() == nullptr) {
    // no need input grad
    return;
@@ -245,7 +248,6 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
    return;
  }
  // need create reorder
-  // TODO(TJ): add macro definition to simplify it
  CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
      << "should have external input value and the format must be nchw(nc)";
  extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat);
......
@@ -199,7 +199,8 @@ protected:
   */
  void resetInValue(
      MKLDNNMatrixPtr& in,
-      const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr);
      const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr,
      size_t inputIdx = 0);

  /**
   * reset output value from internal primitive desc.
@@ -212,7 +213,9 @@ protected:
   * reset input grad from internal primitive desc.
   * reset both internal and external buffer and create reorder if necessary.
   */
-  void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD);
  void resetInGrad(MKLDNNMatrixPtr& in,
                   mkldnn::memory::primitive_desc intPD,
                   size_t inputIdx = 0);

  /**
   * reset output grad from internal primitive desc.
......
@@ -98,8 +98,19 @@ void SubSequenceLayer::forward(PassType passType) {
  CHECK_EQ(numSequences2, numSequences3);

  MatrixPtr inputValue = input.value;
-  IVectorPtr offsetValue = offsetSeq.ids;
-  IVectorPtr sizeValue = sizeSeq.ids;
  IVectorPtr offsetValue;
  IVectorPtr sizeValue;

  if (useGpu_) {
    // copy to cpu
    IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false);
    IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false);
    offsetValue->copyFrom(*offsetSeq.ids);
    sizeValue->copyFrom(*sizeSeq.ids);
  } else {
    offsetValue = offsetSeq.ids;
    sizeValue = sizeSeq.ids;
  }

  CHECK_EQ(offsetValue->getSize(), numSequences1);
  CHECK_EQ(sizeValue->getSize(), numSequences1);
@@ -176,8 +187,21 @@ void SubSequenceLayer::backward(const UpdateCallback& callback) {
  size_t numSequences1 = startPositions1->getSize() - 1;
  const int* starts1 = startPositions1->getData();

-  IVectorPtr offsetValue = getInput(1).ids;
-  IVectorPtr sizeValue = getInput(2).ids;
  const Argument& offsetSeq = getInput(1);
  const Argument& sizeSeq = getInput(2);
  IVectorPtr offsetValue;
  IVectorPtr sizeValue;

  if (useGpu_) {
    // copy to cpu
    IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false);
    IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false);
    offsetValue->copyFrom(*offsetSeq.ids);
    sizeValue->copyFrom(*sizeSeq.ids);
  } else {
    offsetValue = offsetSeq.ids;
    sizeValue = sizeSeq.ids;
  }

  int* offsets = offsetValue->getData();
  int* sizes = sizeValue->getData();
......
@@ -132,7 +132,7 @@ void MKLDNNTester::checkForward() {
  VLOG(MKLDNN_TESTS) << "Check Forward";
  printTopDatas();
  double delta =
-      compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
      compareMatrix(refLayer_->getOutputValue(), dnnLayer_->getOutputValue());
  EXPECT_LE(fabs(delta), eps_);
}
@@ -147,7 +147,7 @@ void MKLDNNTester::checkBackwardData() {
    VLOG(MKLDNN_ALL) << "Reference Backward Result: InputGrad " << i;
    printMatrix(refDiff);
-    double delta = compareMatrix(dnnDiff, refDiff);
    double delta = compareMatrix(refDiff, dnnDiff);
    EXPECT_LE(fabs(delta), eps_);
    if (isBN) {
      // the other two inputs in batch norm are for moving mean and var
@@ -177,7 +177,7 @@ void MKLDNNTester::checkBackwardWgts() {
                       << parameters_[REF][i]->getName();
    printVector(ref);
-    double delta = compareVector(dnn, ref);
    double delta = compareVector(ref, dnn);
    EXPECT_LE(fabs(delta), eps_);
  }
......
@@ -271,20 +271,53 @@ TEST(MKLDNNLayer, BatchNormLayer) {
  testBatchNormLayer({16, 32, 16, 16});
}

-struct testActDesc {
struct testImageDesc {
  int bs, ic, ih, iw;
};

-static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
static void getAddtoConfig(TestConfig& cfg,
                           const testImageDesc& pm,
                           const size_t nInputs = 1) {
  cfg.biasSize = 0;
  cfg.layerConfig.set_type("addto");
  size_t layerSize = pm.ic * pm.ih * pm.iw;
  cfg.layerConfig.set_size(layerSize);
-  cfg.inputDefs.push_back({INPUT_DATA, "layer_0", layerSize, 0});
-  cfg.layerConfig.add_inputs();
  cfg.layerConfig.set_active_type("relu");
  for (size_t i = 0; i < nInputs; ++i) {
    std::stringstream ss;
    ss << "layer_" << i;
    cfg.inputDefs.push_back({INPUT_DATA, ss.str(), layerSize, 0});
    LayerInputConfig* input = cfg.layerConfig.add_inputs();
    ImageConfig* img_conf = input->mutable_image_conf();
    img_conf->set_channels(pm.ic);
    img_conf->set_img_size_y(pm.ih);
    img_conf->set_img_size(pm.iw);
  }
}

void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) {
  CHECK_GE(nInputs, 1);
  TestConfig dnnConfig;
  getAddtoConfig(dnnConfig, pm, nInputs);
  dnnConfig.layerConfig.set_type("mkldnn_addto");
  // TODO(TJ): test with bias
  for (auto withBias : {false}) {
    if (withBias) {
      dnnConfig.biasSize = pm.ic * pm.ih * pm.iw;
    } else {
      dnnConfig.biasSize = 0;
    }
    RUN_MKLDNN_TEST_LAYER(dnnConfig, "addto", pm)
  }
}

TEST(MKLDNNLayer, AddtoLayer) {
  testAddtoLayer({16, 5, 14, 14}, 1);
  testAddtoLayer({8, 10, 8, 8}, 2);
  testAddtoLayer({4, 12, 1, 1}, 3);
}

-void testActivation(std::string actType, const testActDesc& pm) {
void testActivation(std::string actType, const testImageDesc& pm) {
  // TODO(TJ): remove me when paddle support elu activation
  if (actType == "mkldnn_elu") {
    return;
......
@@ -150,8 +150,10 @@ set(DEPS_OPS
    conv_transpose_op
    nccl_op
    sequence_conv_op
    sequence_pool_op
    lod_rank_table_op
-    lstm_op)
    lstm_op
    gru_op)

op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library(cross_entropy_op DEPS cross_entropy)
@@ -164,8 +166,10 @@ if(WITH_GPU)
  op_library(nccl_op DEPS nccl_common)
endif()
op_library(sequence_conv_op DEPS context_project)
op_library(sequence_pool_op DEPS sequence_pooling)
op_library(lstm_op DEPS sequence2batch lstm_compute)
op_library(conv_transpose_op DEPS vol2col)
op_library(gru_op DEPS sequence2batch gru_compute)
op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc
        DEPS net_op tensor_array)
op_library(recurrent_op SRCS recurrent_op.cc DEPS executor)
......
@@ -44,7 +44,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Sigmoid operator");
    AddOutput("Y", "Output of Sigmoid operator");
    AddComment(R"DOC(
-Sigmoid activation operator.
Sigmoid Activation Operator.

$y = 1 / (1 + e^{-x})$
@@ -60,7 +60,7 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of LogSigmoid operator");
    AddOutput("Y", "Output of LogSigmoid operator");
    AddComment(R"DOC(
-Logsigmoid activation operator.
Logsigmoid Activation Operator.

$y = \log(1 / (1 + e^{-x}))$
@@ -75,7 +75,7 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Exp operator");
    AddOutput("Y", "Output of Exp operator");
    AddComment(R"DOC(
-Exp activation operator.
Exp Activation Operator.

$y = e^x$
@@ -90,7 +90,7 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Relu operator");
    AddOutput("Y", "Output of Relu operator");
    AddComment(R"DOC(
-Relu activation operator.
Relu Activation Operator.

$y = \max(x, 0)$
@@ -109,7 +109,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("alpha", "The small negative slope")
        .SetDefault(static_cast<AttrType>(0.02f));
    AddComment(R"DOC(
-LeakyRelu activation operator.
LeakyRelu Activation Operator.

$y = \max(x, \alpha * x)$
@@ -128,7 +128,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("lambda", "non-negative offset")
        .SetDefault(static_cast<AttrType>(0.5f));
    AddComment(R"DOC(
-Softshrink activation operator.
Softshrink Activation Operator.

$$
y = \begin{cases}
@@ -149,7 +149,7 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Tanh operator");
    AddOutput("Y", "Output of Tanh operator");
    AddComment(R"DOC(
-Tanh activation operator.
Tanh Activation Operator.

$$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
@@ -165,7 +165,7 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of TanhShrink operator");
    AddOutput("Y", "Output of TanhShrink operator");
    AddComment(R"DOC(
-TanhShrink activation operator.
TanhShrink Activation Operator.

$$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
@@ -184,7 +184,7 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("threshold", "The value of threshold for HardShrink")
        .SetDefault(static_cast<AttrType>(0.5));
    AddComment(R"DOC(
-HardShrink activation operator.
HardShrink Activation Operator.

$$
y = \begin{cases}
@@ -205,7 +205,7 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Sqrt operator");
    AddOutput("Y", "Output of Sqrt operator");
    AddComment(R"DOC(
-Sqrt activation operator.
Sqrt Activation Operator.

$y = \sqrt{x}$
@@ -220,7 +220,7 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Abs operator");
    AddOutput("Y", "Output of Abs operator");
    AddComment(R"DOC(
-Abs activation operator.
Abs Activation Operator.

$y = |x|$
@@ -236,7 +236,7 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Reciprocal operator");
    AddOutput("Y", "Output of Reciprocal operator");
    AddComment(R"DOC(
-Reciprocal activation operator.
Reciprocal Activation Operator.

$$y = \frac{1}{x}$$
@@ -251,7 +251,7 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Log operator");
    AddOutput("Y", "Output of Log operator");
    AddComment(R"DOC(
-Log activation operator.
Log Activation Operator.

$y = \ln(x)$
@@ -268,7 +268,7 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Square operator");
    AddOutput("Y", "Output of Square operator");
    AddComment(R"DOC(
-Square activation operator.
Square Activation Operator.

$y = x^2$
@@ -284,7 +284,7 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Softplus operator");
    AddOutput("Y", "Output of Softplus operator");
    AddComment(R"DOC(
-Softplus activation operator.
Softplus Activation Operator.

$y = \ln(1 + e^{x})$
@@ -300,7 +300,7 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "Input of Softsign operator");
    AddOutput("Y", "Output of Softsign operator");
    AddComment(R"DOC(
-Softsign activation operator.
Softsign Activation Operator.

$$y = \frac{x}{1 + |x|}$$
@@ -320,7 +320,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("t_max", "The max marginal value of BRelu")
        .SetDefault(static_cast<AttrType>(24));
    AddComment(R"DOC(
-BRelu activation operator.
BRelu Activation Operator.

$y = \min(\max(x, t_{min}), t_{max})$
@@ -339,7 +339,7 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("threshold", "The threshold value of SoftRelu")
        .SetDefault(static_cast<AttrType>(40));
    AddComment(R"DOC(
-SoftRelu activation operator.
SoftRelu Activation Operator.

$y = \ln(1 + \exp(\max(\min(x, threshold), -threshold)))$
@@ -357,7 +357,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("alpha", "The alpha value of ELU")
        .SetDefault(static_cast<AttrType>(1.0f));
    AddComment(R"DOC(
-ELU activation operator.
ELU Activation Operator.

Applies the following element-wise computation on the input according to
https://arxiv.org/abs/1511.07289.
@@ -378,7 +378,7 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("threshold", "The threshold value of Relu6")
        .SetDefault(static_cast<AttrType>(6));
    AddComment(R"DOC(
-Relu6 activation operator.
Relu6 Activation Operator.

$y = \min(\max(0, x), 6)$
@@ -396,7 +396,7 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("factor", "The exponential factor of Pow")
        .SetDefault(static_cast<AttrType>(1));
    AddComment(R"DOC(
-Pow activation operator.
Pow Activation Operator.

$y = x^{factor}$
@@ -416,7 +416,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("scale_b", "The scale parameter of b for the input")
        .SetDefault(static_cast<AttrType>(1.7159));
    AddComment(R"DOC(
-STanh activation operator.
STanh Activation Operator.

$$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
@@ -435,7 +435,7 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("threshold", "The threshold location of activation")
        .SetDefault(static_cast<AttrType>(1.0));
    AddComment(R"DOC(
-ThresholdedRelu activation operator.
ThresholdedRelu Activation Operator.

$$
y = \begin{cases}
@@ -461,7 +461,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
        .SetDefault(static_cast<AttrType>(0.5));
    AddComment(R"DOC(
-HardSigmoid activation operator.
HardSigmoid Activation Operator.

Segment-wise linear approximation of sigmoid (https://arxiv.org/abs/1603.00391),
which is much faster than sigmoid.
......
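The doc strings above are pure formulas; a standalone C++ sketch (not the operator kernels themselves) evaluating a few of them makes the conventions concrete:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      double x = -1.5;
      double sigmoid = 1.0 / (1.0 + std::exp(-x));    // Sigmoid
      double relu = std::max(x, 0.0);                 // Relu
      double leaky = std::max(x, 0.02 * x);           // LeakyRelu, default alpha
      double softplus = std::log(1.0 + std::exp(x));  // Softplus
      std::printf("%g %g %g %g\n", sigmoid, relu, leaky, softplus);
      return 0;
    }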
@@ -64,16 +64,15 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("Param", "(Tensor) Input parameter");
    AddInput("Grad", "(Tensor) Input gradient");
-    AddInput("AvgSquaredGrad",
-             "(Tensor) Input expectation of squared gradient");
    AddInput("AvgSquaredGrad", "(Tensor) Input average of squared gradient");
    AddInput("AvgSquaredUpdate",
-             "(Tensor) Input expectation of squared parameter updates");
             "(Tensor) Input average of squared parameter updates");
    AddOutput("ParamOut", "(Tensor) Output parameter");
    AddOutput("AvgSquaredGradOut",
-              "(Tensor) Output expectation of squared gradient");
              "(Tensor) Output average of squared gradient");
    AddOutput("AvgSquaredUpdateOut",
-              "(Tensor) Output expectation of squared parameter updates");
              "(Tensor) Output average of squared parameter updates");

    AddAttr<float>("rho",
                   "(float, default 0.95) Exponential decay rate "
@@ -84,22 +83,21 @@ class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
                   "numerical stability")
        .SetDefault(1.0e-6f);
    AddComment(R"DOC(
-Adadelta Updates Operator.
Adadelta Optimizer.

-This implements the Adadelta optimizer[1]. Adadelta is a per-dimension
-adaptive learning rate method for gradient descent.
Adadelta optimizer is implemented as explained in:
https://arxiv.org/abs/1212.5701
Adadelta is a per-dimension adaptive learning rate method used
for gradient descent.

-Adadelta updates:
-avg_squared_grad_out = rho * avg_squared_grad + (1 - rho) * grad * grad
-param_update = - sqrt((avg_squared_update + epsilon) /
-                      (avg_squared_grad_out + epsilon)) * grad
-avg_squared_update_out = rho * avg_squared_update + (1 - rho) * param_update**2
-param_out = param + param_update
Adadelta updates are as follows:

$$
avgSquaredGradOut = \rho * avgSquaredGrad + (1 - \rho) * grad * grad \\
paramUpdate = -\sqrt{(avgSquaredUpdate + \epsilon) /
                     (avgSquaredGradOut + \epsilon)} * grad \\
avgSquaredUpdateOut = \rho * avgSquaredUpdate + (1 - \rho) * (paramUpdate)^2 \\
paramOut = param + paramUpdate
$$

-References:
-  [1] ADADELTA: An Adaptive Learning Rate Method
-      https://arxiv.org/abs/1212.5701

)DOC");
  }
......
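A hedged scalar sketch of one Adadelta step, mirroring the doc formulas (rho and epsilon defaults taken from the attribute defaults above; not the operator kernel):

    #include <cmath>

    struct AdadeltaState { double avg_sq_grad = 0, avg_sq_update = 0; };

    double adadelta_step(double param, double grad, AdadeltaState& s,
                         double rho = 0.95, double eps = 1.0e-6) {
      s.avg_sq_grad = rho * s.avg_sq_grad + (1 - rho) * grad * grad;
      double update = -std::sqrt((s.avg_sq_update + eps) /
                                 (s.avg_sq_grad + eps)) * grad;
      s.avg_sq_update = rho * s.avg_sq_update + (1 - rho) * update * update;
      return param + update;  // paramOut
    }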
@@ -73,12 +73,16 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
Adaptive Gradient Algorithm (Adagrad).

-moment_out = moment + grad * grad
-param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
The update is done as follows:

$$
momentOut = moment + grad * grad \\
paramOut = param - learningRate * grad / (\sqrt{momentOut} + \epsilon)
$$

The original paper (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
-does not have the epsilon attribute. It is added here for numerical stability
-by avoiding division by zero.
does not have the epsilon attribute. It is added here in our implementation
as also proposed here: http://cs231n.github.io/neural-networks-3/#ada
for numerical stability to avoid the division by zero error.

)DOC");
  }
......
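A minimal sketch of the Adagrad update above, with the epsilon term guarding the division as discussed (illustrative names only, not the operator's kernel):

#include <cmath>
#include <cstddef>

// Minimal sketch of the Adagrad update; moment accumulates squared gradients.
void adagrad_update(float* param, const float* grad, float* moment,
                    std::size_t n, float learning_rate, float epsilon) {
  for (std::size_t i = 0; i < n; ++i) {
    moment[i] += grad[i] * grad[i];
    param[i] -= learning_rate * grad[i] / (std::sqrt(moment[i]) + epsilon);
  }
}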
...@@ -51,8 +51,8 @@ class AdamOp : public framework::OperatorWithKernel { ...@@ -51,8 +51,8 @@ class AdamOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension"); "Beta1 power accumulator should have 1 dimension");
auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow"); auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, PADDLE_ENFORCE_EQ(framework::product(beta2_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension"); "Beta2 power accumulator should have 1 dimension");
auto param_dims = ctx->GetInputDim("Param"); auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -60,10 +60,10 @@ class AdamOp : public framework::OperatorWithKernel { ...@@ -60,10 +60,10 @@ class AdamOp : public framework::OperatorWithKernel {
"Param and Grad input of AdamOp should have same dimension"); "Param and Grad input of AdamOp should have same dimension");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment1"), param_dims, ctx->GetInputDim("Moment1"),
"Param and Moment input of AdamOp should have same dimension"); "Param and Moment1 input of AdamOp should have same dimension");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment2"), param_dims, ctx->GetInputDim("Moment2"),
"Param and InfNorm input of AdamOp should have same dimension"); "Param and Moment2 input of AdamOp should have same dimension");
ctx->SetOutputDim("ParamOut", param_dims); ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("Moment1Out", param_dims); ctx->SetOutputDim("Moment1Out", param_dims);
...@@ -103,23 +103,20 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -103,23 +103,20 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(1.0e-8f); .SetDefault(1.0e-8f);
AddComment(R"DOC( AddComment(R"DOC(
Adam Updates Operator. Adam Optimizer.
This implements the Adam optimizer from Section 2 of the Adam This implements the Adam optimizer from Section 2 of the Adam
paper[1]. Adam is a first-order gradient-based optimization paper : https://arxiv.org/abs/1412.6980.
method based on adaptive estimates of lower-order moments. Adam is a first-order gradient-based optimization method based on
adaptive estimates of lower-order moments.
Adam updates: Adam updates:
moment1_out = beta1 * moment1 + (1 − beta1) * grad $$moment_{1,out} = \beta_1 * moment_1 + (1 - \beta_1) * grad \break
moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad moment_{2,out} = \beta_2 * moment_2 + (1 - \beta_2) * grad * grad \break
learning_rate_t = learning_rate_t * learningRate = learningRate *
sqrt(1 - beta2_pow) / (1 - beta1_pow) \sqrt{1 - \beta_{2,pow}} / (1 - \beta_{1,pow}) \break
param_out = param - learning_rate_t * moment1/ (sqrt(moment2) + epsilon) paramOut = param - learningRate * moment_{1,out} / (\sqrt{moment_{2,out}} + \epsilon)$$
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC"); )DOC");
} }
......
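The bias-corrected step above can be sketched as plain loops over the equations (illustrative names, assuming beta1_pow and beta2_pow hold beta1^t and beta2^t at step t; not the operator's kernel):

#include <cmath>
#include <cstddef>

// Minimal sketch of the Adam update from the operator comment above.
void adam_update(float* param, const float* grad, float* moment1,
                 float* moment2, std::size_t n, float learning_rate,
                 float beta1, float beta2, float beta1_pow, float beta2_pow,
                 float epsilon) {
  // Bias-corrected effective step size.
  float lr_t = learning_rate * std::sqrt(1 - beta2_pow) / (1 - beta1_pow);
  for (std::size_t i = 0; i < n; ++i) {
    moment1[i] = beta1 * moment1[i] + (1 - beta1) * grad[i];
    moment2[i] = beta2 * moment2[i] + (1 - beta2) * grad[i] * grad[i];
    param[i] -= lr_t * moment1[i] / (std::sqrt(moment2[i]) + epsilon);
  }
}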
...@@ -99,26 +99,22 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -99,26 +99,22 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
"Constant for numerical stability") "Constant for numerical stability")
.SetDefault(1.0e-8f); .SetDefault(1.0e-8f);
AddComment(R"DOC( AddComment(R"DOC(
Adamax Updates Operator. Adamax Optimizer.
This implements the Adamax optimizer from Section 7 of the Adam We implement the Adamax optimizer from Section 7 of the Adam
paper[1]. Adamax is a variant of the paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the
Adam algorithm based on the infinity norm. Adam algorithm based on the infinity norm.
Adamax updates: Adamax updates:
moment_out = beta1 * moment + (1 - beta1) * grad $$momentOut = \beta_1 * moment + (1 - \beta_1) * grad \break
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad)) infNormOut = \max(\beta_2 * infNorm + \epsilon, |grad|) \break
learning_rate_t = learning_rate/(1 - beta1_pow) learningRate = learningRate / (1 - \beta_{1,pow}) \break
param_out = param - learning_rate_t * moment_out/inf_norm_out paramOut = param - learningRate * momentOut / infNormOut$$
The original paper does not have an epsilon attribute. The original paper does not have an epsilon attribute.
However, it is added here for numerical stability However, it is added here for numerical stability to prevent the
by preventing divide by 0. division by 0 error.
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC"); )DOC");
} }
......
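A minimal sketch of the Adamax update above; the infinity-norm accumulator replaces Adam's second moment, and epsilon keeps it away from zero (illustrative names only):

#include <algorithm>
#include <cmath>
#include <cstddef>

// Minimal sketch of the Adamax update from the operator comment above.
void adamax_update(float* param, const float* grad, float* moment,
                   float* inf_norm, std::size_t n, float learning_rate,
                   float beta1, float beta2, float beta1_pow, float epsilon) {
  float lr_t = learning_rate / (1 - beta1_pow);
  for (std::size_t i = 0; i < n; ++i) {
    moment[i] = beta1 * moment[i] + (1 - beta1) * grad[i];
    inf_norm[i] = std::max(beta2 * inf_norm[i] + epsilon, std::fabs(grad[i]));
    param[i] -= lr_t * moment[i] / inf_norm[i];
  }
}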
...@@ -23,11 +23,11 @@ class AucOp : public framework::OperatorWithKernel { ...@@ -23,11 +23,11 @@ class AucOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Out"), "Input of Out must be initialized."); PADDLE_ENFORCE(ctx->HasInput("Out"), "Input of Out should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Indices"), PADDLE_ENFORCE(ctx->HasInput("Indices"),
"Input of Indices must be initialized."); "Input of Indices should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), PADDLE_ENFORCE(ctx->HasInput("Label"),
"Input of Label must be initialized."); "Input of Label should not be null.");
auto inference_height = ctx->GetInputDim("Out")[0]; auto inference_height = ctx->GetInputDim("Out")[0];
auto label_height = ctx->GetInputDim("Label")[0]; auto label_height = ctx->GetInputDim("Label")[0];
...@@ -52,20 +52,20 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -52,20 +52,20 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Out", AddInput("Out",
"A floating point 2D tensor, values are in the range [0, 1]." "A floating point 2D tensor, values are in the range [0, 1]."
"Each row is descend sorted. This input should be the" "Each row is sorted in descending order. This input should be the"
"output of topk." "output of topk."
"Typically, this tensor indicates the probability of each label"); "Typically, this tensor indicates the probability of each label");
AddInput("Indices", AddInput("Indices",
"An int 2D tensor, indicating the indices of original" "An int 2D tensor, indicating the indices of original"
"tensor before sort. Typically, this tensor indicates which label" "tensor before sorting. Typically, this tensor indicates which "
"the probability stands for."); "label the probability stands for.");
AddInput("Label", AddInput("Label",
"A 2D int tensor indicating the label of the training data." "A 2D int tensor indicating the label of the training data."
"The height is batch size and width is always 1."); "The height is batch size and width is always 1.");
// TODO(typhoonzero): support weight input // TODO(typhoonzero): support weight input
AddOutput("AUC", AddOutput("AUC",
"A scalar representing the " "A scalar representing the "
"current area-under-curve."); "current area-under-the-curve.");
AddAttr<std::string>("curve", "Curve type, can be 'ROC' or 'PR'.") AddAttr<std::string>("curve", "Curve type, can be 'ROC' or 'PR'.")
.SetDefault("ROC"); .SetDefault("ROC");
...@@ -74,19 +74,18 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -74,19 +74,18 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
" roc curve.") " roc curve.")
.SetDefault(200); .SetDefault(200);
AddComment( AddComment(R"DOC(
R"DOC(Computes the AUC according forward output and label. Area Under The Curve (AUC) Operator.
Best to use for binary classification evaluations.
This implementation computes the AUC according to forward output and label.
It is used very widely in binary classification evaluation. As a note:
If input label contains values other than 0 and 1, it will be cast If input label contains values other than 0 and 1, it will be cast
to bool. to bool. You can find the relevant definitions here:
You can find the definations here:
https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve
Possible curves are: There are two types of possible curves:
- ROC: Receiver operating characteristic 1. ROC: Receiver operating characteristic
- PR: Precision Recall 2. PR: Precision Recall
)DOC"); )DOC");
} }
}; };
......
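For intuition, the ROC variant can be sketched as a threshold sweep over descending scores with trapezoid integration (a standalone illustration that ignores score ties and assumes both classes are present; not the operator's implementation):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

// Minimal ROC-AUC sketch: sort (score, label) pairs by descending score,
// lower the threshold one sample at a time, and accumulate trapezoid
// area under the TPR-over-FPR curve.
double roc_auc(std::vector<std::pair<double, int>> samples) {
  std::sort(samples.begin(), samples.end(),
            [](const std::pair<double, int>& a,
               const std::pair<double, int>& b) { return a.first > b.first; });
  double pos = 0, neg = 0;
  for (std::size_t i = 0; i < samples.size(); ++i)
    (samples[i].second ? pos : neg) += 1;
  double tp = 0, fp = 0, prev_tpr = 0, prev_fpr = 0, auc = 0;
  for (std::size_t i = 0; i < samples.size(); ++i) {
    (samples[i].second ? tp : fp) += 1;
    double tpr = tp / pos, fpr = fp / neg;
    auc += (fpr - prev_fpr) * (tpr + prev_tpr) / 2;  // trapezoid rule
    prev_tpr = tpr;
    prev_fpr = fpr;
  }
  return auc;
}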
...@@ -70,7 +70,7 @@ class BatchNormOp : public framework::OperatorWithKernel { ...@@ -70,7 +70,7 @@ class BatchNormOp : public framework::OperatorWithKernel {
: x_dims[x_dims.size() - 1]); : x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5,
"Input x must have 3 to 5 dimensions."); "Input X must have 3 to 5 dimensions.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C);
...@@ -97,16 +97,16 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -97,16 +97,16 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "The input tensor"); AddInput("X", "The input tensor");
AddInput("Scale", AddInput("Scale",
"Scale is a 1-dimensional tensor of size C " "Scale is a 1-dimensional tensor of size C "
"to be applied to the output"); "that is applied to the output");
AddInput("Bias", AddInput("Bias",
"Bias is a 1-dimensional tensor of size C " "Bias is a 1-dimensional tensor of size C "
"to be applied to the output"); "that is applied to the output");
AddInput("Mean", AddInput("Mean",
"The global mean (for training) or the " "The global mean (for training) or "
"estimated mean (for testing)"); "estimated mean (for testing)");
AddInput("Variance", AddInput("Variance",
"The global variance (for training) " "The global variance (for training) "
"or the estimated Variance (for testing)"); "or estimated Variance (for testing)");
AddOutput("Y", "result after normalization"); AddOutput("Y", "result after normalization");
AddOutput("MeanOut", AddOutput("MeanOut",
"Share memory with Mean. " "Share memory with Mean. "
...@@ -123,10 +123,14 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -123,10 +123,14 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
"will apply to output when training") "will apply to output when training")
.AsIntermediate(); .AsIntermediate();
AddComment(R"DOC( AddComment(R"DOC(
https://arxiv.org/pdf/1502.03167.pdf Batch Normalization.
NHWC `[batch, in_height, in_width, in_channels]` Batch Norm has been implemented as discussed in the paper:
NCHW `[batch, in_channels, in_height, in_width]` https://arxiv.org/pdf/1502.03167.pdf
Can be used as a normalizer function for conv2d and fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
)DOC"); )DOC");
} }
......
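At inference time, the normalization described above reduces to a per-channel affine transform using the estimated statistics. A minimal NCHW sketch (illustrative names only, not the operator's kernel):

#include <cmath>
#include <cstddef>

// Minimal inference-time batch norm over NCHW data: normalize each
// channel with its estimated mean/variance, then apply scale and bias.
void batch_norm_nchw(const float* x, float* y, const float* mean,
                     const float* variance, const float* scale,
                     const float* bias, std::size_t n, std::size_t c,
                     std::size_t hw, float epsilon) {
  for (std::size_t i = 0; i < n; ++i) {
    for (std::size_t j = 0; j < c; ++j) {
      float inv_std = 1.0f / std::sqrt(variance[j] + epsilon);
      for (std::size_t k = 0; k < hw; ++k) {
        std::size_t idx = (i * c + j) * hw + k;
        y[idx] = scale[j] * (x[idx] - mean[j]) * inv_std + bias[j];
      }
    }
  }
}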
...@@ -23,13 +23,17 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -23,13 +23,17 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker {
CastOpProtoMaker(framework::OpProto *proto, CastOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker) framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensor of cast op"); AddInput("X", "The input tensor of cast op");
AddOutput("Out", "the output tensor of cast op"); AddOutput("Out", "The output tensor of cast op");
AddComment(R"DOC(Cast operator.
cast the input tensor to other data type.
)DOC");
AddAttr<int>("out_data_type", "output data type"); AddAttr<int>("out_data_type", "output data type");
AddAttr<int>("in_data_type", "input data type"); AddAttr<int>("in_data_type", "input data type");
AddComment(R"DOC(
Cast Operator.
This Operator casts the input tensor to another data type and
returns the output tensor.
)DOC");
} }
}; };
......
...@@ -49,8 +49,11 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,8 +49,11 @@ class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<AttrType>( AddAttr<AttrType>(
"max", "(float)Maximum value, above which element is replaced by max"); "max", "(float)Maximum value, above which element is replaced by max");
AddComment(R"DOC( AddComment(R"DOC(
Clip operator limits the given input within an interval. The interval is Clip Operator.
The clip operator limits the value of the given input within an interval. The interval is
specified with arguments 'min' and 'max'. specified with arguments 'min' and 'max'.
)DOC"); )DOC");
} }
}; };
......
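The element-wise semantics are simply the following (a one-line illustration, not the operator's kernel):

#include <algorithm>

// Minimal sketch of the clip semantics: values below min or above max
// are replaced by the respective bound.
float clip_value(float x, float min_v, float max_v) {
  return std::min(std::max(x, min_v), max_v);
}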
...@@ -114,21 +114,17 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -114,21 +114,17 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
"where N is the batch size and D is the number of classes. " "where N is the batch size and D is the number of classes. "
"This input is a probability computed by the previous operator, " "This input is a probability computed by the previous operator, "
"which is almost always the result of a softmax operator."); "which is almost always the result of a softmax operator.");
AddInput( AddInput("Label",
"Label", "(Tensor), the ground truth which is a 2-D tensor. When "
"(Tensor, default Tensor<int>), the ground truth which is " "soft_label is set to false, Label is a Tensor<int64> with shape "
"a 2-D tensor. " "[N x 1]. When soft_label is set to true, Label is a "
"When soft_label is set to false, Label is a Tensor<int> with shape " "Tensor<float/double> with shape [N x K].");
"[N x 1]. "
"When soft_label is set to true, Label is a Tensor<float/double> "
"with shape [N x K].");
AddOutput("Y", AddOutput("Y",
"(Tensor, default Tensor<float>), a 2-D tensor " "(Tensor, default Tensor<float>), a 2-D tensor with shape "
"with shape [N x 1]. The cross entropy loss."); "[N x 1]. The cross entropy loss.");
AddAttr<bool>( AddAttr<bool>("soft_label",
"soft_label", "(bool, default false), a flag indicating whether to "
"(bool, default false), a flag to indicate whether to interpretate " "interpretate the given labels as soft labels.")
"the given labels as soft labels.")
.SetDefault(false); .SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
CrossEntropy Operator. CrossEntropy Operator.
......
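The two labeling modes distinguished by soft_label can be sketched per row as follows (illustrative helpers, assuming prob holds a normalized distribution over the classes; not the operator's kernel):

#include <cmath>
#include <cstddef>

// Hard labels (soft_label == false): Label holds one class index per row.
float cross_entropy_hard(const float* prob, int label) {
  return -std::log(prob[label]);
}

// Soft labels (soft_label == true): Label holds a distribution over classes.
float cross_entropy_soft(const float* prob, const float* label,
                         std::size_t num_classes) {
  float loss = 0;
  for (std::size_t i = 0; i < num_classes; ++i)
    loss -= label[i] * std::log(prob[i]);
  return loss;
}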
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gru_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class GRUOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of GRUOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasOutput("BatchGate"),
"Output(%s) of GRUOp should not be null.", "BatchGate");
PADDLE_ENFORCE(ctx->HasOutput("BatchResetHiddenPrev"),
"Output(%s) of GRUOp should not be null.",
"BatchResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasOutput("BatchHidden"),
"Output(%s) of GRUOp should not be null.", "BatchHidden");
PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
"Output(%s) of GRUOp should not be null.", "Hidden");
auto input_dims = ctx->GetInputDim("Input");
auto weight_dims = ctx->GetInputDim("Weight");
int input_size = input_dims[1];
int frame_size = weight_dims[0];
PADDLE_ENFORCE_EQ(input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUOp.");
PADDLE_ENFORCE_EQ(
weight_dims[1], frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
if (ctx->HasInput("H0")) {
auto h0_dims = ctx->GetInputDim("H0");
PADDLE_ENFORCE_EQ(h0_dims[1], frame_size,
"The width of H0 must be equal to frame_size.");
}
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
}
ctx->SetOutputDim("BatchGate", input_dims);
ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size});
ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size});
ctx->SetOutputDim("Hidden", {input_dims[0], frame_size});
ctx->ShareLoD("Input", "Hidden");
}
};
class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(LoDTensor) The first input is a LodTensor, which supports "
"variable-time length input sequence. The underlying tensor in "
"this LoDTenosr is a matrix with shape (T X 3D), where, T is the "
"total time steps in this mini-batch, D is the hidden size.");
AddInput("H0",
"(Tensor, optional) The initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size, D is the hidden size.")
.AsDispensable();
AddInput(
"Weight",
"(Tensor) The learnable hidden-hidden weight matrix with shape "
"(D x 3D), where D is the hidden size. The elements continuous in "
"memory can be divided into two parts. The first part are weights of "
"the update gate and reset gate with shape (D x 2D), and the second "
"part are weights of output candidate with shape (D x D).");
AddInput("Bias",
"(Tensor, optional) Bias vector with shape (1 x 3D) concating "
"bias of the update gate, reset gate and output candidate.")
.AsDispensable();
AddOutput("BatchGate",
"(LoDTensor) To compute with batches, sequence data will be "
"reorganized into several successive batches each containing "
"data from the same time step. The LoDTensor BatchGate contains "
"the update gate, reset gate and output candidate values "
"organized in batches. The LoD size is 2. The first LoD contains "
"the batch offsets and the second LoD contains the indexes in "
"the raw sequence data.")
.AsIntermediate();
AddOutput(
"BatchResetHiddenPrev",
"(LoDTensor) The reseted hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
AddOutput(
"BatchHidden",
"(LoDTensor) The hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
AddOutput(
"Hidden",
"(LoDTensor) the hidden state LoDTensor organized in sequences. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.");
AddAttr<std::string>("activation",
"(string, default tanh) "
"The activation type used for output candidate {h}_t.")
.SetDefault("tanh");
AddAttr<std::string>(
"gate_activation",
"(string, default sigmoid) "
"The activation type used in update gate and reset gate.")
.SetDefault("sigmoid");
AddAttr<bool>("is_reverse",
"(bool, defalut: False) "
"whether to compute reversed GRU.")
.SetDefault(false);
AddComment(R"DOC(
GRU Operator implements part of the calculations of the complete GRU as follows:
\f[
update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
\f]
@note To implement the complete GRU, a fully-connected operator must be applied
beforehand to compute xu, xr and xc, which are fed as the Input of the GRU operator.
)DOC");
}
};
class GRUGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of GRUGradOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUGradOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasInput("BatchGate"),
"Input(%s) of GRUGradOp should not be null.", "BatchGate");
PADDLE_ENFORCE(ctx->HasInput("BatchResetHiddenPrev"),
"Input(%s) of GRUGradOp should not be null.",
"BatchResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("BatchHidden"),
"Input(%s) of GRUOp should not be null.", "BatchHidden");
PADDLE_ENFORCE(ctx->HasInput("Hidden"),
"Input(%s) of GRUGradOp should not be null.", "Hidden");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
"Input(%s@GRAD) of GRUGradOp should not be null.", "Hidden");
auto input_dims = ctx->GetInputDim("Input");
auto weight_dims = ctx->GetInputDim("Weight");
int input_size = input_dims[1];
int frame_size = weight_dims[0];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
PADDLE_ENFORCE_EQ(input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUOp.");
PADDLE_ENFORCE_EQ(
weight_height, frame_size,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
if (ctx->HasInput("H0")) {
auto h0_dims = ctx->GetInputDim("H0");
PADDLE_ENFORCE_EQ(h0_dims[1], frame_size,
"The width of H0 must be equal to frame_size.");
auto h0_grad_name = framework::GradVarName("H0");
if (ctx->HasOutput(h0_grad_name))
ctx->SetOutputDim(h0_grad_name, h0_dims);
}
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto input_grad_name = framework::GradVarName("Input");
if (ctx->HasOutput(input_grad_name))
ctx->SetOutputDim(input_grad_name, input_dims);
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(weight_grad_name))
ctx->SetOutputDim(weight_grad_name, weight_dims);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(gru, ops::GRUOp, ops::GRUOpMaker, gru_grad, ops::GRUGradOp);
REGISTER_OP_CPU_KERNEL(gru, ops::GRUKernel<paddle::platform::CPUPlace, float>,
ops::GRUKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(gru_grad,
ops::GRUGradKernel<paddle::platform::CPUPlace, float>,
ops::GRUGradKernel<paddle::platform::CPUPlace, double>);
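A minimal single-step sketch of the GRU equations in GRUOpMaker's comment, for one sequence position with hidden size d: xu/xr/xc are the pre-computed input projections (biases folded in) and w_u/w_r/w_c are the D x D weight blocks. Illustrative only, not the batched kernel above:

#include <cmath>

// Minimal one-step GRU following the equations in the operator comment.
static float sigmoid_fn(float x) { return 1.0f / (1.0f + std::exp(-x)); }

void gru_step(const float* xu, const float* xr, const float* xc,
              const float* h_prev, const float* w_u, const float* w_r,
              const float* w_c, float* u, float* r, float* h, int d) {
  // Update and reset gates: u_t = actGate(xu + W_u h_{t-1}), r_t likewise.
  for (int i = 0; i < d; ++i) {
    float su = xu[i], sr = xr[i];
    for (int j = 0; j < d; ++j) {
      su += w_u[i * d + j] * h_prev[j];
      sr += w_r[i * d + j] * h_prev[j];
    }
    u[i] = sigmoid_fn(su);
    r[i] = sigmoid_fn(sr);
  }
  // Candidate {h}_t = tanh(xc + W_c (r_t . h_{t-1})), then interpolate:
  // h_t = (1 - u_t) . h_{t-1} + u_t . {h}_t.
  for (int i = 0; i < d; ++i) {
    float sc = xc[i];
    for (int j = 0; j < d; ++j) sc += w_c[i * d + j] * (r[j] * h_prev[j]);
    float candidate = std::tanh(sc);
    h[i] = (1 - u[i]) * h_prev[i] + u[i] * candidate;
  }
}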
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gru_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(gru, ops::GRUKernel<paddle::platform::GPUPlace, float>,
ops::GRUKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(gru_grad,
ops::GRUGradKernel<paddle::platform::GPUPlace, float>,
ops::GRUGradKernel<paddle::platform::GPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/math/gru_compute.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence2batch.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
const T* h0_data = h0 ? h0->data<T>() : nullptr;
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
context.ShareLoD("Input", "Hidden");
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<Place, T> to_batch;
to_batch(context.device_context(), *input, *batch_gate, true, is_reverse);
int frame_size = hidden_dims[1];
int batch_size = hidden_dims[0];
auto g = EigenMatrix<T>::From(*batch_gate);
auto place = context.GetEigenDevice<Place>();
if (bias) {
auto b = EigenMatrix<T>::From(*bias);
g.device(place) = g +
b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
.broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
}
math::hl_gru_value<T> gru_value;
gru_value.gateWeight = const_cast<T*>(weight_data);
gru_value.stateWeight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
gru_value.prevOutValue = const_cast<T*>(h0_data);
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.outputValue = hidden_t.data<T>();
gru_value.gateValue = gate_t.data<T>();
gru_value.resetOutputValue = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<Place, T>::compute(
context.device_context(), gru_value, frame_size, cur_batch_size,
math::ActiveType(context.Attr<std::string>("activation")),
math::ActiveType(context.Attr<std::string>("gate_activation")));
gru_value.prevOutValue = gru_value.outputValue;
}
math::Batch2LoDTensorFunctor<Place, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(context.device_context(), *batch_hidden, *hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
template <typename Place, typename T>
class GRUGradKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* h0 = context.Input<Tensor>("H0");
const T* h0_data = h0 ? h0->data<T>() : nullptr;
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* batch_gate = context.Input<LoDTensor>("BatchGate");
auto* batch_reset_hidden_prev =
context.Input<LoDTensor>("BatchResetHiddenPrev");
auto* batch_hidden = context.Input<LoDTensor>("BatchHidden");
auto* hidden = context.Input<LoDTensor>("Hidden");
auto* hidden_grad =
context.Input<LoDTensor>(framework::GradVarName("Hidden"));
auto* input_grad =
context.Output<LoDTensor>(framework::GradVarName("Input"));
auto* h0_grad = context.Output<Tensor>(framework::GradVarName("H0"));
auto* weight_grad =
context.Output<Tensor>(framework::GradVarName("Weight"));
auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
auto gate_dims = batch_gate->dims();
auto hidden_dims = hidden->dims();
int frame_size = hidden_dims[1];
math::LoDTensor2BatchFunctor<Place, T> to_batch;
LoDTensor batch_hidden_grad, batch_gate_grad, batch_reset_hidden_prev_grad;
batch_hidden_grad.mutable_data<T>(hidden_dims, context.GetPlace());
batch_gate_grad.mutable_data<T>(gate_dims, context.GetPlace());
batch_reset_hidden_prev_grad.mutable_data<T>(hidden_dims,
context.GetPlace());
math::SetConstant<Place, T> zero;
zero(context.device_context(), &batch_hidden_grad, static_cast<T>(0.0));
zero(context.device_context(), &batch_gate_grad, static_cast<T>(0.0));
zero(context.device_context(), &batch_reset_hidden_prev_grad,
static_cast<T>(0.0));
bool is_reverse = context.Attr<bool>("is_reverse");
batch_hidden_grad.set_lod(batch_hidden->lod());
to_batch(context.device_context(), *hidden_grad, batch_hidden_grad, false,
is_reverse);
math::hl_gru_value<T> gru_value;
gru_value.gateWeight = const_cast<T*>(weight_data);
gru_value.stateWeight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
math::hl_gru_grad<T> gru_grad;
if (weight_grad) {
gru_grad.gateWeightGrad =
weight_grad->mutable_data<T>(context.GetPlace());
zero(context.device_context(), weight_grad, static_cast<T>(0.0));
gru_grad.stateWeightGrad =
weight_grad->data<T>() + 2 * frame_size * frame_size;
} else {
gru_grad.gateWeightGrad = nullptr;
gru_grad.stateWeightGrad = nullptr;
}
auto batch_starts = batch_hidden_grad.lod()[0];
size_t num_batch = batch_starts.size() - 1;
for (int n = static_cast<int>(num_batch) - 1; n >= 0; n--) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
gru_value.gateValue = gate_t.data<T>();
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
gru_value.resetOutputValue = reset_hidden_prev_t.data<T>();
Tensor hidden_grad_t = batch_hidden_grad.Slice(bstart, bend);
gru_grad.outputGrad = hidden_grad_t.data<T>();
Tensor gate_grad_t = batch_gate_grad.Slice(bstart, bend);
gru_grad.gateGrad = gate_grad_t.data<T>();
Tensor reset_hidden_prev_grad_t =
batch_reset_hidden_prev_grad.Slice(bstart, bend);
gru_grad.resetOutputGrad = reset_hidden_prev_grad_t.data<T>();
if (n == 0) {
gru_value.prevOutValue = const_cast<T*>(h0_data);
if (h0_grad) {
T* h0_grad_data = h0_grad->mutable_data<T>(context.GetPlace());
zero(context.device_context(), h0_grad, static_cast<T>(0.0));
gru_grad.prevOutGrad = h0_grad_data;
} else {
gru_grad.prevOutGrad = nullptr;
}
} else {
int bstart_pre = static_cast<int>(batch_starts[n - 1]);
Tensor hidden_prev_t = batch_hidden->Slice(bstart_pre, bstart);
gru_value.prevOutValue = hidden_prev_t.data<T>();
Tensor hidden_prev_grad_t = batch_hidden_grad.Slice(bstart_pre, bstart);
gru_grad.prevOutGrad = hidden_prev_grad_t.data<T>();
}
math::GRUUnitGradFunctor<Place, T>::compute(
context.device_context(), gru_value, gru_grad, frame_size,
cur_batch_size,
math::ActiveType(context.Attr<std::string>("activation")),
math::ActiveType(context.Attr<std::string>("gate_activation")));
}
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
math::Batch2LoDTensorFunctor<Place, T> to_seq;
batch_gate_grad.set_lod(batch_gate->lod());
to_seq(context.device_context(), batch_gate_grad, *input_grad);
}
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
auto d_b = EigenMatrix<T>::From(*bias_grad);
auto d_g = EigenMatrix<T>::From(batch_gate_grad);
auto place = context.GetEigenDevice<Place>();
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
}
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
...@@ -23,21 +23,21 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -23,21 +23,21 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Emission", AddInput("Emission",
"(LoDTensor, default: LoDTensor<float>). " "(LoDTensor, default LoDTensor<float>) "
"A 2-D LoDTensor with shape [N x D] where N is the size of the " "A 2-D LoDTensor with shape [N x D], where N is the size of the "
"mini-batch and D is the total tag number. The unscaled emission " "mini-batch and D is the total tag number. The unscaled emission "
"weight matrix for the linear chain CRF. "); "weight matrix for the linear chain CRF. ");
AddInput("Transition", AddInput("Transition",
"(Tensor, default: Tensor<float>). A 2-D Tensor with shape " "(Tensor, default Tensor<float>) A 2-D Tensor with shape "
"[(D + 2) x D]. The learnable parameter for the linear_chain_crf " "[(D + 2) x D]. The learnable parameter for the linear_chain_crf "
"operator. See more details in the operator's comments."); "operator. See more details in the operator's comments.");
AddInput("Label", AddInput("Label",
"(LoDTensor, default: LoDTensor<int>). A LoDTensor with shape " "(LoDTensor, default LoDTensor<int>) A LoDTensor with shape "
"[N x 1], where N is the total element number in a mini-batch. " "[N x 1], where N is the total element number in a mini-batch. "
"The ground truth."); "The ground truth.");
AddOutput( AddOutput(
"Alpha", "Alpha",
"(Tensor, default: Tensor<float>). A 2-D Tensor with shape [N x D]. " "(Tensor, default Tensor<float>) A 2-D Tensor with shape [N x D]. "
"The forward vectors for the entire batch. Denote it as \f$\alpha\f$. " "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. "
"\f$\alpha$\f is a memo table used to calculate the normalization " "\f$\alpha$\f is a memo table used to calculate the normalization "
"factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized " "factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized "
...@@ -49,26 +49,28 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,26 +49,28 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
.AsIntermediate(); .AsIntermediate();
AddOutput( AddOutput(
"EmissionExps", "EmissionExps",
"(Tensor, default: Tensor<float>). A 2-D Tensor with shape [N x D]. " "(Tensor, default Tensor<float>) A 2-D Tensor with shape [N x D]. "
"The exponentials of Input(Emission). This is an intermediate " "The exponentials of Input(Emission). This is an intermediate "
"computational result in forward computation, and will be reused in " "computational result in forward computation, and will be reused in "
"backward computation.") "backward computation.")
.AsIntermediate(); .AsIntermediate();
AddOutput( AddOutput(
"TransitionExps", "TransitionExps",
"(Tensor, default: Tensor<float>). A 2-D Tensor with shape " "(Tensor, default Tensor<float>) A 2-D Tensor with shape "
"[(D + 2) x D]. The exponentials of Input(Transition). This is an " "[(D + 2) x D]. The exponentials of Input(Transition). This is an "
"intermediate computational result in forward computation, and " "intermediate computational result in forward computation, and "
"will be reused in backward computation.") "will be reused in backward computation.")
.AsIntermediate(); .AsIntermediate();
AddOutput( AddOutput(
"LogLikelihood", "LogLikelihood",
"(Tensor, default: Tensor<float>). The logarithm of the conditional " "(Tensor, default Tensor<float>) The logarithm of the conditional "
"likelihood of each training sample in a mini-batch. This is a 2-D " "likelihood of each training sample in a mini-batch. This is a 2-D "
"tensor with shape [S x 1], where S is the sequence number in a " "tensor with shape [S x 1], where S is the sequence number in a "
"mini-batch. Note: S is equal to the sequence number in a mini-batch. " "mini-batch. Note: S is equal to the sequence number in a mini-batch. "
"The output is no longer a LoDTensor."); "The output is no longer a LoDTensor.");
AddComment(R"DOC( AddComment(R"DOC(
LinearChainCRF Operator.
Conditional Random Field defines an undirected probabilistic graph with nodes Conditional Random Field defines an undirected probabilistic graph with nodes
denoting random variables and edges denoting dependencies between these denoting random variables and edges denoting dependencies between these
variables. CRF learns the conditional probability \f$P(Y|X)\f$, where variables. CRF learns the conditional probability \f$P(Y|X)\f$, where
...@@ -82,29 +84,28 @@ and output must be linear sequences. Thus, the graph of such a CRF is a simple ...@@ -82,29 +84,28 @@ and output must be linear sequences. Thus, the graph of such a CRF is a simple
chain or a line, which results in the linear chain CRF. chain or a line, which results in the linear chain CRF.
This operator implements the Forward-Backward algorithm for the linear chain This operator implements the Forward-Backward algorithm for the linear chain
CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf and CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and
http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for reference. http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details.
Equation: Equation:
1. Denote Input(Emission) to this operator as \f$x\f$ here.
- Denote Input(Emission) to this operator as \f$x\f$ here. 2. The first D values of Input(Transition) to this operator are for starting
- The first D values of Input(Transition) to this operator are for starting
weights, denoted as \f$a\f$ here. weights, denoted as \f$a\f$ here.
- The next D values of Input(Transition) of this operator are for ending 3. The next D values of Input(Transition) of this operator are for ending
weights, denoted as \f$b\f$ here. weights, denoted as \f$b\f$ here.
- The remaning values of Input(Transition) are for transition weights, 4. The remaining values of Input(Transition) are for transition weights,
denoted as \f$w\f$ here. denoted as \f$w\f$ here.
- Denote Input(Label) as \f$s\f$ here. 5. Denote Input(Label) as \f$s\f$ here.
The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as: The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as:
\f$P(s) = (1/Z) exp(a_{s_1} + b_{s_L} \f$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L}
+ \sum_{l=1}^L x_{s_l} + \sum_{l=1}^L x_{s_l}
+ \sum_{l=2}^L w_{s_{l-1},s_l})\f$ + \sum_{l=2}^L w_{s_{l-1},s_l})\f$
where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over
all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight
to the linear chain CRF. to the linear chain CRF.
Finaly, the linear chain CRF operator outputs the logarithm of the conditional Finally, the linear chain CRF operator outputs the logarithm of the conditional
likelihood of each training sample in a mini-batch. likelihood of each training sample in a mini-batch.
NOTE: NOTE:
......
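The unnormalized sequence score inside the exponential above can be sketched directly (an illustration only; the normalization value Z requires the forward algorithm and is omitted):

#include <cstddef>

// Minimal sketch of the unnormalized score of a tag sequence s of
// length L over D tags: start weights a, end weights b, emissions x
// (L x D, row-major), transitions w (D x D, row-major).
float sequence_score(const float* x, const float* a, const float* b,
                     const float* w, const int* s, std::size_t L,
                     std::size_t D) {
  float score = a[s[0]] + b[s[L - 1]];
  for (std::size_t l = 0; l < L; ++l) score += x[l * D + s[l]];
  for (std::size_t l = 1; l < L; ++l) score += w[s[l - 1] * D + s[l]];
  return score;  // P(s) = exp(score) / Z
}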
...@@ -55,8 +55,6 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -55,8 +55,6 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
"(2-D tensor with shape [batch_size x 1]) " "(2-D tensor with shape [batch_size x 1]) "
"The label indicating X1 ranked higher than X2 or not, " "The label indicating X1 ranked higher than X2 or not, "
"can only be +1 or -1."); "can only be +1 or -1.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
AddOutput("Activated", AddOutput("Activated",
"(2-D tensor with shape [batch_size x 1]) Intermediate tensor " "(2-D tensor with shape [batch_size x 1]) Intermediate tensor "
"to indicate whether each element of Output(Out) is activated.") "to indicate whether each element of Output(Out) is activated.")
...@@ -64,23 +62,26 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -64,23 +62,26 @@ class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("Out", AddOutput("Out",
"(2-D tensor with shape [batch_size x 1]) " "(2-D tensor with shape [batch_size x 1]) "
"The output loss of MarginRankLoss operator."); "The output loss of MarginRankLoss operator.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
AddComment(R"DOC( AddComment(R"DOC(
MarginRankLoss Operator.
MarginRankLoss operator measures the loss given a pair of training sample This operator measures the loss given a pair of training sample
{`X1`, `X2`} and the `Label` with attribute `margin`, where `Label = +1` {`X1`, `X2`} and the `Label` with attribute `margin`, where `Label = +1`
indicating X1 is ranked higher than `X2`, otherwise `Label = -1`. The loss indicating X1 is ranked higher than `X2` and `Label = -1` otherwise. The loss
turns out is calculated as:
loss(X1, X2, Label) = max(0, -Label * (X1 - X2) + margin). $loss(X1, X2, Label) = \max(0, -Label * (X1 - X2) + margin)$
The attribute `margin` involved here helps make the predictions more robust. The attribute `margin` here helps make the predictions more robust.
Denote the item ranked higher as the positive sample, otherwise the negative Denote the item ranked higher as the positive sample, otherwise the negative
sample. If the score of the two samples satisfies sample. If the score of the two samples satisfies
positive sample - negative sample < margin, $positive sample - negative sample < margin$
the pair of samples will contribute to the final loss, which will backpropogate the pair of samples will contribute to the final loss, which will backpropagate
and train the ranking model to enlarge the difference of the two score. and train the ranking model to enlarge the difference between the two scores.
For batch input with size `batch_size`, `X1`, `X2` and `Label` For batch input with size `batch_size`, `X1`, `X2` and `Label`
all have the same shape [batch_size x 1]. all have the same shape [batch_size x 1].
......
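Per sample pair, the loss above is just the following (a one-line illustration, not the operator's kernel):

#include <algorithm>

// Minimal sketch of the margin rank loss for one pair; label is +1 if
// X1 should rank higher than X2, otherwise -1.
float margin_rank_loss(float x1, float x2, float label, float margin) {
  return std::max(0.0f, -label * (x1 - x2) + margin);
}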
...@@ -8,20 +8,24 @@ if(WITH_GPU) ...@@ -8,20 +8,24 @@ if(WITH_GPU)
nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator) nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator)
nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator)
nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context)
nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function)
nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context)
nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context)
nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context)
nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions)
else() else()
cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator)
cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
cc_library(softmax SRCS softmax.cc DEPS operator) cc_library(softmax SRCS softmax.cc DEPS operator)
cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator) cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator)
cc_library(pooling SRCS pooling.cc DEPS device_context) cc_library(pooling SRCS pooling.cc DEPS device_context)
cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function)
cc_library(vol2col SRCS vol2col.cc DEPS device_context) cc_library(vol2col SRCS vol2col.cc DEPS device_context)
cc_library(context_project SRCS context_project.cc DEPS device_context) cc_library(context_project SRCS context_project.cc DEPS device_context)
cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context)
cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function)
endif() endif()
cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <type_traits>
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/operators/math/gru_compute.h"
namespace paddle {
namespace operators {
namespace math {
namespace detail {
#ifndef __NVCC__
template <class OpResetOutput, typename T>
void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput,
T *gateValue, T *resetOutputValue,
T *prevOutputValue, int frameSize,
activation_mode_t active_gate) {
T rValueUpdateGate;
T rValueResetGate;
T rValueResetOutput;
T rPrevOut = 0;
T *updateGate = gateValue;
T *resetGate = gateValue + frameSize;
for (int i = 0; i < frameSize; i++) {
rValueUpdateGate = updateGate[i];
rValueResetGate = resetGate[i];
if (prevOutputValue) {
rPrevOut = prevOutputValue[i];
}
opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut,
rValueResetOutput, active_gate);
updateGate[i] = rValueUpdateGate;
resetGate[i] = rValueResetGate;
resetOutputValue[i] = rValueResetOutput;
}
}
template <class OpFinalOutput, typename T>
void hl_naive_gru_forward_final_output(OpFinalOutput opFinalOutput,
T *gateValue, T *prevOutputValue,
T *outputValue, int frameSize,
activation_mode_t active_node) {
T rValueUpdateGate;
T rValueFrameState;
T rPrevOut = 0;
T rOutput;
T *updateGate = gateValue;
T *frameState = gateValue + frameSize * 2;
for (int i = 0; i < frameSize; i++) {
rValueUpdateGate = updateGate[i];
rValueFrameState = frameState[i];
if (prevOutputValue) {
rPrevOut = prevOutputValue[i];
}
opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput,
active_node);
frameState[i] = rValueFrameState;
outputValue[i] = rOutput;
}
}
template <class OpResetOutput, typename T>
void hl_avx_gru_forward_reset_output(OpResetOutput opResetOutput, T *gateValue,
T *resetOutputValue, T *prevOutputValue,
int frameSize,
activation_mode_t active_gate) {
#ifdef __AVX__
__m256 rValueUpdateGate;
__m256 rValueResetGate;
__m256 rValueResetOutput;
__m256 rPrevOut = _mm256_set1_ps(0.0f);
__m256 *updateGate = (__m256 *)gateValue;
__m256 *resetGate = (__m256 *)(gateValue + frameSize);
for (int i = 0; i < frameSize / 8; i++) {
rValueUpdateGate = updateGate[i];
rValueResetGate = resetGate[i];
if (prevOutputValue) {
rPrevOut = ((__m256 *)prevOutputValue)[i];
}
opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut,
rValueResetOutput, active_gate);
updateGate[i] = rValueUpdateGate;
resetGate[i] = rValueResetGate;
((__m256 *)resetOutputValue)[i] = rValueResetOutput;
}
#endif
}
template <class OpFinalOutput, typename T>
void hl_avx_gru_forward_final_output(OpFinalOutput opFinalOutput, T *gateValue,
T *prevOutputValue, T *outputValue,
int frameSize,
activation_mode_t active_node) {
#ifdef __AVX__
__m256 rValueUpdateGate;
__m256 rValueFrameState;
__m256 rPrevOut = _mm256_set1_ps(0.0f);
__m256 rOutput;
__m256 *updateGate = (__m256 *)gateValue;
__m256 *frameState = (__m256 *)(gateValue + frameSize * 2);
for (int i = 0; i < frameSize / 8; i++) {
rValueUpdateGate = updateGate[i];
rValueFrameState = frameState[i];
if (prevOutputValue) {
rPrevOut = ((__m256 *)prevOutputValue)[i];
}
opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput,
active_node);
frameState[i] = rValueFrameState;
((__m256 *)outputValue)[i] = rOutput;
}
#endif
}
template <class OpResetOutput, typename T>
inline void forward_reset_output(OpResetOutput opResetOutput,
hl_gru_value<T> value, int frameSize,
int batchSize, activation_mode_t active_gate) {
for (int b = 0; b < batchSize; b++) {
if (OpResetOutput::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_forward_reset_output(
opResetOutput, value.gateValue, value.resetOutputValue,
value.prevOutValue, frameSize, active_gate);
} else {
hl_naive_gru_forward_reset_output(
opResetOutput, value.gateValue, value.resetOutputValue,
value.prevOutValue, frameSize, active_gate);
}
value.gateValue += frameSize * 3;
value.resetOutputValue += frameSize;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
}
}
template <class OpFinalOutput, typename T>
inline void forward_final_output(OpFinalOutput opFinalOutput,
hl_gru_value<T> value, int frameSize,
int batchSize, activation_mode_t active_node) {
for (int b = 0; b < batchSize; b++) {
if (OpFinalOutput::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_forward_final_output(opFinalOutput, value.gateValue,
value.prevOutValue, value.outputValue,
frameSize, active_node);
} else {
hl_naive_gru_forward_final_output(opFinalOutput, value.gateValue,
value.prevOutValue, value.outputValue,
frameSize, active_node);
}
value.gateValue += frameSize * 3;
value.outputValue += frameSize;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
}
}
template <class OpStateGrad, typename T>
void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *outputGrad,
int frameSize,
activation_mode_t active_node) {
T rUpdateGateValue;
T rUpdateGateGrad;
T rFrameStateValue;
T rFrameStateGrad;
T rOutGrad;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T *updateGateValue = gateValue;
T *updateGateGrad = gateGrad;
T *frameStateValue = gateValue + frameSize * 2;
T *frameStateGrad = gateGrad + frameSize * 2;
for (int i = 0; i < frameSize; i++) {
rUpdateGateValue = updateGateValue[i];
rFrameStateValue = frameStateValue[i];
rOutGrad = outputGrad[i];
if (prevOutValue) {
rPrevOutValue = prevOutValue[i];
}
if (prevOutGrad) {
rPrevOutGrad = prevOutGrad[i];
}
opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue,
rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad,
active_node);
updateGateGrad[i] = rUpdateGateGrad;
frameStateGrad[i] = rFrameStateGrad;
if (prevOutGrad) {
prevOutGrad[i] = rPrevOutGrad;
}
}
}
template <class OpResetGrad, typename T>
void hl_naive_gru_backward_reset_grad(OpResetGrad opResetGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *resetOutputGrad,
int frameSize,
activation_mode_t active_gate) {
T rUpdateGateValue;
T rUpdateGateGrad;
T rResetGateValue;
T rResetGateGrad;
T rResetOutputGrad = 0;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T *updateGateValue = gateValue;
T *updateGateGrad = gateGrad;
T *resetGateValue = gateValue + frameSize;
T *resetGateGrad = gateGrad + frameSize;
for (int i = 0; i < frameSize; i++) {
rUpdateGateValue = updateGateValue[i];
rUpdateGateGrad = updateGateGrad[i];
rResetGateValue = resetGateValue[i];
if (prevOutValue && prevOutGrad) {
rResetOutputGrad = resetOutputGrad[i];
}
if (prevOutValue) {
rPrevOutValue = prevOutValue[i];
}
if (prevOutGrad) {
rPrevOutGrad = prevOutGrad[i];
}
opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue,
rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad,
active_gate);
updateGateGrad[i] = rUpdateGateGrad;
resetGateGrad[i] = rResetGateGrad;
if (prevOutGrad) {
prevOutGrad[i] = rPrevOutGrad;
}
}
}
template <class OpStateGrad, typename T>
void hl_avx_gru_backward_state_grad(OpStateGrad opStateGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *outputGrad,
int frameSize,
activation_mode_t active_node) {
#ifdef __AVX__
__m256 rUpdateGateValue;
__m256 rUpdateGateGrad;
__m256 rFrameStateValue;
__m256 rFrameStateGrad;
__m256 rOutGrad;
__m256 rPrevOutValue = _mm256_set1_ps(0.0f);
__m256 rPrevOutGrad = _mm256_set1_ps(0.0f);
__m256 *updateGateValue = (__m256 *)gateValue;
__m256 *updateGateGrad = (__m256 *)gateGrad;
__m256 *frameStateValue = (__m256 *)(gateValue + frameSize * 2);
__m256 *frameStateGrad = (__m256 *)(gateGrad + frameSize * 2);
for (int i = 0; i < frameSize / 8; i++) {
rUpdateGateValue = updateGateValue[i];
rFrameStateValue = frameStateValue[i];
rOutGrad = ((__m256 *)outputGrad)[i];
if (prevOutValue) {
rPrevOutValue = ((__m256 *)prevOutValue)[i];
}
if (prevOutGrad) {
rPrevOutGrad = ((__m256 *)prevOutGrad)[i];
}
opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue,
rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad,
active_node);
updateGateGrad[i] = rUpdateGateGrad;
frameStateGrad[i] = rFrameStateGrad;
if (prevOutGrad) {
((__m256 *)prevOutGrad)[i] = rPrevOutGrad;
}
}
#endif
}
template <class OpResetGrad, typename T>
void hl_avx_gru_backward_reset_grad(OpResetGrad opResetGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *resetOutputGrad,
int frameSize,
activation_mode_t active_gate) {
#ifdef __AVX__
__m256 rUpdateGateValue;
__m256 rUpdateGateGrad;
__m256 rResetGateValue;
__m256 rResetGateGrad;
__m256 rResetOutputGrad = _mm256_set1_ps(0.0f);
__m256 rPrevOutValue = _mm256_set1_ps(0.0f);
__m256 rPrevOutGrad = _mm256_set1_ps(0.0f);
__m256 *updateGateValue = (__m256 *)gateValue;
__m256 *updateGateGrad = (__m256 *)gateGrad;
__m256 *resetGateValue = (__m256 *)(gateValue + frameSize);
__m256 *resetGateGrad = (__m256 *)(gateGrad + frameSize);
for (int i = 0; i < frameSize / 8; i++) {
rUpdateGateValue = updateGateValue[i];
rUpdateGateGrad = updateGateGrad[i];
rResetGateValue = resetGateValue[i];
if (prevOutValue && prevOutGrad) {
rResetOutputGrad = ((__m256 *)resetOutputGrad)[i];
}
if (prevOutValue) {
rPrevOutValue = ((__m256 *)prevOutValue)[i];
}
if (prevOutGrad) {
rPrevOutGrad = ((__m256 *)prevOutGrad)[i];
}
opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue,
rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad,
active_gate);
updateGateGrad[i] = rUpdateGateGrad;
resetGateGrad[i] = rResetGateGrad;
if (prevOutGrad) {
((__m256 *)prevOutGrad)[i] = rPrevOutGrad;
}
}
#endif
}
template <class OpStateGrad, typename T>
inline void backward_state_grad(OpStateGrad opStateGrad, hl_gru_value<T> value,
hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node) {
for (int b = 0; b < batchSize; b++) {
if (OpStateGrad::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_backward_state_grad(
opStateGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.outputGrad, frameSize, active_node);
} else {
hl_naive_gru_backward_state_grad(
opStateGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.outputGrad, frameSize, active_node);
}
value.gateValue += frameSize * 3;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
grad.gateGrad += frameSize * 3;
grad.outputGrad += frameSize;
if (grad.prevOutGrad) {
grad.prevOutGrad += frameSize;
}
}
}
template <class OpResetGrad, typename T>
inline void backward_reset_grad(OpResetGrad opResetGrad, hl_gru_value<T> value,
hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_gate) {
for (int b = 0; b < batchSize; b++) {
if (OpResetGrad::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) {
hl_avx_gru_backward_reset_grad(
opResetGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.resetOutputGrad, frameSize, active_gate);
} else {
hl_naive_gru_backward_reset_grad(
opResetGrad, value.gateValue, grad.gateGrad, value.prevOutValue,
grad.prevOutGrad, grad.resetOutputGrad, frameSize, active_gate);
}
value.gateValue += frameSize * 3;
if (value.prevOutValue) {
value.prevOutValue += frameSize;
}
grad.gateGrad += frameSize * 3;
grad.resetOutputGrad += frameSize;
if (grad.prevOutGrad) {
grad.prevOutGrad += frameSize;
}
}
}
#endif
} // namespace detail
} // namespace math
} // namespace operators
} // namespace paddle
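// ---------------------------------------------------------------------------
// Editor's note (not part of the original sources): a minimal sketch of the
// dispatch rule used by backward_state_grad/backward_reset_grad above. The
// AVX path requires a functor that provides an __m256 overload, a frameSize
// that is a multiple of 8 (one __m256 register holds eight packed floats),
// and a 4-byte element type; anything else falls back to the naive loops.
#if 0  // illustration only, compiled out
template <class Op, typename T>
bool TakesAvxPath(int frameSize) {
  // Mirrors: Op::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)
  return Op::avx && (frameSize % 8 == 0) && (sizeof(T) == 4);
}
#endif
// ---------------------------------------------------------------------------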
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <type_traits>
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/operators/math/gru_compute.h"
#include "paddle/platform/cuda_helper.h"
#include "paddle/platform/device_context.h"
#include <glog/logging.h>
namespace paddle {
namespace operators {
namespace math {
namespace detail {
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpResetOutput, bool isBatch, typename T>
__global__ void KeGruForwardResetOutput(OpResetOutput opResetOutput,
T *gateValue, T *resetOutputValue,
T *prevOutputValue, int frameSize,
int batchSize,
activation_mode_t active_gate) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
resetOutputValue += batchIdx * frameSize;
}
T rPrevOut = 0;
T rValueResetOutput;
T rValueUpdateGate = gateValue[frameIdx + frameSize * 0];
T rValueResetGate = gateValue[frameIdx + frameSize * 1];
if (prevOutputValue) {
if (isBatch) prevOutputValue += batchIdx * frameSize;
rPrevOut = prevOutputValue[frameIdx];
}
opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut, rValueResetOutput,
active_gate);
gateValue[frameIdx + frameSize * 0] = rValueUpdateGate;
gateValue[frameIdx + frameSize * 1] = rValueResetGate;
resetOutputValue[frameIdx] = rValueResetOutput;
}
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpFinalOutput, bool isBatch, typename T>
__global__ void KeGruForwardFinalOutput(OpFinalOutput opFinalOutput,
T *gateValue, T *prevOutputValue,
T *outputValue, int frameSize,
int batchSize,
activation_mode_t active_node) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
outputValue += batchIdx * frameSize;
}
T rOutput;
T rPrevOut = 0;
T rValueUpdateGate = gateValue[frameIdx + frameSize * 0];
T rValueFrameState = gateValue[frameIdx + frameSize * 2];
if (prevOutputValue) {
if (isBatch) prevOutputValue += batchIdx * frameSize;
rPrevOut = prevOutputValue[frameIdx];
}
opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput,
active_node);
gateValue[frameIdx + frameSize * 2] = rValueFrameState;
outputValue[frameIdx] = rOutput;
}
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpStateGrad, bool isBatch, typename T>
__global__ void KeGruBackwardStateGrad(OpStateGrad opStateGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *outputGrad,
int frameSize, int batchSize,
activation_mode_t active_node) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
gateGrad += batchIdx * 3 * frameSize;
outputGrad += batchIdx * frameSize;
}
T rUpdateGateGrad;
T rFrameStateGrad;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T rUpdateGateValue = gateValue[frameIdx + frameSize * 0];
T rFrameStateValue = gateValue[frameIdx + frameSize * 2];
T rOutGrad = outputGrad[frameIdx];
if (prevOutValue && prevOutGrad) {
if (isBatch) prevOutValue += batchIdx * frameSize;
rPrevOutValue = prevOutValue[frameIdx];
if (isBatch) prevOutGrad += batchIdx * frameSize;
rPrevOutGrad = prevOutGrad[frameIdx];
}
opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue,
rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad,
active_node);
gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad;
gateGrad[frameIdx + frameSize * 2] = rFrameStateGrad;
if (prevOutGrad) {
prevOutGrad[frameIdx] = rPrevOutGrad;
}
}
/*
* threads(framePerBlock, batchPerBlock)
* grid(frameBlocks, batchBlocks)
*/
template <class OpResetGrad, bool isBatch, typename T>
__global__ void KeGruBackwardResetGrad(OpResetGrad opResetGrad, T *gateValue,
T *gateGrad, T *prevOutValue,
T *prevOutGrad, T *resetOutputGrad,
int frameSize, int batchSize,
activation_mode_t active_gate) {
const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x;
if (frameIdx >= frameSize) return;
int batchIdx = 0;
if (isBatch) {
batchIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (batchIdx >= batchSize) return;
gateValue += batchIdx * 3 * frameSize;
gateGrad += batchIdx * 3 * frameSize;
resetOutputGrad += batchIdx * frameSize;
}
T rResetGateGrad;
T rPrevOutValue = 0;
T rPrevOutGrad = 0;
T rResetOutputGrad = 0;
T rUpdateGateValue = gateValue[frameIdx + frameSize * 0];
T rUpdateGateGrad = gateGrad[frameIdx + frameSize * 0];
T rResetGateValue = gateValue[frameIdx + frameSize * 1];
if (prevOutValue && prevOutGrad) {
if (isBatch) prevOutValue += batchIdx * frameSize;
if (isBatch) prevOutGrad += batchIdx * frameSize;
rPrevOutValue = prevOutValue[frameIdx];
rPrevOutGrad = prevOutGrad[frameIdx];
rResetOutputGrad = resetOutputGrad[frameIdx];
}
opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue,
rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad,
active_gate);
gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad;
gateGrad[frameIdx + frameSize * 1] = rResetGateGrad;
if (prevOutGrad) {
prevOutGrad[frameIdx] = rPrevOutGrad;
}
}
} // namespace detail
} // namespace math
} // namespace operators
} // namespace paddle
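// ---------------------------------------------------------------------------
// Editor's note (not part of the original sources): the kernels above assume
// that each batch row of gateValue packs the three GRU gates contiguously:
//   [ update | reset | frameState ], each frameSize wide,
// hence the indexing gateValue[frameIdx + frameSize * {0, 1, 2}] and the row
// offset batchIdx * 3 * frameSize when isBatch is true. A hypothetical helper:
#if 0  // illustration only, compiled out
inline int GateIndex(int batchIdx, int gate /*0=update,1=reset,2=state*/,
                     int frameIdx, int frameSize) {
  return batchIdx * 3 * frameSize + gate * frameSize + frameIdx;
}
#endif
// ---------------------------------------------------------------------------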
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/detail/activation_functions.h"
#include "paddle/platform/hostdevice.h"
#include <type_traits>
// TODO(guosheng): refine code style in gru_kernel
namespace paddle {
namespace operators {
namespace math {
namespace detail {
namespace forward {
template <typename T>
class gru_resetOutput {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &valueResetGate, T &prevOut,
T &valueResetOutput, activation_mode_t actGate) {
valueUpdateGate = activation(valueUpdateGate, actGate);
valueResetGate = activation(valueResetGate, actGate);
valueResetOutput = prevOut * valueResetGate;
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &valueResetGate,
__m256 &prevOut, __m256 &valueResetOutput,
activation_mode_t actGate) {
valueUpdateGate = activation(valueUpdateGate, actGate);
valueResetGate = activation(valueResetGate, actGate);
valueResetOutput = _mm256_mul_ps(prevOut, valueResetGate);
}
#endif
#endif
};
template <typename T>
class gru_finalOutput {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &valueFrameState, T &prevOut,
T &valueOutput, activation_mode_t actInput) {
valueFrameState = activation(valueFrameState, actInput);
valueOutput = prevOut - (valueUpdateGate * prevOut) +
(valueUpdateGate * valueFrameState);
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &valueFrameState,
__m256 &prevOut, __m256 &valueOutput,
activation_mode_t actInput) {
valueFrameState = activation(valueFrameState, actInput);
valueOutput = _mm256_add_ps(
_mm256_sub_ps(prevOut, _mm256_mul_ps(valueUpdateGate, prevOut)),
_mm256_mul_ps(valueUpdateGate, valueFrameState));
}
#endif
#endif
};
} // namespace forward
namespace backward {
template <typename T>
class gru_stateGrad {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &gradUpdateGate,
T &valueFrameState, T &gradFrameState,
T &valuePrevOut, T &gradPrevOut, T &gradOutput,
activation_mode_t actInput) {
gradUpdateGate = (gradOutput * valueFrameState);
gradUpdateGate -= (gradOutput * valuePrevOut);
gradPrevOut -= (gradOutput * valueUpdateGate);
gradPrevOut += gradOutput;
gradFrameState =
activation(gradOutput * valueUpdateGate, valueFrameState, actInput);
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &gradUpdateGate,
__m256 &valueFrameState, __m256 &gradFrameState,
__m256 &valuePrevOut, __m256 &gradPrevOut,
__m256 &gradOutput, activation_mode_t actInput) {
gradUpdateGate = _mm256_mul_ps(gradOutput, valueFrameState);
gradUpdateGate =
_mm256_sub_ps(gradUpdateGate, _mm256_mul_ps(gradOutput, valuePrevOut));
gradPrevOut = _mm256_add_ps(
_mm256_sub_ps(gradPrevOut, _mm256_mul_ps(gradOutput, valueUpdateGate)),
gradOutput);
gradFrameState = activation(_mm256_mul_ps(gradOutput, valueUpdateGate),
valueFrameState, actInput);
}
#endif
#endif
};
template <typename T>
class gru_resetGrad {
public:
HOSTDEVICE void operator()(T &valueUpdateGate, T &gradUpdateGate,
T &valueResetGate, T &gradResetGate,
T &valuePrevOut, T &gradPrevOut,
T &gradResetOutput, activation_mode_t actGate) {
gradResetGate = (gradResetOutput * valuePrevOut);
gradPrevOut += (gradResetOutput * valueResetGate);
gradUpdateGate = activation(gradUpdateGate, valueUpdateGate, actGate);
gradResetGate = activation(gradResetGate, valueResetGate, actGate);
}
#ifndef __NVCC__
#ifndef __AVX__
static const bool avx = false;
#else
static const bool avx = true;
HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &gradUpdateGate,
__m256 &valueResetGate, __m256 &gradResetGate,
__m256 &valuePrevOut, __m256 &gradPrevOut,
__m256 &gradResetOutput,
activation_mode_t actGate) {
gradResetGate = _mm256_mul_ps(gradResetOutput, valuePrevOut);
gradPrevOut = _mm256_add_ps(gradPrevOut,
_mm256_mul_ps(gradResetOutput, valueResetGate));
gradUpdateGate = activation(gradUpdateGate, valueUpdateGate, actGate);
gradResetGate = activation(gradResetGate, valueResetGate, actGate);
}
#endif
#endif
};
} // namespace backward
} // namespace detail
} // namespace math
} // namespace operators
} // namespace paddle
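// ---------------------------------------------------------------------------
// Editor's note (not part of the original sources): in scalar form, the two
// forward functors above compute, for update gate u, reset gate r, candidate
// state s and previous output h_prev:
//   u = actGate(u),  r = actGate(r),  resetOutput = h_prev * r
//   s = actInput(s), h = h_prev - u * h_prev + u * s
// i.e. gru_finalOutput is the usual convex blend (1 - u) * h_prev + u * s.
#if 0  // illustration only, compiled out
float GruFinalOutput(float u, float h_prev, float s) {  // u, s pre-activated
  return h_prev - u * h_prev + u * s;  // == (1 - u) * h_prev + u * s
}
#endif
// ---------------------------------------------------------------------------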
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/gru_compute.h"
#include "paddle/operators/math/detail/gru_cpu_kernel.h"
#include "paddle/operators/math/detail/gru_kernel.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
struct GRUUnitFunctor<platform::CPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, int frameSize, int batchSize,
activation_mode_t active_node,
activation_mode_t active_gate) {
#ifndef __NVCC__
if (value.prevOutValue) {
math::gemm<platform::CPUPlace, T>(
context, false, false, batchSize, frameSize * 2, frameSize, 1,
value.prevOutValue, frameSize, value.gateWeight, frameSize * 2, 1,
value.gateValue, frameSize * 3);
}
detail::forward_reset_output(detail::forward::gru_resetOutput<T>(), value,
frameSize, batchSize, active_gate);
if (value.prevOutValue) {
math::gemm<platform::CPUPlace, T>(
context, false, false, batchSize, frameSize, frameSize, 1,
value.resetOutputValue, frameSize, value.stateWeight, frameSize, 1,
value.gateValue + frameSize * 2, frameSize * 3);
}
detail::forward_final_output(detail::forward::gru_finalOutput<T>(), value,
frameSize, batchSize, active_node);
#endif
}
};
template <typename T>
struct GRUUnitGradFunctor<platform::CPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node,
activation_mode_t active_gate) {
#ifndef __NVCC__
detail::backward_state_grad(detail::backward::gru_stateGrad<T>(), value,
grad, frameSize, batchSize, active_node);
if (value.prevOutValue && grad.prevOutGrad) {
math::gemm<platform::CPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize, 1,
grad.gateGrad + frameSize * 2, frameSize * 3, value.stateWeight,
frameSize, 0, grad.resetOutputGrad, frameSize);
if (grad.stateWeightGrad) {
math::gemm<platform::CPUPlace, T>(
context, true, false, frameSize, frameSize, batchSize, 1,
value.resetOutputValue, frameSize, grad.gateGrad + frameSize * 2,
frameSize * 3, 1, grad.stateWeightGrad, frameSize);
}
}
detail::backward_reset_grad(detail::backward::gru_resetGrad<T>(), value,
grad, frameSize, batchSize, active_gate);
if (grad.prevOutGrad && value.prevOutValue) {
math::gemm<platform::CPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize * 2, 1,
grad.gateGrad, frameSize * 3, value.gateWeight, frameSize * 2, 1,
grad.prevOutGrad, frameSize);
if (grad.gateWeightGrad) {
math::gemm<platform::CPUPlace, T>(
context, true, false, frameSize, frameSize * 2, batchSize, 1,
value.prevOutValue, frameSize, grad.gateGrad, frameSize * 3, 1,
grad.gateWeightGrad, frameSize * 2);
}
}
#endif
}
};
template struct GRUUnitFunctor<platform::CPUPlace, float>;
template struct GRUUnitFunctor<platform::CPUPlace, double>;
template struct GRUUnitGradFunctor<platform::CPUPlace, float>;
template struct GRUUnitGradFunctor<platform::CPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
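// ---------------------------------------------------------------------------
// Editor's note (not part of the original sources): a sketch of the buffer
// shapes GRUUnitFunctor<CPUPlace, T>::compute expects, with batchSize = B and
// frameSize = D (row-major, contiguous), inferred from the gemm arguments:
//   value.gateValue        : B x 3D  ([update | reset | frameState] per row)
//   value.resetOutputValue : B x D
//   value.outputValue      : B x D
//   value.prevOutValue     : B x D, or nullptr for the first time step
//   value.gateWeight       : D x 2D  (update/reset gate weights)
//   value.stateWeight      : D x D   (candidate state weights)
// The first gemm accumulates prevOut * gateWeight into the update/reset gate
// columns; the second accumulates resetOutput * stateWeight into the
// frame-state columns before the activations are applied.
// ---------------------------------------------------------------------------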
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/detail/gru_gpu_kernel.h"
#include "paddle/operators/math/detail/gru_kernel.h"
#include "paddle/operators/math/gru_compute.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
struct GRUUnitFunctor<platform::GPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, int frameSize, int batchSize,
activation_mode_t active_node,
activation_mode_t active_gate) {
auto stream =
reinterpret_cast<const platform::CUDADeviceContext &>(context).stream();
dim3 threads;
dim3 grid;
if (batchSize == 1) {
int framePerBlock = frameSize <= 1024 ? frameSize : 1024;
int frameBlocks = (frameSize + 1024 - 1) / 1024;
threads = dim3(framePerBlock, 1);
grid = dim3(frameBlocks, 1);
} else {
threads = dim3(32, 32);
grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
}
if (value.prevOutValue) {
math::gemm<platform::GPUPlace, T>(
context, false, false, batchSize, frameSize * 2, frameSize, 1,
value.prevOutValue, frameSize, value.gateWeight, frameSize * 2, 1,
value.gateValue, frameSize * 3);
}
if (batchSize == 1) {
detail::KeGruForwardResetOutput<detail::forward::gru_resetOutput<T>,
/* isBatch= */ false,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_resetOutput<T>(), value.gateValue,
value.resetOutputValue, value.prevOutValue, frameSize, batchSize,
active_gate);
} else {
detail::KeGruForwardResetOutput<detail::forward::gru_resetOutput<T>,
/* isBatch= */ true,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_resetOutput<T>(), value.gateValue,
value.resetOutputValue, value.prevOutValue, frameSize, batchSize,
active_gate);
}
if (value.prevOutValue) {
math::gemm<platform::GPUPlace, T>(
context, false, false, batchSize, frameSize, frameSize, 1,
value.resetOutputValue, frameSize, value.stateWeight, frameSize, 1,
value.gateValue + frameSize * 2, frameSize * 3);
}
if (batchSize == 1) {
detail::KeGruForwardFinalOutput<detail::forward::gru_finalOutput<T>,
/* isBatch= */ false,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_finalOutput<T>(), value.gateValue,
value.prevOutValue, value.outputValue, frameSize, batchSize,
active_node);
} else {
detail::KeGruForwardFinalOutput<detail::forward::gru_finalOutput<T>,
/* isBatch= */ true,
T><<<grid, threads, 0, stream>>>(
detail::forward::gru_finalOutput<T>(), value.gateValue,
value.prevOutValue, value.outputValue, frameSize, batchSize,
active_node);
}
}
};
template <typename T>
struct GRUUnitGradFunctor<platform::GPUPlace, T> {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node,
activation_mode_t active_gate) {
auto stream =
reinterpret_cast<const platform::CUDADeviceContext &>(context).stream();
dim3 threads;
dim3 grid;
if (batchSize == 1) {
int framePerBlock = frameSize <= 1024 ? frameSize : 1024;
int frameBlocks = (frameSize + 1024 - 1) / 1024;
threads = dim3(framePerBlock, 1);
grid = dim3(frameBlocks, 1);
} else {
threads = dim3(32, 32);
grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32);
}
if (batchSize == 1) {
detail::KeGruBackwardStateGrad<
detail::backward::gru_stateGrad<T>,
/* isBatch= */ false><<<grid, threads, 0, stream>>>(
detail::backward::gru_stateGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.outputGrad, frameSize,
batchSize, active_node);
} else {
detail::KeGruBackwardStateGrad<
detail::backward::gru_stateGrad<T>,
/* isBatch= */ true><<<grid, threads, 0, stream>>>(
detail::backward::gru_stateGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.outputGrad, frameSize,
batchSize, active_node);
}
if (value.prevOutValue && grad.prevOutGrad) {
math::gemm<platform::GPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize, 1,
grad.gateGrad + frameSize * 2, frameSize * 3, value.stateWeight,
frameSize, 0, grad.resetOutputGrad, frameSize);
if (grad.stateWeightGrad) {
math::gemm<platform::GPUPlace, T>(
context, true, false, frameSize, frameSize, batchSize, 1,
value.resetOutputValue, frameSize, grad.gateGrad + frameSize * 2,
frameSize * 3, 1, grad.stateWeightGrad, frameSize);
}
}
if (batchSize == 1) {
detail::KeGruBackwardResetGrad<
detail::backward::gru_resetGrad<T>,
/* isBatch= */ false><<<grid, threads, 0, stream>>>(
detail::backward::gru_resetGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.resetOutputGrad, frameSize,
batchSize, active_gate);
} else {
detail::KeGruBackwardResetGrad<
detail::backward::gru_resetGrad<T>,
/* isBatch= */ true><<<grid, threads, 0, stream>>>(
detail::backward::gru_resetGrad<T>(), value.gateValue, grad.gateGrad,
value.prevOutValue, grad.prevOutGrad, grad.resetOutputGrad, frameSize,
batchSize, active_gate);
}
if (grad.prevOutGrad && value.prevOutValue) {
math::gemm<platform::GPUPlace, T>(
context, false, true, batchSize, frameSize, frameSize * 2, 1,
grad.gateGrad, frameSize * 3, value.gateWeight, frameSize * 2, 1,
grad.prevOutGrad, frameSize);
if (grad.gateWeightGrad) {
math::gemm<platform::GPUPlace, T>(
context, true, false, frameSize, frameSize * 2, batchSize, 1,
value.prevOutValue, frameSize, grad.gateGrad, frameSize * 3, 1,
grad.gateWeightGrad, frameSize * 2);
}
}
}
};
template struct GRUUnitFunctor<platform::GPUPlace, float>;
template struct GRUUnitFunctor<platform::GPUPlace, double>;
template struct GRUUnitGradFunctor<platform::GPUPlace, float>;
template struct GRUUnitGradFunctor<platform::GPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
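// ---------------------------------------------------------------------------
// Editor's note (not part of the original sources): a worked instance of the
// launch configuration above. For batchSize > 1 the kernels use 32x32 thread
// blocks and a 2-D grid, e.g. frameSize = 100, batchSize = 4 gives
//   threads = dim3(32, 32)
//   grid    = dim3((100 + 32 - 1) / 32, (4 + 32 - 1) / 32) = dim3(4, 1)
// so four block columns cover the 100 frame elements and one block row
// covers the batch. For batchSize == 1 a 1-D configuration with up to 1024
// threads per block is used instead.
// ---------------------------------------------------------------------------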
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/math/lstm_compute.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace operators {
namespace math {
// TODO(guosheng): refine code style in gru_compute
template <typename T>
struct hl_gru_value {
T *gateWeight;
T *stateWeight;
T *gateValue;
T *resetOutputValue;
T *outputValue;
T *prevOutValue;
};
template <typename T>
struct hl_gru_grad {
T *gateWeightGrad;
T *stateWeightGrad;
T *gateGrad;
T *resetOutputGrad;
T *outputGrad;
T *prevOutGrad;
};
template <typename Place, typename T>
struct GRUUnitFunctor {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, int frameSize, int batchSize,
activation_mode_t active_node,
activation_mode_t active_gate);
};
template <typename Place, typename T>
struct GRUUnitGradFunctor {
static void compute(const platform::DeviceContext &context,
hl_gru_value<T> value, hl_gru_grad<T> grad, int frameSize,
int batchSize, activation_mode_t active_node,
activation_mode_t active_gate);
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/sequence_pooling.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
class MaxSeqPoolFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::LoDTensor& input, framework::Tensor* output,
framework::Tensor* index) {
auto in_dims = input.dims();
auto out_dims = output->dims();
auto idx_dims = index->dims();
PADDLE_ENFORCE_GT(in_dims.size(), 1);
PADDLE_ENFORCE_GT(out_dims.size(), 1);
for (int64_t i = 1; i < in_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, out_dims);
auto starts = input.lod()[0];
const T* in_data = input.data<T>();
T* out_data = output->data<T>();
int* max_index = index->data<int>();
int64_t num_seq = out_dims[0];
int64_t dim = output->numel() / num_seq;
for (int64_t i = 0; i < num_seq; ++i) {
for (int64_t k = 0; k < dim; ++k) {
out_data[i * dim + k] = in_data[starts[i] * dim + k];
max_index[i * dim + k] = starts[i];
}
for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) {
for (int64_t k = 0; k < dim; ++k) {
if (in_data[j * dim + k] > out_data[i * dim + k]) {
out_data[i * dim + k] = in_data[j * dim + k];
max_index[i * dim + k] = j;
}
}
}
}
}
};
template <typename T>
class MaxSeqPoolGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& out_grad,
const framework::Tensor& index,
framework::LoDTensor* in_grad) {
auto og_dims = out_grad.dims();
auto ig_dims = in_grad->dims();
auto idx_dims = index.dims();
PADDLE_ENFORCE_GT(og_dims.size(), 1);
PADDLE_ENFORCE_GT(ig_dims.size(), 1);
for (int64_t i = 1; i < og_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, og_dims);
const T* og_data = out_grad.data<T>();
const int* max_index = index.data<int>();
T* ig_data = in_grad->data<T>();
SetConstant<platform::CPUPlace, T> set_zero;
set_zero(context, in_grad, static_cast<T>(0.0));
int64_t num_seq = og_dims[0];
int64_t dim = out_grad.numel() / num_seq;
for (int64_t i = 0; i < num_seq; ++i) {
for (int64_t j = 0; j < dim; ++j) {
int step_id = max_index[i * dim + j];
ig_data[step_id * dim + j] = og_data[i * dim + j];
}
}
}
};
template class MaxSeqPoolFunctor<platform::CPUPlace, float>;
template class MaxSeqPoolFunctor<platform::CPUPlace, double>;
template class MaxSeqPoolGradFunctor<platform::CPUPlace, float>;
template class MaxSeqPoolGradFunctor<platform::CPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
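// ---------------------------------------------------------------------------
// Editor's note (not part of the original sources): a small worked example of
// MaxSeqPoolFunctor above. With lod starts = {0, 2, 5} (two sequences of
// lengths 2 and 3) and dim = 2:
//   input  = [[1, 9], [3, 2],          // sequence 0: rows 0..1
//             [0, 4], [5, 1], [2, 8]]  // sequence 1: rows 2..4
//   output = [[3, 9],                  // column-wise max over rows 0..1
//             [5, 8]]                  // column-wise max over rows 2..4
//   index  = [[1, 0],
//             [3, 4]]                  // source row of each maximum
// MaxSeqPoolGradFunctor then scatters out_grad back to exactly those rows,
// leaving the rest of in_grad zero.
// ---------------------------------------------------------------------------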
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence_pooling.h"
namespace paddle {
namespace operators {
namespace math {
#define FLT_MAX __FLT_MAX__
template <typename T>
__global__ void KeMaxSequencePool(const T* input, const size_t* starts,
T* output, int* index, int64_t num_seq,
int64_t dim) {
int dim_idx = threadIdx.x;
int seq_id = blockIdx.x;
if (seq_id >= num_seq) return;
size_t start = starts[seq_id];
size_t end = starts[seq_id + 1];
for (int64_t i = dim_idx; i < dim; i += blockDim.x) {
T max_val = static_cast<T>(-FLT_MAX);
int max_id = -1;
for (size_t step_id = start; step_id < end; step_id++) {
if (max_val < input[step_id * dim + i]) {
max_val = input[step_id * dim + i];
max_id = step_id;
}
}
output[seq_id * dim + i] = max_val;
index[seq_id * dim + i] = max_id;
}
}
template <typename T>
class MaxSeqPoolFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::LoDTensor& input, framework::Tensor* output,
framework::Tensor* index) {
auto in_dims = input.dims();
auto out_dims = output->dims();
auto idx_dims = index->dims();
PADDLE_ENFORCE_GT(in_dims.size(), static_cast<int64_t>(1));
PADDLE_ENFORCE_GT(out_dims.size(), 1);
for (int64_t i = 1; i < in_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, out_dims);
auto starts = input.lod()[0];
const T* in_data = input.data<T>();
T* out_data = output->data<T>();
int* max_index = index->data<int>();
int64_t num_seq = out_dims[0];
int64_t dim = output->numel() / num_seq;
dim3 threads(256, 1);
dim3 grid(num_seq, 1);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(context).stream();
KeMaxSequencePool<T><<<grid, threads, 0, stream>>>(
in_data, starts.data(), out_data, max_index, num_seq, dim);
}
};
template <typename T>
__global__ void KeMaxSequencePoolGrad(const T* out_grad, const int* max_index,
T* in_grad, int64_t num_seq,
int64_t dim) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
int col_idx = idx % dim;
if (idx < num_seq * dim) {
int step_id = max_index[idx];
in_grad[step_id * dim + col_idx] = out_grad[idx];
}
}
template <typename T>
class MaxSeqPoolGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& out_grad,
const framework::Tensor& index,
framework::LoDTensor* in_grad) {
auto og_dims = out_grad.dims();
auto idx_dims = index.dims();
auto ig_dims = in_grad->dims();
PADDLE_ENFORCE_GT(og_dims.size(), static_cast<int64_t>(1));
PADDLE_ENFORCE_GT(ig_dims.size(), static_cast<int64_t>(1));
for (int64_t i = 1; i < og_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]);
}
PADDLE_ENFORCE_EQ(idx_dims, og_dims);
const T* og_data = out_grad.data<T>();
const int* max_index = index.data<int>();
T* ig_data = in_grad->data<T>();
SetConstant<platform::GPUPlace, T> set_zero;
set_zero(context, in_grad, static_cast<T>(0.0));
int64_t num_seq = og_dims[0];
int64_t dim = out_grad.numel() / num_seq;
unsigned int blocks = (num_seq * dim + 128 - 1) / 128;
dim3 threads(128, 1);
dim3 grid(blocks, 1);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(context).stream();
KeMaxSequencePoolGrad<T><<<grid, threads, 0, stream>>>(
og_data, max_index, ig_data, num_seq, dim);
}
};
template class MaxSeqPoolFunctor<platform::GPUPlace, float>;
template class MaxSeqPoolFunctor<platform::GPUPlace, double>;
template class MaxSeqPoolGradFunctor<platform::GPUPlace, float>;
template class MaxSeqPoolGradFunctor<platform::GPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
#define FLT_MAX __FLT_MAX__
template <typename Place, typename T>
class MaxSeqPoolFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::LoDTensor& input, framework::Tensor* output,
framework::Tensor* index);
};
template <typename Place, class T>
class MaxSeqPoolGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& out_grad,
const framework::Tensor& index,
framework::LoDTensor* in_grad);
};
} // namespace math
} // namespace operators
} // namespace paddle
...@@ -144,7 +144,10 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
)DOC")
        .SetDefault(false);
    AddComment(R"DOC(
MatMul Operator.

This operator is used to perform (batched) matrix multiplication
over the last two dimensions of the input tensors `X` and `Y`.

If a transpose flag is specified, the last two dimensions of the
...@@ -166,7 +169,8 @@ The differences are:
- We add `transpose_X` and `transpose_Y` flags.

Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input `X`.

)DOC");
  }
};
...
...@@ -36,7 +36,11 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input of mean op");
    AddOutput("Out", "The output of mean op");
    AddComment(R"DOC(
Mean Operator.

Out is a scalar which is the mean of all elements in X.

)DOC");
  }
};
...
...@@ -52,14 +52,16 @@ class MinusOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Y", "The right tensor of minus operator.");
    AddOutput("Out", "The output tensor of minus operator.");
    AddComment(R"DOC(
Minus Operator.

Equation:

    $Out = X - Y$

Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input `X`.

)DOC");
  }
};
...
...@@ -43,27 +43,35 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
                           framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "The input tensor of modified huber loss op. "
             "X is 2-D tensor with shape [batch_size, 1].");
    AddInput("Y",
             "The target labels of modified huber loss op. "
             "The shape of Y is the same as X. Values of Y must be 0 or 1.");
    AddOutput("IntermediateVal",
              "Variable to save intermediate result which will be reused in "
              "backward processing.")
        .AsIntermediate();
    AddOutput("Out", "Classification loss for X.");
    AddComment(R"DOC(
Modified Huber Loss Operator.

This operator is used in binary classification problem. The shape of
input X and target Y are both [N, 1] and so is the shape of the output loss.
Since target Y is not differentiable, calculating gradient for Y is illegal.
The formula of modified huber loss is:

$$
L(y, f(x)) =
\begin{cases}
(\max(0, 1 - yf(x)))^2, & \text{if } yf(x) \geq -1 \\
-4yf(x),                & \text{otherwise}
\end{cases}
$$

Make sure the values of target label Y are in {0, 1} here. This operator will
scale values of Y to {-1, +1} when computing losses and gradients.
)DOC");
  }
};
...
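Editor's sketch (not part of the commit): the loss above in scalar C++, assuming the label has already been scaled to y in {-1, +1}:

    // needs <algorithm> for std::max
    float ModifiedHuberLoss(float y, float fx) {
      float z = y * fx;
      if (z >= -1.0f) {
        float m = std::max(0.0f, 1.0f - z);
        return m * m;      // quadratic branch, yf(x) >= -1
      }
      return -4.0f * z;    // linear branch otherwise
    }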
...@@ -75,17 +75,23 @@ class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput("VelocityOut", "(Tensor) Output updated velocity");
    AddAttr<float>("mu", "(float) Momentum coefficient");
    AddAttr<bool>("useNesterov",
                  "(bool, default false) "
                  "Use Nesterov Momentum")
        .SetDefault(false);
    AddComment(R"DOC(
Momentum Optimizer.

This optimizer has a flag for Nesterov Momentum.
The update equations are as follows:

$$
velocity = mu * velocity + gradient \\
if (use\_nesterov): \\
  param = param - gradient * learning\_rate + mu * velocity * learning\_rate \\
else: \\
  param = param - learning\_rate * velocity \\
$$

)DOC");
  }
...
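Editor's sketch (not part of the commit): a plain-C++ restatement of the update equations exactly as documented above; the function and parameter names are illustrative:

    void MomentumStep(float &param, float &velocity, float grad, float mu,
                      float lr, bool use_nesterov) {
      velocity = mu * velocity + grad;
      if (use_nesterov) {
        param = param - grad * lr + mu * velocity * lr;
      } else {
        param = param - lr * velocity;
      }
    }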
...@@ -78,6 +78,7 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput("Out", "The output of mul op");
    AddAttr<int>(
        "x_num_col_dims",
        "(int, default 1) "
        R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
            in that case, tensors will be reshaped to a matrix. The matrix's first
            dimension(column length) will be the product of tensor's last
...@@ -88,20 +89,24 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
        .EqualGreaterThan(1);
    AddAttr<int>(
        "y_num_col_dims",
        "(int, default 1) "
        R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
            in that case, tensors will be reshaped to a matrix. Just like input `X`.
        )DOC")
        .SetDefault(1)
        .EqualGreaterThan(1);
    AddComment(R"DOC(
Mul Operator.

This operator is used to perform matrix multiplication for input X and Y.

The equation is:

    $$Out = X * Y$$

Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input `X`.

)DOC");
  }
};
...
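Editor's sketch (not part of the commit): how `x_num_col_dims` flattens a tensor before the multiplication; the first `x_num_col_dims` dimensions form the rows, the remaining dimensions form the columns:

    #include <cstdint>
    #include <utility>
    #include <vector>
    std::pair<int64_t, int64_t> FlattenToMatrix(
        const std::vector<int64_t> &dims, size_t num_col_dims) {
      int64_t rows = 1, cols = 1;
      for (size_t i = 0; i < num_col_dims; ++i) rows *= dims[i];
      for (size_t i = num_col_dims; i < dims.size(); ++i) cols *= dims[i];
      return {rows, cols};  // e.g. {2, 3, 4} with num_col_dims = 2 -> {6, 4}
    }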
...@@ -66,7 +66,8 @@ class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("X", "The candidate tensors of multiplex operator.")
        .AsDuplicable();
    AddOutput("Out", "The output tensor of multiplex operator.");
    AddComment(R"DOC(
Multiplex Operator.

Multiplex multiple tensors according to the index provided by the index tensor.

...@@ -77,10 +78,11 @@ the (Ids[i])-th tensor.
For i-th row of the output tensor:

$$y[i] = x_{k}[i]$$

where `y` is the output tensor, `x_{k}` is the k-th input tensor,
and `k = Ids[i]`.
)DOC");
  }
};
...
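Editor's sketch (not part of the commit): the row-selection rule above, with each candidate tensor stored as a vector of rows:

    #include <vector>
    // out[i] = xs[ids[i]][i]: row i of the output is row i of the candidate
    // tensor selected by Ids[i].
    void MultiplexRows(const std::vector<std::vector<std::vector<float>>> &xs,
                       const std::vector<int> &ids,
                       std::vector<std::vector<float>> *out) {
      for (size_t i = 0; i < ids.size(); ++i) {
        (*out)[i] = xs[ids[i]][i];
      }
    }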
...@@ -4,10 +4,10 @@ To make the operator document itself more clear, we recommend operator names obe

### OpProtoMaker names

When defining an operator in Paddle, a corresponding [OpProtoMaker](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L170) (TODO: OpProtoMaker Doc) needs to be defined. All the Input/Output and Attributes will be written into the [OpProto](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L61), and will be used in the client language to create operators.

- Input/Output.
  - Input/Output names follow **CamelCase**, e.g. `X`, `Y`, `Matrix`, `LastAxisInMatrix`. Input/Output are much like Variables; we prefer meaningful English words.
  - If an operator's Input/Output are tensors in math that do not match any meaningful word, the input name should start from `X` (e.g. `X`, `Y`) and the output name should start from `Out` (e.g. `Out`). This rule intends to keep operators with few inputs/outputs unified.

- Attribute.
...@@ -15,7 +15,7 @@ When defining an operator in Paddle, a corresponding [OpProtoMaker](https://gith

- Comments.
  - Input/Output/Attr comments follow the format of **(type, default value) usage**, corresponding to which type it can be and how it will be used in the operator, e.g. the attribute in Accumulator `"gamma"`: `(float, default 1.0) Accumulation multiplier`.
  - Operator comments follow the format of `R"DOC(your comment here)DOC"`. You should explain the input/output of the operator first. If there is math calculation in this operator, you should write the equation in the comment, e.g. `Out = X + Y`.

- Order.
  - Follow the order of Input/Output, then Attribute, then Comments. See the example in best practice.
...@@ -24,7 +24,7 @@ When defining an operator in Paddle, a corresponding [OpProtoMaker](https://gith

Here we give some examples to show how these rules will be used.

- The operator has one input, one output, e.g. `relu`: inputs: `X`, outputs: `Out`.
- The operator has two inputs, one output, e.g. `rowwise_add`: inputs: `X`, `Y`, outputs: `Out`.
...@@ -38,23 +38,27 @@ public:
  AccumulateOpMaker(framework::OpProto *proto,
                    framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(Tensor) The input tensor that has to be accumulated to the output tensor.
                   If the output size is not the same as input size,
                   the output tensor is first reshaped and initialized to zero, and only then, accumulation is done.");
    AddOutput("Out", "(Tensor) Accumulated output tensor");
    AddAttr<float>("gamma", "(float, default 1.0) Accumulation multiplier").SetDefault(1.0f);
    AddComment(R"DOC(
Accumulate Operator.

This operator accumulates the input tensor to the output tensor. If the
output tensor already has the right size, we add to it; otherwise, we first
initialize the output tensor to all zeros, and then do accumulation. Any
further calls to the operator, given that no one else fiddles with the output
in the interim, will do simple accumulations.

Accumulation is done as follows:

Out = 1*X + gamma*Out

where X is the input tensor, Out is the output tensor and gamma is the multiplier
argument.
)DOC");
  }
};
...
...@@ -48,12 +48,17 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker {
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput("Communicator",
              "Create Communicator for communicating between gpus");
    AddAttr<std::vector<int>>("gpus", "(vector<int>) GPU id lists");
    AddAttr<int>("data_type",
                 "(int, default 5 (FP32)) "
                 "Output data type")
        .SetDefault(framework::DataType::FP32);
    AddComment(R"DOC(
NCCLInit Operator.

Create communicator.

)DOC");
  }
};
...@@ -143,11 +148,15 @@ class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Communicator", "Communicator for communicating between gpus");
    AddOutput("Out", "The output of AllReduce op");
    AddAttr<std::string>("reduction",
                         "(string, default 'ncclSum') "
                         "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.")
        .SetDefault("ncclSum");
    AddComment(R"DOC(
NCCLAllReduce Operator.

AllReduce the input tensors.

)DOC");
  }
};
...@@ -161,14 +170,20 @@ class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Communicator", "Communicator for communicating between gpus");
    AddOutput("Out", "The output of Reduce op");
    AddAttr<std::string>("reduction",
                         "(string, default 'ncclSum') "
                         "{'ncclMin', 'ncclMax', 'ncclProd', 'ncclSum'}.")
        .SetDefault("ncclSum");
    AddAttr<int>("root",
                 "(int, default kInvalidGPUId) "
                 "Root gpu of the parameter. If not set "
                 "(platform::kInvalidGPUId), hashed by name.")
        .SetDefault(platform::kInvalidGPUId);
    AddComment(R"DOC(
NCCLReduce Operator.

Reduce the tensors.

)DOC");
  }
};
...@@ -182,12 +197,16 @@ class NCCLBcastOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Communicator", "Communicator for communicating between gpus");
    AddOutput("Out", "The output of Bcast");
    AddAttr<int>("root",
                 "(int, default kInvalidGPUId) "
                 "Root gpu of the parameter. If not set "
                 "(platform::kInvalidGPUId), hashed by name.")
        .SetDefault(platform::kInvalidGPUId);
    AddComment(R"DOC(
NCCLBcast Operator.

Bcast the tensors.

)DOC");
  }
};
...
...@@ -54,41 +54,44 @@ class PadOpMaker : public framework::OpProtoAndCheckerMaker {
             "The input of pad op. "
             "The input should be a k-D tensor(k > 0 and k < 7)");
    AddOutput("Out",
              "The output of pad op. "
              "A tensor with the same shape as X.");
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>) "
        "A list<int> to describe the padding rules for each dimension. "
        "For 2-D image tensor, paddings=[0, 1, 2, 3] means "
        "padding 0 row to top, 1 row to bottom, 2 columns to left "
        "and 3 columns to right. Size of paddings should be equal to "
        "2 * dimension size of the input tensor.");
    AddAttr<float>("pad_value",
                   "(float, default 0.0) "
                   "The value to fill the padded areas.")
        .SetDefault(0.0f);
    AddComment(R"DOC(
Pad Operator.

Pad input into output, as specified by paddings and pad_value.
The input should be a k-D tensor(k > 0 and k < 7). As an example:

Given:

X = [[1, 2],
     [3, 4]],

paddings = [0, 1, 1, 2],

and

pad_value = 0,

we have:

Out = [[0, 1, 2, 0, 0]
       [0, 3, 4, 0, 0]
       [0, 0, 0, 0, 0]]

)DOC");
  }
};
...
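Editor's sketch (not part of the commit): the output shape rule implied by `paddings`; dimension i grows by paddings[2*i] + paddings[2*i+1]:

    #include <vector>
    std::vector<int> PaddedShape(const std::vector<int> &in_dims,
                                 const std::vector<int> &paddings) {
      std::vector<int> out(in_dims.size());
      for (size_t i = 0; i < in_dims.size(); ++i) {
        out[i] = in_dims[i] + paddings[2 * i] + paddings[2 * i + 1];
      }
      return out;  // e.g. [2, 2] with paddings [0, 1, 1, 2] -> [3, 5]
    }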
...@@ -73,125 +73,138 @@ Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto,
  AddInput(
      "X",
      "(Tensor) The input tensor of pooling operator. "
      "The format of input tensor is NCHW, where N is batch size, C is the "
      "number of channels, H is the height of the feature, "
      "and W is the width of the feature.");
  AddOutput("Out",
            "(Tensor) The output tensor of pooling operator. "
            "The format of output tensor is also NCHW, "
            "where N is batch size, C is the number of channels, "
            "H is the height of the feature, "
            "and W is the width of the feature.");

  AddAttr<std::string>("poolingType",
                       "(string) Pooling type, can be \"max\" for max-pooling "
                       "and \"avg\" for average-pooling.")
      .InEnum({"max", "avg"});
  AddAttr<std::vector<int>>("ksize",
                            "(vector<int>) The pooling window "
                            "size(height, width) of the pooling operator. "
                            "If globalPooling = true, ksize and paddings will "
                            "be ignored.");  // TODO(Chengduo): Add checker.
                                             // (Currently, TypedAttrChecker
                                             // doesn't support vector type.)
  AddAttr<bool>("globalPooling",
                "(bool, default false) Whether to use the global pooling. "
                "If globalPooling = true, ksize and paddings will be ignored.")
      .SetDefault(false);
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default {1, 1}), strides(height, "
                            "width) of pooling operator.")
      .SetDefault({1, 1});  // TODO(Chengduo): Add checker. (Currently,
                            // TypedAttrChecker doesn't support vector type.)
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default {0,0}), paddings(height, width) of pooling "
      "operator. "
      "If globalPooling = true, paddings and ksize will be ignored.")
      .SetDefault({0, 0});  // TODO(Chengduo): Add checker. (Currently,
                            // TypedAttrChecker doesn't support vector type.)

  AddComment(R"DOC(
Pool2d Operator.

The pooling2d operation calculates the output based on
the input, poolingType and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCHW format, where N is batch size, C is the
number of channels, H is the height of the feature, and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.

Example:
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
  where
       $$
       H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
       W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1
       $$

)DOC");
}
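As a quick sanity check of the shape formula above, here is an editor's sketch (the sample sizes are made up); the Pool3d operator below applies the same per-dimension rule with a depth dimension added:

// Editor's sketch: evaluate the pooling output-size formula per dimension.
#include <cstdio>

int pooled_size(int in, int ksize, int padding, int stride) {
  return (in - ksize + 2 * padding) / stride + 1;
}

int main() {
  // A hypothetical 7x7 feature map, 2x2 window, zero padding, stride 2.
  std::printf("H_out=%d W_out=%d\n",
              pooled_size(7, 2, 0, 2), pooled_size(7, 2, 0, 2));  // 3 and 3
  return 0;
}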
Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto,
                             framework::OpAttrChecker *op_checker)
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput("X",
           "(Tensor) The input tensor of pooling operator. "
           "The format of input tensor is NCDHW, where N is batch size, C is "
           "the number of channels, and D, H and W are the depth, height and "
           "width of the feature, respectively.");
  AddOutput("Out",
            "(Tensor) The output tensor of pooling operator. "
            "The format of output tensor is also NCDHW, "
            "where N is batch size, C is "
            "the number of channels, and D, H and W are the depth, height and "
            "width of the feature, respectively.");

  AddAttr<std::string>("poolingType",
                       "(string) Pooling type, can be \"max\" for max-pooling "
                       "and \"avg\" for average-pooling.")
      .InEnum({"max", "avg"});
  AddAttr<std::vector<int>>(
      "ksize",
      "(vector<int>) The pooling window size(depth, height, "
      "width) of the pooling operator. "
      "If globalPooling = true, ksize and paddings will "
      "be ignored.");  // TODO(Chengduo): Add checker.
                       // (Currently, TypedAttrChecker
                       // doesn't support vector type.)
  AddAttr<bool>("globalPooling",
                "(bool, default false) Whether to use the global pooling. "
                "If globalPooling = true, ksize and paddings will be ignored.")
      .SetDefault(false);
  AddAttr<std::vector<int>>(
      "strides",
      "(vector<int>, default {1,1,1}) Strides(depth, height, "
      "width) of the pooling operator.")
      .SetDefault({1, 1, 1});  // TODO(Chengduo): Add checker. (Currently,
                               // TypedAttrChecker doesn't support vector type.)
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default {0,0,0}), paddings(depth, height, "
      "width) of the pooling operator. "
      "If globalPooling = true, ksize and paddings will be ignored.")
      .SetDefault({0, 0, 0});  // TODO(Chengduo): Add checker. (Currently,
                               // TypedAttrChecker doesn't support vector type.)

  AddComment(R"DOC(
Pool3d Operator.

The pooling3d operation calculates the output based on
the input, poolingType, ksize, strides, and paddings parameters.
Input(X) and output(Out) are in NCDHW format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively. Parameters(ksize, strides, paddings)
are three elements. These three elements represent depth, height and
width, respectively. The input(X) size and output(Out) size may be different.

Example:
  Input:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
  where
       $$
       D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
       H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\
       W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1
       $$

)DOC");
}
}  // namespace operators
......
...@@ -89,64 +89,73 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput(
      "X",
      "(Tensor) The input tensor of pooling operator. "
      "The format of input tensor is NCHW, where N is batch size, C is the "
      "number of channels, H is the height of the image, "
      "and W is the width of the image.");
  AddOutput("Out",
            "(Tensor) The output tensor of pooling operator. "
            "The format of output tensor is also NCHW, "
            "where N is batch size, C is "
            "the number of channels, H is the height of the image "
            "and W is the width of the image.");
  AddOutput("Mask",
            "(Tensor) The Mask tensor of pooling operator. "
            "The format of output tensor is also NCHW, "
            "where N is batch size, C is the number of channels, "
            "H is the height of the image, "
            "and W is the width of the image. "
            "It represents the index in the current feature map.");
  AddAttr<std::vector<int>>("ksize",
                            "(vector<int>) The pooling window size(height, "
                            "width) of pooling operator. "
                            "If globalPooling = true, ksize and paddings "
                            "will be ignored.");  // TODO(Chengduo): Add
                                                  // checker. (Currently,
                                                  // TypedAttrChecker doesn't
                                                  // support vector type.)
  AddAttr<bool>(
      "globalPooling",
      "(bool, default false) Whether to use the global pooling. "
      "If globalPooling = true, ksize and paddings will be ignored.")
      .SetDefault(false);
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default {1, 1}), strides(height, "
                            "width) of pooling operator.")
      .SetDefault({1, 1});  // TODO(Chengduo): Add checker. (Currently,
                            // TypedAttrChecker doesn't support vector type.)
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default {0, 0}), paddings(height, width) of pooling "
      "operator. "
      "If globalPooling = true, paddings and ksize will be ignored.")
      .SetDefault({0, 0});  // TODO(Chengduo): Add checker. (Currently,
                            // TypedAttrChecker doesn't support vector type.)

  AddComment(R"DOC(
MaxPool2d Operator.

The maxPooling2d with index operation calculates the output and the mask
based on the input, ksize, strides, and paddings parameters. Input(X) and
output(Out, Mask) are in NCHW format, where N is batch size, C is the
number of channels, H is the height of the feature,
and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
       Mask shape: $(N, C, H_{out}, W_{out})$
  where
       $$
       H_{out} = (H_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
       W_{out} = (W_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1
       $$

)DOC");
}
};
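To illustrate what the Mask output records, here is a simplified single-row editor's sketch (the real kernel's index layout and data types may differ; the input values are made up):

// Editor's sketch of max pooling with an index mask on one 1-D feature row.
#include <cstdio>

int main() {
  const float x[6] = {1, 5, 3, 2, 8, 4};  // one feature row
  const int ksize = 2, stride = 2;
  for (int start = 0; start + ksize <= 6; start += stride) {
    float best = x[start];
    int best_idx = start;
    for (int k = 1; k < ksize; ++k) {
      if (x[start + k] > best) { best = x[start + k]; best_idx = start + k; }
    }
    // Out gets the max value; Mask gets its index in the input feature map.
    std::printf("Out=%.0f Mask=%d\n", best, best_idx);
  }
  return 0;  // prints: Out=5 Mask=1 / Out=3 Mask=2 / Out=8 Mask=4
}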
...@@ -156,70 +165,76 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
  MaxPool3dWithIndexOpMaker(framework::OpProto *proto,
                            framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput("X",
           "(Tensor) The input tensor of pooling operator. "
           "The format of input tensor is NCDHW, where N is batch size, C is "
           "the number of channels, and D, H and W are the depth, height and "
           "width of the image, respectively.");
  AddOutput("Out",
            "(Tensor) The output tensor of pooling operator. "
            "The format of output tensor is also NCDHW, "
            "where N is the batch size, C is the number of channels, "
            "and D, H and W are the depth, height and "
            "width of the image, respectively.");
  AddOutput("Mask",
            "(Tensor) The Mask tensor of pooling operator. "
            "The format of output tensor is also NCDHW, "
            "where N is the batch size, C is the number of channels, and "
            "D, H and W are the depth, height and width "
            "of the image, respectively. "
            "It represents the index in the current feature map.");
  AddAttr<std::vector<int>>("ksize",
                            "(vector<int>) The pooling window size(depth, "
                            "height, width) of pooling operator. "
                            "If globalPooling = true, ksize and paddings "
                            "will be ignored.");  // TODO(Chengduo): Add
                                                  // checker. (Currently,
                                                  // TypedAttrChecker doesn't
                                                  // support vector type.)
  AddAttr<bool>(
      "globalPooling",
      "(bool, default false) Whether to use the global pooling. "
      "If globalPooling = true, ksize and paddings will be ignored.")
      .SetDefault(false);
  AddAttr<std::vector<int>>("strides",
                            "(vector<int>, default {1,1,1}), strides(depth, "
                            "height, width) of pooling operator.")
      .SetDefault({1, 1, 1});  // TODO(Chengduo): Add checker. (Currently,
                               // TypedAttrChecker doesn't support vector type.)
  AddAttr<std::vector<int>>(
      "paddings",
      "(vector<int>, default {0,0,0}), paddings(depth, "
      "height, width) of pooling operator. "
      "If globalPooling = true, paddings and ksize will be ignored.")
      .SetDefault({0, 0, 0});  // TODO(Chengduo): Add checker. (Currently,
                               // TypedAttrChecker doesn't support vector type.)

  AddComment(R"DOC(
MaxPool3d Operator.

The maxpooling3d with index operation calculates the output and the mask
based on the input, ksize, strides, and paddings parameters.
Input(X) and output(Out, Mask) are in NCDHW format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively.
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
       Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
  where
       $$
       D_{out} = (D_{in} - ksize[0] + 2 * paddings[0]) / strides[0] + 1 \\
       H_{out} = (H_{in} - ksize[1] + 2 * paddings[1]) / strides[1] + 1 \\
       W_{out} = (W_{in} - ksize[2] + 2 * paddings[2]) / strides[2] + 1
       $$

)DOC");
}
};
......
...@@ -92,76 +92,78 @@ class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker {
                       framework::OpAttrChecker *op_checker)
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput("MaxProbs",
           "(Tensor, default Tensor<float>) A 2-D tensor with shape N x 1, "
           "where N is the batch size. Each row contains the max probability "
           "of an instance which is computed by the previous top_k (k=1) "
           "operator.");
  AddInput("Indices",
           "(Tensor, default Tensor<int>) A 2-D tensor with shape N x 1, "
           "where N is the batch size. Each row contains the corresponding "
           "index which is computed by the previous top_k (k=1) operator.");
  AddInput("Labels",
           "(Tensor, default Tensor<int>) A 2-D tensor with shape N x 1, "
           "where N is the batch size. Each element is a label and the "
           "value should be in [0, class_number - 1].");
  AddInput("Weights",
           "(Tensor, default Tensor<float>) A 2-D tensor with shape N x 1, "
           "where N is the batch size. This input is optional. If provided, "
           "the weight of each instance would be considered when computing "
           "metrics.")
      .AsDispensable();
  AddInput("StatesInfo",
           "(Tensor, default Tensor<int>) A 2-D tensor with shape D x 4, "
           "where D is the number of classes. This input is optional. If "
           "provided, the current state will be accumulated to this state "
           "and the accumulated state will be the output state.")
      .AsDispensable();
  AddOutput("BatchMetrics",
            "(Tensor, default Tensor<float>) A 1-D tensor with shape {6}. "
            "This output tensor contains metrics for the current batch data. "
            "The layout is [macro average precision, macro average recall, "
            "macro f1 score, micro average precision, micro average recall, "
            "micro f1 score].");
  AddOutput("AccumMetrics",
            "(Tensor, default Tensor<float>) A 1-D tensor with shape {6}. "
            "This output tensor contains metrics for the accumulated data. "
            "The layout is [macro average precision, macro average recall, "
            "macro f1 score, micro average precision, micro average recall, "
            "micro f1 score].");
  AddOutput("AccumStatesInfo",
            "(Tensor, default Tensor<float>) A 2-D tensor with shape D x 4, "
            "where D is equal to the class number. This output tensor "
            "contains accumulated state variables used to compute metrics. "
            "The layout for each class is [true positives, false positives, "
            "true negatives, false negatives].");
  AddAttr<int>("class_number", "(int) Number of classes to be evaluated.");
  AddComment(R"DOC(
Precision Recall Operator.

When given Input(Indices) and Input(Labels), this operator can be used
to compute various metrics including:
1. macro average precision
2. macro average recall
3. macro f1 score
4. micro average precision
5. micro average recall
6. micro f1 score

To compute the above metrics, we need to do statistics for true positives,
false positives and false negatives. Here the count of true negatives is not
necessary, but counting it may provide potential usage and the cost is
trivial, so the operator also provides the count of true negatives.

We define state as a 2-D tensor with shape [class_number, 4]. Each row of a
state contains statistic variables for the corresponding class. The layout of
each row is: TP(true positives), FP(false positives), TN(true negatives),
FN(false negatives). If Input(Weights) is provided, TP, FP, TN, FN will be
calculated by the given weight instead of the instance count.

This operator also supports metrics computing for the cross-batch situation.
To achieve this, Input(StatesInfo) should be provided. The state of the
current batch data will be accumulated to Input(StatesInfo) and
Output(AccumStatesInfo) is the accumulated state.

Output(BatchMetrics) is the metrics of the current batch data while
Output(AccumStatesInfo) is the metrics of the accumulated data.

)DOC");
}
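As an editor's sketch of how the six BatchMetrics values can be derived from the [class_number, 4] state layout described above (the state values below are made-up numbers, not real outputs):

// Editor's sketch: macro/micro precision, recall and f1 from per-class
// rows laid out as [TP, FP, TN, FN].
#include <cstdio>

int main() {
  const int D = 2;
  double s[D][4] = {{8, 2, 85, 5}, {3, 1, 90, 6}};  // hypothetical states
  double macro_p = 0, macro_r = 0, tp = 0, fp = 0, fn = 0;
  for (int c = 0; c < D; ++c) {
    macro_p += s[c][0] / (s[c][0] + s[c][1]);  // per-class precision
    macro_r += s[c][0] / (s[c][0] + s[c][3]);  // per-class recall
    tp += s[c][0]; fp += s[c][1]; fn += s[c][3];
  }
  macro_p /= D; macro_r /= D;
  double macro_f1 = 2 * macro_p * macro_r / (macro_p + macro_r);
  double micro_p = tp / (tp + fp), micro_r = tp / (tp + fn);
  double micro_f1 = 2 * micro_p * micro_r / (micro_p + micro_r);
  std::printf("[%g %g %g %g %g %g]\n",
              macro_p, macro_r, macro_f1, micro_p, micro_r, micro_f1);
  return 0;
}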
......
...@@ -41,17 +41,24 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
  PReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input tensor of prelu operator.");
    AddInput("Alpha", "The alpha weight of prelu operator.");
    AddOutput("Out", "The output tensor of prelu operator.");
    AddComment(R"DOC(
PRelu Operator.

The equation is:

$$
f(x) =
\begin{cases}
\alpha * x, \quad \text{if} \ x < 0 \\
x,          \qquad \text{if} \ x >= 0
\end{cases}
$$

The input `X` can carry the LoD (Level of Details) information,
or not. And the output shares the LoD information with input `X`.

)DOC");
  }
};
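A minimal editor's sketch of the PRelu equation on a few scalar inputs (the alpha value and inputs are arbitrary):

// Editor's sketch: evaluate f(x) = alpha * x for x < 0, else x.
#include <cstdio>

float prelu(float x, float alpha) { return x < 0 ? alpha * x : x; }

int main() {
  const float alpha = 0.25f;
  const float xs[] = {-2.0f, -0.5f, 0.0f, 3.0f};
  for (float x : xs) {
    std::printf("f(%g) = %g\n", x, prelu(x, alpha));
  }
  return 0;
}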
......
...@@ -83,22 +83,26 @@ class ProximalAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
                 "L1 regularization strength.")
      .SetDefault(0.0f);
  AddAttr<float>("l2",
                 "(float, default 0.0) "
                 "L2 regularization strength.")
      .SetDefault(0.0f);
  AddComment(R"DOC(
Proximal Adagrad Optimizer.

Optimizer that implements the proximal adagrad algorithm:

$$
moment = moment + grad * grad \\
prox\_param = param - learning\_rate * grad * (1 / \sqrt{moment}) \\
param = sign(prox\_param) / (1 + learning\_rate * l2) *
        \max(|prox\_param| - learning\_rate * l1, 0)
$$

The paper that proposed Proximal GD:
(http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf)
Here, we use the adagrad learning rate as specified here:
(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)

)DOC");
}
};
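The update rule above, traced for a single scalar parameter in an editor's sketch (the learning rate and regularization strengths are arbitrary values, not the operator's defaults):

// Editor's sketch: one proximal adagrad step on a scalar parameter.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  double param = 1.0, moment = 0.0, grad = 0.2;
  const double lr = 0.1, l1 = 0.01, l2 = 0.001;
  moment += grad * grad;
  double prox = param - lr * grad / std::sqrt(moment);
  double sign = prox > 0 ? 1.0 : (prox < 0 ? -1.0 : 0.0);
  param = sign / (1 + lr * l2) * std::max(std::fabs(prox) - lr * l1, 0.0);
  std::printf("param=%f moment=%f\n", param, moment);
  return 0;
}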
......
...@@ -67,19 +67,23 @@ class ProximalGDOpMaker : public framework::OpProtoAndCheckerMaker {
                 "L1 regularization strength.")
      .SetDefault(0.0f);
  AddAttr<float>("l2",
                 "(float, default 0.0) "
                 "L2 regularization strength.")
      .SetDefault(0.0f);
  AddComment(R"DOC(
ProximalGD Operator.

Optimizer that implements the proximal gradient descent algorithm:

$$
prox\_param = param - learning\_rate * grad \\
param = sign(prox\_param) / (1 + learning\_rate * l2) *
        \max(|prox\_param| - learning\_rate * l1, 0)
$$

The paper that proposed Proximal Gradient Descent:
(http://papers.nips.cc/paper/3793-efficient-learning-using-forward-backward-splitting.pdf)

)DOC");
}
};
......
...@@ -26,9 +26,9 @@ class RankLossOp : public framework::OperatorWithKernel {
  void InferShape(framework::InferShapeContext *ctx) const override {
    // input check
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null.");
    auto label_dims = ctx->GetInputDim("Label");
    auto left_dims = ctx->GetInputDim("Left");
...@@ -50,32 +50,32 @@ class RankLossOpMaker : public framework::OpProtoAndCheckerMaker {
  AddInput("Label",
           "The label indicating A ranked higher than B or not, row vector.");
  AddInput("Left", "The output of RankNet for doc A, vector.");
  AddInput("Right", "The output of RankNet for doc B, vector.");
  AddOutput("Out", "The output loss of RankLoss operator, vector.");
  AddComment(R"DOC(
RankLoss Operator.

RankLoss operator for RankNet
(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf).
RankNet is a pairwise ranking model with
one training sample consisting of a pair of docs A and B, and the label P
indicating whether A is ranked higher than B or not:

P = {0, 1} or {0, 0.5, 1}, where 0.5 means no information about the rank of
the input pair.

The RankLoss operator takes three inputs: Left (o_i), Right (o_j) and Label
(P_{i,j}), which represent the output of RankNet for the two docs and the
label, respectively, and yields the rank loss C_{i,j} using the following
equation:

$$
C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + \log(1 + e^{o_{i,j}}) \\
o_{i,j} = o_i - o_j \\
\tilde{P_{i,j}} = \{0, 0.5, 1\} \ \text{or} \ \{0, 1\}
$$

The operator can take inputs of one sample or in batch.

)DOC");
}
};
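An editor's sketch evaluating the rank loss equation above for one (o_i, o_j, P) triple (the values are arbitrary):

// Editor's sketch: C_{i,j} = -P * (o_i - o_j) + log(1 + exp(o_i - o_j)).
#include <cmath>
#include <cstdio>

int main() {
  double o_i = 2.0, o_j = 1.0;  // hypothetical RankNet outputs for docs A, B
  double p = 1.0;               // label: A ranked higher than B
  double o_ij = o_i - o_j;
  double c_ij = -p * o_ij + std::log(1 + std::exp(o_ij));
  std::printf("C_{i,j} = %f\n", c_ij);
  return 0;
}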
......
...@@ -509,14 +509,14 @@ class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker {
    AddInput(kInitialStates, "rnn initial states").AsDuplicable();
    AddInput(kParameters,
             "Parameters are used by step block as its input. However, the "
             "input is not a sequence tensor. Every time step, each operator "
             "in step block just uses the parameter directly.")
        .AsDuplicable();
    AddOutput(kOutputs,
              "The output sequence of RNN. The sequence length must be the "
              "same.")
        .AsDuplicable();
    AddOutput(kStepScopes,
              "StepScopes contain all local variables in each time step.");
    AddAttr<std::vector<std::string>>(kExStates,
                                      string::Sprintf(
                                          R"DOC(The ex-state variable names.
...@@ -556,10 +556,12 @@ if reverse is True
      o o o o
)DOC").SetDefault(false);
    AddAttr<bool>(kIsTrain, "").SetDefault(true);
    AddComment(R"DOC(
Static Length Recurrent Operator.

The static length recurrent operator can only operate on fixed size sequence
data, i.e. in each mini-batch, the sequence lengths of all inputs are the same.

)DOC");
  }
};
......
...@@ -80,24 +80,27 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  ReduceOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(Tensor) The input tensor. Tensors with rank at most 6 are "
             "supported.");
    AddOutput("Out", "(Tensor) The result tensor.");
    AddAttr<int>(
        "dim",
        "(int, default 0) The dimension to reduce. "
        "Must be in the range [-rank(input), rank(input)). "
        "If `dim < 0`, the dim to reduce is `rank + dim`. "
        "Note that reducing on the first dim will make the LoD info lost.")
        .SetDefault(0);
    AddAttr<bool>("keep_dim",
                  "(bool, default false) "
                  "If true, retain the reduced dimension with length 1.")
        .SetDefault(false);
    comment_ = R"DOC(
{ReduceOp} Operator.

This operator computes the {reduce} of the input tensor along the given
dimension. The result tensor has 1 fewer dimension than the input unless
keep_dim is true.

)DOC";
    AddComment(comment_);
  }
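The negative-dim rule described above (if dim < 0, the dim to reduce is rank + dim) in a two-line editor's sketch:

// Editor's sketch of the negative-dim convention for the "dim" attribute.
#include <cstdio>

int resolve_dim(int dim, int rank) { return dim < 0 ? rank + dim : dim; }

int main() {
  std::printf("%d %d\n", resolve_dim(-1, 4), resolve_dim(2, 4));  // 3 2
  return 0;
}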
......
...@@ -71,8 +71,11 @@ class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input tensor of reshape operator.");
    AddOutput("Out", "The output tensor of reshape operator.");
    AddAttr<std::vector<int>>("shape",
                              "(vector<int>) "
                              "Target shape of reshape operator.");
    AddComment(R"DOC(
Reshape Operator.

Reshape Input(X) into the shape specified by Attr(shape).
...@@ -81,7 +84,7 @@ Given a 2-D tensor X with 2 rows and 2 columns
    [[1, 2], [3, 4]]

and target shape = [1, 4], the reshape operator will transform
the tensor X into a 1-D tensor:

    [1, 2, 3, 4]
......
...@@ -68,22 +68,22 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput("Param",
           "(Tensor, default Tensor<float>) "
           "Input parameter value that has to be updated.");
  AddInput("MeanSquare",
           "(Tensor, default Tensor<float>)"
           " The mean square value that gets updated.");
  AddInput("LearningRate",
           "(Tensor, default Tensor<float>) "
           "The learning rate should be a tensor of size 1.");
  AddInput("Grad",
           "(Tensor, default Tensor<float>) "
           "Input gradient of the parameter.");
  AddInput("Moment",
           "(Tensor, default Tensor<float>) The moment that gets updated.");
  AddOutput("ParamOut", "(Tensor) Output updated parameter value.");
  AddOutput("MomentOut", "(Tensor) Output updated moment.");
  AddOutput("MeanSquareOut", "(Tensor) Output updated mean squared value.");
  AddAttr<float>("epsilon",
                 "(float, default 1e-10) Constant "
...@@ -93,18 +93,19 @@ class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
                 "(float, default 0.9) "
                 "Discounting factor for coming gradient.")
      .SetDefault(0.9f);
  AddAttr<float>("momentum", "(float, default 0.0) Constant value.")
      .SetDefault(0.0f);
  AddComment(R"DOC(
Rmsprop Optimizer.

$$
MeanSquareOut = decay * MeanSquare + (1 - decay) * Grad * Grad \\
MomentOut = momentum * Moment +
            \frac{LearningRate * Grad}{\sqrt{MeanSquareOut + epsilon}} \\
ParamOut = Param - MomentOut
$$

The original slides that proposed Rmsprop: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf

)DOC");
......
...@@ -53,8 +53,10 @@ class SeqExpandOpMaker : public framework::OpProtoAndCheckerMaker {
            "(LoDTensor) The output of seq_expand op. "
            "The lod of the output will be the same as input(Y)'s lod.");
  AddComment(R"DOC(
Seq Expand Operator.

This operator expands input(X) according to the LOD of input(Y).
The following are cases to better explain how this works:

Case 1:

Given a 2-level LoDTensor input(X)
......
...@@ -68,11 +68,12 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
           "The level should be less than the level number of inputs.")
      .SetDefault(0);
  AddComment(R"DOC(
Sequence Concat Operator.

The sequence_concat operator concatenates multiple LoDTensors.
It supports a sequence (LoD tensor with a level number of 1)
or a nested sequence (LoD tensor with a level number of 2) as its input.
The following examples explain how the operator works:

- Case1:
  If the axis is other than 0 (here, axis is 1 and level is 1),
  each input should have the same LoD information and the LoD
...@@ -98,6 +99,7 @@ or a nested sequence (LoD tensor with level number is 2) as its input.
    LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)

NOTE: The levels of all the inputs should be the same.

)DOC");
}
};
......
...@@ -105,10 +105,10 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput(
      "X",
      "(LoDTensor) the input(X) is a LodTensor, which supports "
      "variable-time length input sequence. The underlying tensor in "
      "this LoDTensor is a matrix with shape (T, N), where T is the "
      "total time steps in this mini-batch and N is the input_hidden_size.");
  AddInput("PaddingData",
           "(Tensor, optional) the input(PaddingData) is an optional "
           "parameter, and it is learnable. "
...@@ -157,14 +157,16 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
      .GreaterThan(0);

  AddComment(R"DOC(
Sequence Conv Operator.

SequenceConvOp performs convolution operation on features of contextLength
time-steps of each instance. The convolution operation calculates the output
based on the input, filter, strides and paddings parameters.
The size of each dimension of the parameters is checked during infer-shape.
In order to ensure the equal length of sequence before and after convolution,
it is necessary to fill the top and bottom of each sequence based on
context_length, context_stride and context_start.

)DOC");
}
};
......
...@@ -27,6 +27,11 @@ class SequencePoolOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequencePoolOp should not be null.");
    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
    if (ctx->Attrs().Get<std::string>("pooltype") == "MAX") {
      PADDLE_ENFORCE(ctx->HasOutput("MaxIndex"),
                     "Output(MaxIndex) of SequencePoolOp should not be null.");
      ctx->SetOutputDim("MaxIndex", ctx->GetInputDim("X"));
    }
  }
};
...@@ -35,43 +40,50 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
  SequencePoolOpMaker(framework::OpProto* proto,
                      framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp.");
    AddOutput("Out",
              "(Tensor) The output of SequencePoolOp, which does not contain "
              "LoD information.");
    AddOutput("MaxIndex",
              "(Tensor<int>) This tensor is used for the sequence max-pooling "
              "to record the max indexes.")
        .AsIntermediate();
    AddAttr<std::string>(
        "pooltype",
        "(string, default \"AVERAGE\") The pooling type of SequencePoolOp.")
        .SetDefault("AVERAGE")
        .InEnum({"AVERAGE", "SUM", "SQRT", "LAST", "FIRST", "MAX"});
    AddComment(R"DOC(
Sequence Pool Operator.

The SequencePoolOp pools features of all time-steps of each instance.
It supports six pooling types:
1. AVERAGE: Out[i] = $$avg(X_i)$$
2. SUM:     Out[i] = $$\sum_jX_{ij}$$
3. SQRT:    Out[i] = $$\frac{\sum_jX_{ij}}{\sqrt{len(X_i)}}$$
4. LAST:    Out[i] = last instance in i-th sequence X[i]
5. FIRST:   Out[i] = first instance in i-th sequence X[i]
6. MAX:     Out[i] = $$max(X_i)$$

The following example explains how this works:

For a mini-batch of 3 variable-length sentences,
containing 2, 3, and 2 time-steps:

Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
Besides, for the sake of simplicity, we assume M=1 and N=1,
and the value of X = [[1, 3], [2, 4, 6], [5, 1]].

Thus, Out is a [3,1,1] Tensor without LoD information.
And for different pooltype, the value of Out is as follows:

- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
- SUM: [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
- SQRT: [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),
  6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)
- MAX: [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)
- LAST: [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
- FIRST: [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)

)DOC");
  }
};
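The lod-segmented pooling in the example above can be traced with a small editor's sketch (using the SUM pooltype and the same X and lod values):

// Editor's sketch: SUM pooling over lod segments of the worked example
// (X = [1, 3, 2, 4, 6, 5, 1], lod = [0, 2, 5, 7]).
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> x = {1, 3, 2, 4, 6, 5, 1};
  std::vector<int> lod = {0, 2, 5, 7};
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    double sum = 0;
    for (int t = lod[i]; t < lod[i + 1]; ++t) sum += x[t];
    std::printf("Out[%zu] = %g\n", i, sum);  // 4, 12, 6
  }
  return 0;
}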
...@@ -93,6 +105,12 @@ class SequencePoolGradOp : public framework::OperatorWithKernel {
    }
    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
  }

 protected:
  framework::DataType IndicateDataType(
      const framework::ExecutionContext& ctx) const override {
    return framework::ToDataType(ctx.Input<Tensor>("X")->type());
  }
};

}  // namespace operators
......
...@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/sequence_pooling.h"

namespace paddle {
namespace operators {
...@@ -34,7 +35,7 @@ class SequencePoolKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<LoDTensor>("X");
    auto* out = context.Output<Tensor>("Out");
    std::string pooltype = context.Attr<std::string>("pooltype");

    auto dims = in->dims();
...@@ -53,6 +54,16 @@ class SequencePoolKernel : public framework::OpKernel<T> {
    auto lod_level_0 = lod[0];

    out->mutable_data<T>(context.GetPlace());

    if (pooltype == "MAX") {
      // Max pooling is handled by a dedicated functor so that the max
      // indexes can be recorded in the MaxIndex output.
      math::MaxSeqPoolFunctor<Place, T> max_pool;
      auto* index = context.Output<Tensor>("MaxIndex");
      index->Resize({dims});
      index->mutable_data<int>(context.GetPlace());
      max_pool(context.device_context(), *in, out, index);
      return;
    }

    auto place = context.GetEigenDevice<Place>();
    for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
      Tensor in_t = in->Slice(static_cast<int>(lod_level_0[i]),
...@@ -69,8 +80,6 @@ class SequencePoolKernel : public framework::OpKernel<T> {
      } else if (pooltype == "SQRT") {
        out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
                              std::sqrt(static_cast<T>(h));
      } else if (pooltype == "LAST") {
        out_e.device(place) = in_e.chip(h - 1, 0);
      } else if (pooltype == "FIRST") {
...@@ -87,8 +96,8 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<LoDTensor>("X");
    auto* out_g = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
    std::string pooltype = context.Attr<std::string>("pooltype");

    auto dims = in->dims();
...@@ -96,6 +105,14 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
    int64_t w = in->numel() / dims[0];

    in_g->mutable_data<T>(context.GetPlace());

    if (pooltype == "MAX") {
      // The recorded MaxIndex is enough to scatter the output gradient
      // back to the max positions of the input.
      math::MaxSeqPoolGradFunctor<Place, T> max_pool_grad;
      auto* index = context.Input<Tensor>("MaxIndex");
      max_pool_grad(context.device_context(), *out_g, *index, in_g);
      return;
    }

    if (pooltype == "LAST" || pooltype == "FIRST") {
      // set X@Grad be zero at first when pooltype is LAST/FIRST
      math::SetConstant<Place, T> functor;
...@@ -118,20 +135,6 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
      } else if (pooltype == "SQRT") {
        in_g_e.device(place) =
            (out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
      } else if (pooltype == "LAST") {
        in_g_e.chip(h - 1, 0).device(place) = out_g_e;
      } else if (pooltype == "FIRST") {
......
...@@ -43,20 +43,24 @@ class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
            "(LoDTensor) 1-D or 2-D output LoDTensor with the 2-nd dimension "
            "of length 1.");
  AddComment(R"DOC(
Sequence Softmax Operator.

SequenceSoftmaxOp computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either [N, 1] or [N], where N is the sum of the lengths
of all sequences.

The algorithm works as follows:

    for i-th sequence in a mini-batch:

$$Out(X[lod[i]:lod[i+1]], :) =
\frac{\exp(X[lod[i]:lod[i+1], :])}
{\sum(\exp(X[lod[i]:lod[i+1], :]))}$$

For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
then softmax will be computed among X[0:2, :], X[2:5, :], X[5:7, :]
and N turns out to be 7.

)DOC");
}
};
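An editor's sketch of per-segment softmax using the [0, 2, 5, 7] lod from the example above (the input values are made up):

// Editor's sketch: softmax computed independently over each lod segment.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> x = {1, 2, 3, 1, 2, 0, 1};
  std::vector<int> lod = {0, 2, 5, 7};
  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    double denom = 0;
    for (int t = lod[i]; t < lod[i + 1]; ++t) denom += std::exp(x[t]);
    for (int t = lod[i]; t < lod[i + 1]; ++t)
      std::printf("%f ", std::exp(x[t]) / denom);
    std::printf("\n");  // one normalized row per sequence
  }
  return 0;
}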
......