diff --git a/CMakeLists.txt b/CMakeLists.txt index 264420ad830ed39b38f1918951d8d66c84fd5ee9..fd3582a1bca199d62d19550ffdd1efe9db520fa7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,7 @@ include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any include(external/eigen) # download eigen3 -include(external/pybind11) # download pybind11 +include(external/pybind11) # download pybind11 include(external/nccl) include(cudnn) # set cudnn libraries, must before configure diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 0b38943952f7fb9052368fe95eb31dd7592d8a47..310450f7d009dc0cdae9c0079a96445af8ec8f95 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -79,9 +79,8 @@ if(NOT DEFINED IOS_ARCH) # FIXME(liuyiqun): support "armv7;armv7s;arm64" future set(IOS_ARCH "arm64") elseif(IOS_PLATFORM STREQUAL "SIMULATOR") - set(IOS_ARCH "i386;x86_64") - elseif(IOS_PLATFORM STREQUAL "WATCHOS") - set(IOS_ARCH armv7k) + # FIXME(liuyiqun): support "i386;x86_64" future + set(IOS_ARCH "x86_64") endif() endif() set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") diff --git a/cmake/external/nccl.cmake b/cmake/external/nccl.cmake index 57d2c0a352507afd01d1cbf2c7b23c00ff7ad81b..fc43766efafc3d3e16f2906ce7f9a3d692c8e4ff 100644 --- a/cmake/external/nccl.cmake +++ b/cmake/external/nccl.cmake @@ -1,3 +1,21 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(NOT WITH_GPU) + return() +endif() + include(ExternalProject) set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 143b57a954e4e6b2bf273535ebdf0fa8e3dab768..3f86e456cfbe55fe47e5b18e755e34829ebe9930 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index 9391c285c7544669a5b1a078b7473d7a656c1bb4..4e87dc49d8956d1fa6dec777efc5a63c6b0f79a5 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -1,8 +1,26 @@ -INCLUDE(ExternalProject) +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -SET(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind) +if(NOT WITH_PYTHON) + return() +endif() + +include(ExternalProject) -INCLUDE_DIRECTORIES(${PYBIND_SOURCE_DIR}/src/extern_pybind/include) +set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind) + +include_directories(${PYBIND_SOURCE_DIR}/src/extern_pybind/include) ExternalProject_Add( extern_pybind @@ -17,14 +35,12 @@ ExternalProject_Add( TEST_COMMAND "" ) -if (${CMAKE_VERSION} VERSION_LESS "3.3.0") +if(${CMAKE_VERSION} VERSION_LESS "3.3.0") set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c) - file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + file(WRITE ${dummyfile} "const char * dummy_pybind = \"${dummyfile}\";") add_library(pybind STATIC ${dummyfile}) else() add_library(pybind INTERFACE) endif() add_dependencies(pybind extern_pybind) - -LIST(APPEND external_project_dependencies pybind) diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake index ce088ae7eaa3355f2f9761e8c421da0d7ef89fa7..9db457c7b2d61228e5d5af6827c4cda11a20a463 100644 --- a/cmake/external/swig.cmake +++ b/cmake/external/swig.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index e2c9fe56f335ae5b627b4d8d4bb17e4a2a466677..a98e069b7cd1654ddd5868560d0905eab6d9c692 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/cmake/simd.cmake b/cmake/simd.cmake index 46035a908b588861607a25d3a21cf34b7b6fd4b8..53c2de332ea74b06d1bd6e5bb119cad6af27ed01 100644 --- a/cmake/simd.cmake +++ b/cmake/simd.cmake @@ -1,27 +1,28 @@ # This file is use to check all support level of AVX on your machine # so that PaddlePaddle can unleash the vectorization power of muticore. -INCLUDE(CheckCXXSourceRuns) -INCLUDE(CheckCXXSourceCompiles) +include(CheckCXXSourceRuns) +include(CheckCXXSourceCompiles) -IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(MMX_FLAG "-mmmx") set(SSE2_FLAG "-msse2") set(SSE3_FLAG "-msse3") - SET(AVX_FLAG "-mavx") - SET(AVX2_FLAG "-mavx2") -ELSEIF(MSVC) + set(AVX_FLAG "-mavx") + set(AVX2_FLAG "-mavx2") +elseif(MSVC) set(MMX_FLAG "/arch:MMX") set(SSE2_FLAG "/arch:SSE2") set(SSE3_FLAG "/arch:SSE3") SET(AVX_FLAG "/arch:AVX") SET(AVX2_FLAG "/arch:AVX2") -ENDIF() +endif() set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS}) # Check MMX set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG}) +set(MMX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) CHECK_CXX_SOURCE_RUNS(" #include int main() @@ -32,6 +33,7 @@ int main() # Check SSE2 set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG}) +set(SSE2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) CHECK_CXX_SOURCE_RUNS(" #include int main() @@ -42,6 +44,7 @@ int main() # Check SSE3 set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG}) +set(SSE3_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) CHECK_CXX_SOURCE_RUNS(" #include int main() @@ -55,6 +58,7 @@ int main() # Check AVX set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) +set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) CHECK_CXX_SOURCE_RUNS(" #include int main() @@ -67,6 +71,7 @@ int main() # Check AVX 2 set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) +set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) CHECK_CXX_SOURCE_RUNS(" #include int main() diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 30b144d849bec367cd0197b6082889e011193a9a..0d34dec8e908c5e61001500725187a2233797f46 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -145,7 +145,7 @@ PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以 Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。 -PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Nodebook。 +PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Notebook。 如果您想要更深入了解deep learning,PaddlePaddle Book一定是您最好的选择。 我们提供可以直接运行PaddlePaddle Book的Docker镜像,直接运行: diff --git a/doc/howto/usage/cmd_parameter/arguments_cn.md b/doc/howto/usage/cmd_parameter/arguments_cn.md index f7aa525054468670f59309ddf9206af55bb77869..2dea231ca5487978d59a4d0a570431722ed6b3bf 100644 --- a/doc/howto/usage/cmd_parameter/arguments_cn.md +++ b/doc/howto/usage/cmd_parameter/arguments_cn.md @@ -63,7 +63,7 @@ -训练dot_period +训练dot_period √√ diff --git a/doc/index_cn.rst b/doc/index_cn.rst index 9279bac7f4b2898c18979630a8d6dfcb2dba70e0..ada51c2d73263898b2c748437f8eb0f30b537073 100644 --- a/doc/index_cn.rst +++ b/doc/index_cn.rst @@ -8,3 +8,4 @@ PaddlePaddle 文档 howto/index_cn.rst api/index_cn.rst faq/index_cn.rst + mobile/index_cn.rst diff --git a/doc/index_en.rst b/doc/index_en.rst index 64684b8b9b27e245c6b32ea28809d3bbce22fab9..23b64b6cadf776d44c4d0aa5a550ffe24be13b18 100644 --- a/doc/index_en.rst +++ b/doc/index_en.rst @@ -7,3 +7,4 @@ PaddlePaddle Documentation getstarted/index_en.rst howto/index_en.rst api/index_en.rst + mobile/index_en.rst diff --git a/doc/howto/cross_compiling/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md similarity index 95% rename from doc/howto/cross_compiling/cross_compiling_for_android_cn.md rename to doc/mobile/cross_compiling_for_android_cn.md index 58e4dd9c3fe43f963d00152aa4f456fadbb12bf3..882066f23714f7ab3bba9199b5fa5ff2325ce849 100644 --- a/doc/howto/cross_compiling/cross_compiling_for_android_cn.md +++ b/doc/mobile/cross_compiling_for_android_cn.md @@ -20,10 +20,32 @@ $ docker build -t username/paddle-android:dev . -f Dockerfile.android 构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。 Android的Docker开发镜像向用户提供两个可配置的参数: -| Argument | Optional Values | Default | -|-----------------|-------------------------|---------| -|`ANDROID_ABI` |`armeabi-v7a, arm64-v8a` | `armeabi-v7a` | -|`ANDROID_API` |`>= 21` | `21` | + ++ + + + + + + + + + + + + + + + + + + + + + + +
ArgumentOptional ValuesDefault
ANDROID_ABIarmeabi-v7a, arm64-v8aarmeabi-v7a
ANDROID_API>= 2121
- 编译`armeabi-v7a`,`Android API 21`的PaddlePaddle库 ```bash diff --git a/doc/howto/cross_compiling/cross_compiling_for_android.md b/doc/mobile/cross_compiling_for_android_en.md similarity index 93% rename from doc/howto/cross_compiling/cross_compiling_for_android.md rename to doc/mobile/cross_compiling_for_android_en.md index 161863e5c0a2c002af7d7611dad53c2c19148722..26858581fc1d77a9391520ac0dfd80fbd98f508c 100644 --- a/doc/howto/cross_compiling/cross_compiling_for_android.md +++ b/doc/mobile/cross_compiling_for_android_en.md @@ -26,10 +26,32 @@ $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_A The Docker image accepts two arguments `ANDROID_ABI` and `ANDROID_API`: -| Argument | Optional Values | Default | -|-----------------|-------------------------|---------| -|`ANDROID_ABI` |`armeabi-v7a, arm64-v8a` | `armeabi-v7a` | -|`ANDROID_API` |`>= 21` | `21` | + ++ + + + + + + + + + + + + + + + + + + + + + + +
ArgumentOptional ValuesDefault
ANDROID_ABIarmeabi-v7a, arm64-v8aarmeabi-v7a
ANDROID_API>= 2121
The ARM-64 architecture (`arm64-v8a`) requires at least level 21 of Android API. diff --git a/doc/howto/cross_compiling/cross_compiling_for_ios_cn.md b/doc/mobile/cross_compiling_for_ios_cn.md similarity index 91% rename from doc/howto/cross_compiling/cross_compiling_for_ios_cn.md rename to doc/mobile/cross_compiling_for_ios_cn.md index 32c490d9aa4202e17aa1784a45a317c5307b98ea..cda636a67de712e072f4cc7ad859dda75211eaa8 100644 --- a/doc/howto/cross_compiling/cross_compiling_for_ios_cn.md +++ b/doc/mobile/cross_compiling_for_ios_cn.md @@ -27,10 +27,28 @@ iOS平台可选配置参数: - `SIMULATOR`,构建目标为`x86`架构的模拟器平台。 - `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示: - | IOS_PLATFORM | IOS_ARCH | - |--------------|----------------------| - | OS | armv7, armv7s, arm64 (默认) | - | SIMULATOR | i386, x86_64 (默认) | + + + + + + + + + + + + + + + + + + + + + +
IOS_PLATFORMIOS_ARCH
OSarmv7, armv7s, arm64 (默认)
SIMULATORi386, x86_64 (默认)
- `IOS_DEPLOYMENT_TARGET`,最小的iOS部署版本,默认值为`7.0`。 - `IOS_ENABLE_BITCODE`,是否使能[Bitcode](https://developer.apple.com/library/content/documentation/IDEs/Conceptual/AppDistributionGuide/AppThinning/AppThinning.html#//apple_ref/doc/uid/TP40012582-CH35-SW3),可设置`ON/OFF`,默认值为`ON`。 diff --git a/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md b/doc/mobile/cross_compiling_for_raspberry_cn.md similarity index 100% rename from doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md rename to doc/mobile/cross_compiling_for_raspberry_cn.md diff --git a/doc/howto/cross_compiling/cross_compiling_for_raspberry_en.md b/doc/mobile/cross_compiling_for_raspberry_en.md similarity index 100% rename from doc/howto/cross_compiling/cross_compiling_for_raspberry_en.md rename to doc/mobile/cross_compiling_for_raspberry_en.md diff --git a/doc/mobile/index_cn.rst b/doc/mobile/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..1d99666e58b7043b85b0203ee0dfcd1957710161 --- /dev/null +++ b/doc/mobile/index_cn.rst @@ -0,0 +1,9 @@ +MOBILE +====== + +.. toctree:: + :maxdepth: 1 + + cross_compiling_for_android_cn.md + cross_compiling_for_ios_cn.md + cross_compiling_for_raspberry_cn.md diff --git a/doc/mobile/index_en.rst b/doc/mobile/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..3c08d736717cfe8d5fdf449dc58015086befbe60 --- /dev/null +++ b/doc/mobile/index_en.rst @@ -0,0 +1,8 @@ +MOBILE +====== + +.. toctree:: + :maxdepth: 1 + + cross_compiling_for_android_en.md + cross_compiling_for_raspberry_en.md diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index e767856d5012fd205f6b57f9721d0cbca8dc46ed..d267b14657be2a773d1dacfd9ac3767cddc47415 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -29,32 +29,32 @@ add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER} add_dependencies(paddle_capi paddle_proto) # TODO: paddle_capi_whole will be removed. +set(PADDLE_CAPI_LAYERS_LIBS + paddle_function + paddle_gserver) if(MOBILE_INFERENCE) - set(PADDLE_CAPI_INFER_LIBS - paddle_utils - paddle_parameter - paddle_math - paddle_cuda - paddle_function - paddle_gserver - paddle_proto) + set(PADDLE_CAPI_ENGINE_LIBS + paddle_utils + paddle_parameter + paddle_math + paddle_cuda + paddle_proto) else() - set(PADDLE_CAPI_INFER_LIBS - paddle_utils - paddle_parameter - paddle_math - paddle_cuda - paddle_function - paddle_gserver - paddle_proto - paddle_pserver - paddle_network) + set(PADDLE_CAPI_ENGINE_LIBS + paddle_utils + paddle_parameter + paddle_math + paddle_cuda + paddle_proto + paddle_pserver + paddle_network) endif() +set(PADDLE_CAPI_INFER_LIBS ${PADDLE_CAPI_LAYERS_LIBS} ${PADDLE_CAPI_ENGINE_LIBS}) cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS}) # Link the static library for inference -cc_library(paddle_capi_engine DEPS paddle_capi paddle_utils paddle_parameter paddle_math paddle_cuda paddle_proto) -cc_library(paddle_capi_layers DEPS paddle_function paddle_gserver) +cc_library(paddle_capi_engine DEPS paddle_capi ${PADDLE_CAPI_ENGINE_LIBS}) +cc_library(paddle_capi_layers DEPS ${PADDLE_CAPI_LAYERS_LIBS}) # Link the shared library for inference if(NOT IOS) diff --git a/paddle/framework/lod_tensor.md b/paddle/framework/lod_tensor.md index d147f1c4257eec14664301edab8d1fe2f128d2b0..10a8a7867fbf072f585fe3bfb1243e4e6bef4ec8 100644 --- a/paddle/framework/lod_tensor.md +++ b/paddle/framework/lod_tensor.md @@ -140,19 +140,9 @@ Similarly, the lengths in the top level LoD are transformed into offsets of elements/words as follows: ``` -0 9 10 15 - = = = - 3+2+4 1+9 2+3+10 -``` - -so we can tell that the first article is from word 0 to word 9, and the second article is from word 9 to word 10. - -The complete offset representation is as follows: - -``` -0 9 10 15 -0 3 5 9 10 12 15 - ||| || |||| | || ||| +0 3 4 6 + = = = + 3 3+1 4+2 ``` ## Slicing of LoD Tensors diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index c96166f35d1425218a4a74f50dc5ed542d677b68..e7cba9e702ce0f96a9680169f0593130df2fd096 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -67,8 +67,11 @@ class CompileTimeInferShapeContext : public InferShapeContext { out); in_var->SetLoDLevel(out_var->GetLodLevel()); } + bool IsRuntime() const override; + + protected: + VarDesc::VarType GetVarType(const std::string &name) const override; - private: DDim GetDim(const std::string &name) const override; void SetDim(const std::string &name, const DDim &dim) override; @@ -349,6 +352,9 @@ void OpDescBind::InferVarType(BlockDescBind *block) const { info.infer_var_type_(*this, block); } else { // all output type is LoDTensor by default + VLOG(10) << this->Type() + << " has not registered InferVarType. Set output variables to " + "LOD_TENSOR"; for (auto &out_pair : this->outputs_) { for (auto &out_var_name : out_pair.second) { block->Var(out_var_name)->SetType(VarDesc::LOD_TENSOR); @@ -448,6 +454,12 @@ void CompileTimeInferShapeContext::SetDim(const std::string &name, const DDim &dim) { block_.FindVarRecursive(name)->SetShape(framework::vectorize(dim)); } +bool CompileTimeInferShapeContext::IsRuntime() const { return false; } + +VarDesc::VarType CompileTimeInferShapeContext::GetVarType( + const std::string &name) const { + return block_.FindVarRecursive(name)->GetType(); +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 9295d36c2b2e66130ad273ebd3a40de739efeea7..22a7d9728a05950d66a1acd23d6fb18263f4ff6b 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -15,7 +15,9 @@ limitations under the License. */ #include "paddle/framework/operator.h" #include #include +#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/shape_inference.h" +#include "paddle/framework/var_type.h" namespace paddle { namespace framework { @@ -365,7 +367,9 @@ class RuntimeInferShapeContext : public InferShapeContext { out_tensor->set_lod(in_tensor.lod()); } - private: + bool IsRuntime() const override { return true; } + + protected: DDim GetDim(const std::string& name) const override { Variable* var = scope_.FindVar(name); if (var->IsType()) { @@ -388,6 +392,12 @@ class RuntimeInferShapeContext : public InferShapeContext { } } + VarDesc::VarType GetVarType(const std::string& name) const override { + auto* var = scope_.FindVar(name); + return ToVarType(var->Type()); + } + + private: const OperatorBase& op_; const Scope& scope_; }; diff --git a/paddle/framework/shape_inference.cc b/paddle/framework/shape_inference.cc index 8169df8e4629e2d02d3dabcd6a8a102ad0077a81..0af41b164f5894db17b2f86d4eba371cf05e3b41 100644 --- a/paddle/framework/shape_inference.cc +++ b/paddle/framework/shape_inference.cc @@ -46,6 +46,23 @@ void InferShapeContext::SetDims(const std::vector &names, SetDim(names[i], dims[i]); } } +std::vector InferShapeContext::GetInputsVarType( + const std::string &name) const { + return GetVarTypes(Inputs(name)); +} +std::vector InferShapeContext::GetOutputsVarType( + const std::string &name) const { + return GetVarTypes(Outputs(name)); +} +std::vector InferShapeContext::GetVarTypes( + const std::vector &names) const { + std::vector retv; + retv.resize(names.size()); + std::transform(names.begin(), names.end(), retv.begin(), + std::bind(std::mem_fn(&InferShapeContext::GetVarType), this, + std::placeholders::_1)); + return retv; +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/shape_inference.h b/paddle/framework/shape_inference.h index 6f19900ef1a3e88fe78d457a03c344ea586ab551..7d36ead2ca85328c7843b3b5d423cf8e921d1c93 100644 --- a/paddle/framework/shape_inference.h +++ b/paddle/framework/shape_inference.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/framework/attribute.h" #include "paddle/framework/ddim.h" +#include "paddle/framework/framework.pb.h" namespace paddle { namespace framework { @@ -26,6 +27,10 @@ class InferShapeContext { virtual bool HasInput(const std::string &name) const = 0; virtual bool HasOutput(const std::string &name) const = 0; + std::vector GetInputsVarType(const std::string &name) const; + std::vector GetOutputsVarType( + const std::string &name) const; + virtual bool HasInputs(const std::string &name) const = 0; virtual bool HasOutputs(const std::string &name) const = 0; @@ -46,6 +51,8 @@ class InferShapeContext { virtual void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, size_t j = 0) const = 0; + virtual bool IsRuntime() const = 0; + protected: virtual framework::DDim GetDim(const std::string &name) const = 0; virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0; @@ -55,6 +62,11 @@ class InferShapeContext { void SetDims(const std::vector &names, const std::vector &dims); + + std::vector GetVarTypes( + const std::vector &names) const; + + virtual VarDesc::VarType GetVarType(const std::string &name) const = 0; }; } // namespace framework diff --git a/paddle/framework/var_type.h b/paddle/framework/var_type.h new file mode 100644 index 0000000000000000000000000000000000000000..d060196bb2c478b776851288cb71a1880d60660d --- /dev/null +++ b/paddle/framework/var_type.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_rank_table.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/lod_tensor_array.h" + +namespace paddle { +namespace framework { +inline VarDesc::VarType ToVarType(std::type_index type) { + if (type.hash_code() == typeid(LoDTensor).hash_code()) { + return VarDesc_VarType_LOD_TENSOR; + } else if (type.hash_code() == typeid(LoDRankTable).hash_code()) { + return VarDesc_VarType_LOD_RANK_TABLE; + } else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) { + return VarDesc_VarType_LOD_TENSOR_ARRAY; + } else { + PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); + } +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index cde5ec2413ad01a0396e19fa617688af0eafbc75..e5a94759f9230ab4ce9d2cc24849a2debb8a5e2f 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -48,6 +48,11 @@ class Variable { void Clear() { holder_.reset(); } + std::type_index Type() const { + PADDLE_ENFORCE(holder_ != nullptr, "Must hold memory"); + return holder_->Type(); + } + private: struct Placeholder { virtual ~Placeholder() {} diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/gserver/layers/ConvBaseProjection.cpp index 08f36c516cfdadd42e9333c1c5a7a247df1f263e..19efed7b52ee07a5c509d069c286ccc3b21602f4 100644 --- a/paddle/gserver/layers/ConvBaseProjection.cpp +++ b/paddle/gserver/layers/ConvBaseProjection.cpp @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { -ThreadLocalD> ConvBaseProjection::convMem_; +ThreadLocalD> ConvBaseProjection::convMem_; ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config, ParameterPtr parameter, @@ -175,18 +175,18 @@ void ConvBaseProjection::reshape(int batchSize) { } void *ConvBaseProjection::getSpaceBytes(size_t size) { - std::vector &convMem = *convMem_; + std::vector &convMem = *convMem_; if (convMem.empty()) { int numDevices = hl_get_device_count(); convMem.resize(numDevices); } int devId = hl_get_device(); - MemoryHandle **localMem = &(convMem[devId]); - if (NULL == *localMem || size > (*localMem)->getAllocSize()) { - *localMem = new GpuMemoryHandle(size); + MemoryHandlePtr localMem = convMem[devId]; + if (NULL == localMem || size > localMem->getAllocSize()) { + localMem = std::make_shared(size); } - return (*localMem)->getBuf(); + return localMem->getBuf(); } ConvBaseProjection::~ConvBaseProjection() { diff --git a/paddle/gserver/layers/ConvBaseProjection.h b/paddle/gserver/layers/ConvBaseProjection.h index ebdb57845bb36ac607b1e4c8e02f9d20b6e82a36..bb7ffa627b745f45b0f210cdb58ef87d6990af73 100644 --- a/paddle/gserver/layers/ConvBaseProjection.h +++ b/paddle/gserver/layers/ConvBaseProjection.h @@ -105,7 +105,7 @@ protected: bool bias_; std::unique_ptr weight_; - static ThreadLocalD> convMem_; + static ThreadLocalD> convMem_; }; } // namespace paddle diff --git a/paddle/gserver/layers/SubSequenceLayer.cpp b/paddle/gserver/layers/SubSequenceLayer.cpp index 19b7ad1869af98e6313fe85a40203fd1e84f31d6..00d8ce017aa0121217688a1afc1fe31b4c3619ec 100644 --- a/paddle/gserver/layers/SubSequenceLayer.cpp +++ b/paddle/gserver/layers/SubSequenceLayer.cpp @@ -98,8 +98,19 @@ void SubSequenceLayer::forward(PassType passType) { CHECK_EQ(numSequences2, numSequences3); MatrixPtr inputValue = input.value; - IVectorPtr offsetValue = offsetSeq.ids; - IVectorPtr sizeValue = sizeSeq.ids; + IVectorPtr offsetValue; + IVectorPtr sizeValue; + + if (useGpu_) { + // copy to cpu + IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false); + IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false); + offsetValue->copyFrom(*offsetSeq.ids); + sizeValue->copyFrom(*sizeSeq.ids); + } else { + offsetValue = offsetSeq.ids; + sizeValue = sizeSeq.ids; + } CHECK_EQ(offsetValue->getSize(), numSequences1); CHECK_EQ(sizeValue->getSize(), numSequences1); @@ -176,8 +187,21 @@ void SubSequenceLayer::backward(const UpdateCallback& callback) { size_t numSequences1 = startPositions1->getSize() - 1; const int* starts1 = startPositions1->getData(); - IVectorPtr offsetValue = getInput(1).ids; - IVectorPtr sizeValue = getInput(2).ids; + const Argument& offsetSeq = getInput(1); + const Argument& sizeSeq = getInput(2); + IVectorPtr offsetValue; + IVectorPtr sizeValue; + + if (useGpu_) { + // copy to cpu + IVector::resizeOrCreate(offsetValue, offsetSeq.ids->getSize(), false); + IVector::resizeOrCreate(sizeValue, sizeSeq.ids->getSize(), false); + offsetValue->copyFrom(*offsetSeq.ids); + sizeValue->copyFrom(*sizeSeq.ids); + } else { + offsetValue = offsetSeq.ids; + sizeValue = sizeSeq.ids; + } int* offsets = offsetValue->getData(); int* sizes = sizeValue->getData(); diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 13ebb0ad65f62b5961f3280bf6b178be5c8fe30b..f22f86468db7be6c87193f85c3f87aef4a9d3c68 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -69,6 +69,20 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") endif() + # conv_op contains several operators + if ("${TARGET}" STREQUAL "conv_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(conv2d);\n") + endif() + + # conv_transpose_op contains several operators + if ("${TARGET}" STREQUAL "conv_transpose_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(conv2d_transpose);\n") + endif() + # pool_cudnn_op contains several operators if ("${TARGET}" STREQUAL "pool_cudnn_op") set(pybind_flag 1) @@ -96,7 +110,7 @@ function(op_library TARGET) # It's enough to just adding one operator to pybind file(APPEND ${pybind_file} "USE_GPU_ONLY_OP(ncclAllReduce);\n") endif() - + # reduce_op contains several operators if ("${TARGET}" STREQUAL "reduce_op") set(pybind_flag 1) @@ -104,6 +118,11 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(reduce_sum);\n") endif() + if ("${TARGET}" STREQUAL "tensor_array_read_write_op") + set(pybind_flag 1) + file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(read_from_array);\nUSE_NO_KERNEL_OP(write_to_array);\n") + endif() + # pybind USE_NO_KERNEL_OP # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel file(READ ${TARGET}.cc TARGET_CONTENT) @@ -139,26 +158,38 @@ set(DEPS_OPS sum_op pool_op pool_with_index_op + conv_op + lstm_op + conv_transpose_op nccl_op sequence_conv_op + sequence_pool_op lod_rank_table_op - lstm_op) + lstm_op + tensor_array_read_write_op + gru_op) op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) +op_library(conv_op DEPS vol2col) op_library(sum_op DEPS net_op selected_rows_functor) op_library(pool_op DEPS pooling) op_library(pool_with_index_op DEPS pooling) op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table) +op_library(tensor_array_read_write_op SRCS tensor_array_read_write_op.cc) if(WITH_GPU) op_library(nccl_op DEPS nccl_common) endif() op_library(sequence_conv_op DEPS context_project) +op_library(sequence_pool_op DEPS sequence_pooling) op_library(lstm_op DEPS sequence2batch lstm_compute) +op_library(conv_transpose_op DEPS vol2col) +op_library(gru_op DEPS sequence2batch gru_compute) op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc DEPS net_op tensor_array) op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) + list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) op_library(${src}) diff --git a/paddle/operators/conv2d_op.cc b/paddle/operators/conv2d_op.cc deleted file mode 100644 index b47cff180dc6f019c1730f68841fae27e95693ee..0000000000000000000000000000000000000000 --- a/paddle/operators/conv2d_op.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/conv2d_op.h" - -namespace paddle { -namespace operators { - -void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of Conv2DOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Filter"), - "Input(Filter) of Conv2DOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Output"), - "Output(Output) of Conv2DOp should not be null."); - - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - std::vector strides = ctx->Attrs().Get>("strides"); - std::vector paddings = ctx->Attrs().Get>("paddings"); - int groups = ctx->Attrs().Get("groups"); - int input_channels = in_dims[1]; - int output_channels = filter_dims[0]; - - PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D."); - PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D."); - PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, - "The number of input channels should be equal to filter " - "channels * groups."); - PADDLE_ENFORCE_EQ( - output_channels % groups, 0, - "The number of output channels should be divided by groups."); - - auto output_height = - OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]); - auto output_width = - OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]); - ctx->SetOutputDim("Output", - {in_dims[0], filter_dims[0], output_height, output_width}); -} - -Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "Input", - "The input tensor of convolution operator. " - "The format of input tensor is NCHW, where N is batch size, C is the " - "number of channels, H is the height of the image, " - "and W is the width of the image."); - AddInput("Filter", - "The filter tensor of convolution operator. " - "The format of the filter tensor is MCHW, where M is the number of " - "output image channels, C is the number of input image channels, " - "H is the height of the filter, and W is the width of the filter. " - "If the groups attribute is greater than 1, C equals the number of " - "input image channels divided by the groups."); - AddOutput("Output", - "The output tensor of convolution operator. " - "The format of output tensor is also NCHW."); - AddAttr>("strides", "strides of convolution operator.") - .SetDefault({1, 1}); - AddAttr>("paddings", "paddings of convolution operator.") - .SetDefault({0, 0}); - AddAttr( - "groups", - "Group size of convolution operator. " - "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " - "when group=2, the first half of the filters is only connected to the " - "first half of the input channels, while the second half of the filters " - "is only connected to the second half of the input channels.") - .SetDefault(1); - AddComment(R"DOC( -Convolution Operator. - -The convolution operation calculates the output based on the input, filter, -strides, paddings, and groups parameters. The size of each dimension of the -parameters is checked in the infer-shape method. - -)DOC"); -} - -void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const { - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - if (ctx->HasOutput(framework::GradVarName("Input"))) { - ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); - } - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); - } -} - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP(conv2d, ops::Conv2DOp, ops::Conv2DOpMaker, conv2d_grad, - ops::Conv2DOpGrad); - -REGISTER_OP_CPU_KERNEL( - conv2d, ops::GemmConv2DKernel); -REGISTER_OP_CPU_KERNEL( - conv2d_grad, ops::GemmConvGrad2DKernel); diff --git a/paddle/operators/conv2d_transpose_cudnn_op.cc b/paddle/operators/conv2d_transpose_cudnn_op.cc index 8ce94e0f04f14e1eae7e7d01280601cc72dea8c4..fce1357ce5af5f11ccc5941690431393301e6725 100644 --- a/paddle/operators/conv2d_transpose_cudnn_op.cc +++ b/paddle/operators/conv2d_transpose_cudnn_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/conv2d_transpose_op.h" +#include "paddle/operators/conv_transpose_op.h" namespace paddle { namespace operators { @@ -38,13 +38,13 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(conv2d_transpose_cudnn, ops::Conv2DTransposeOp, +REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp, ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad, - ops::Conv2DTransposeOpGrad); + ops::ConvTransposeOpGrad); REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn, - ops::GemmConv2DTransposeKernel); + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn_grad, - ops::GemmConv2DTransposeGradKernel); + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv2d_transpose_cudnn_op.cu b/paddle/operators/conv2d_transpose_cudnn_op.cu index 61fcfb3bd8fa57f2c45fbf3a980dbe41041cff18..1aa8d110759a7d99c26cf7baaf6d4ce4b92975b9 100644 --- a/paddle/operators/conv2d_transpose_cudnn_op.cu +++ b/paddle/operators/conv2d_transpose_cudnn_op.cu @@ -15,7 +15,7 @@ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/memory/memory.h" -#include "paddle/operators/conv2d_transpose_op.h" +#include "paddle/operators/conv_transpose_op.h" #include "paddle/platform/assert.h" #include "paddle/platform/cudnn_helper.h" diff --git a/paddle/operators/conv2d_transpose_op.cc b/paddle/operators/conv2d_transpose_op.cc deleted file mode 100644 index 8f5d18cddf45d1129040454adbc95a511ccf0583..0000000000000000000000000000000000000000 --- a/paddle/operators/conv2d_transpose_op.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/conv2d_transpose_op.h" - -namespace paddle { -namespace operators { - -void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of Conv2DTransposeOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Filter"), - "Input(Filter) of Conv2DTransposeOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Output"), - "Output(Output) of Conv2DTransposeOp should not be null."); - - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - std::vector strides = ctx->Attrs().Get>("strides"); - std::vector paddings = ctx->Attrs().Get>("paddings"); - - for (size_t i = 0; i < paddings.size(); ++i) { - PADDLE_ENFORCE_EQ(paddings[i], 0, - "No Padding allowed in conv transpose op."); - } - - PADDLE_ENFORCE_EQ(in_dims.size(), 4, - "Conv2DTransposeOp input should be 4-D tensor."); - PADDLE_ENFORCE_EQ(filter_dims.size(), 4, - "Conv2DTransposeOp filter should be 4-D tensor."); - PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0], - "input and kernel input dimension should be equal."); - - auto output_height = (in_dims[2] - 1) * strides[0] + filter_dims[2]; - auto output_width = (in_dims[3] - 1) * strides[1] + filter_dims[3]; - ctx->SetOutputDim("Output", - {in_dims[0], filter_dims[1], output_height, output_width}); -} - -Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( - framework::OpProto* proto, framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "Input", - "(Tensor) The input tensor of convolution transpose operator. " - "The format of input tensor is NCHW, where N is batch size, C is the " - "number of input channels, H is the height of the image, and " - "W is the width of the image."); - AddInput("Filter", - "(Tensor) The filter tensor of convolution transpose operator." - "The format of the filter tensor is CMHW, where C is the number of " - "output image channels, M is the number of input image channels, " - "H is the height of the filter, and W is the width of the filter. " - "We enforce groups number == 1 and padding == 0 in " - "the convolution transpose scenario."); - AddOutput("Output", - "(Tensor) The output tensor of convolution transpose operator." - "The format of output tensor is also NCHW."); - AddAttr>("strides", - "strides of convolution transpose operator.") - .SetDefault({1, 1}); - AddAttr>("paddings", - "paddings of convolution transpose operator.") - .SetDefault({0, 0}); - AddComment(R"DOC( -Convolution Transpose Operator. - -The convolution transpose operation calculates the output based on the input, -filter, strides, paddings, and groups parameters. The size of each dimension -of the parameters is checked in the infer-shape method. - -)DOC"); -} - -void Conv2DTransposeOpGrad::InferShape( - framework::InferShapeContext* ctx) const { - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - if (ctx->HasOutput(framework::GradVarName("Input"))) { - ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); - } - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); - } -} - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP(conv2d_transpose, ops::Conv2DTransposeOp, - ops::Conv2DTransposeOpMaker, conv2d_transpose_grad, - ops::Conv2DTransposeOpGrad); - -REGISTER_OP_CPU_KERNEL( - conv2d_transpose, - ops::GemmConv2DTransposeKernel); -REGISTER_OP_CPU_KERNEL( - conv2d_transpose_grad, - ops::GemmConv2DTransposeGradKernel); diff --git a/paddle/operators/conv2d_transpose_op.h b/paddle/operators/conv2d_transpose_op.h deleted file mode 100644 index cab7788227690621a0e5b744197b86c515bbef72..0000000000000000000000000000000000000000 --- a/paddle/operators/conv2d_transpose_op.h +++ /dev/null @@ -1,254 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" -#include "paddle/operators/math/im2col.h" -#include "paddle/operators/math/math_function.h" - -namespace paddle { -namespace operators { - -using Tensor = framework::Tensor; -using DDim = framework::DDim; - -// Define Op classes in .h file so that other conv transpose -// operator implementations can reuse the code. -class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker { - public: - Conv2DTransposeOpMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker); -}; - -class Conv2DTransposeOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override; -}; - -class Conv2DTransposeOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override; -}; - -template -class GemmConv2DTransposeKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const Tensor* input = context.Input("Input"); - // The filter will be reshaped, so it should not be constant pointer - Tensor filter = *context.Input("Filter"); - - Tensor* output = context.Output("Output"); - - std::vector strides = context.Attr>("strides"); - - // TODO(Zhuoyuan): Paddings can be added in future. - // groups will alway be disabled in conv2d_transpose. - - const int batch_size = input->dims()[0]; - const int m = input->dims()[1]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; - - const int k_h = filter.dims()[2]; - const int k_w = filter.dims()[3]; - - const int c = output->dims()[1]; // output channels - const int o_h = output->dims()[2]; - const int o_w = output->dims()[3]; - - paddle::operators::math::Col2ImFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - col2im; - - // use col_shape in the im2col and col2im calculation - DDim col_shape = {c, k_h, k_w, h, w}; - - // use col_matrix_shape in the gemm calculation - DDim col_matrix_shape = {c * k_h * k_w, h * w}; - - Tensor col; - col.mutable_data(col_shape, context.GetPlace()); - // col_matrix shares the same piece of data with col, - // but will be reshaped into a two-dimensional matrix shape - // to call the matrix multiplication interface. - Tensor col_matrix; - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - - DDim output_shape = {c, o_h, o_w}; - DDim input_matrix_shape = {m, h * w}; - - DDim filter_matrix_shape = {m, c * k_h * k_w}; - filter.Resize(filter_matrix_shape); - - // convolution transpose: gemm + col2im (similar to conv-backward on input) - - output->mutable_data(context.GetPlace()); - auto t = framework::EigenVector::Flatten(*output); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); - - for (int i = 0; i < batch_size; i++) { - // batch with size (M, h * w) - Tensor input_batch = input->Slice(i, i + 1).Resize(input_matrix_shape); - // filter size: (M, c * k_h * k_w) - - // output size: (c, o_h, o_w) - Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape); - - // col_matrix = filter * input_batch - // of shape (c * k_h * k_w, h * w) - math::matmul(context.device_context(), filter, true, - input_batch, false, T(1.0), &col_matrix, T(0.0)); - col2im(context.device_context(), output_batch, col, strides[0], - strides[1], 0, 0, 0, 0); - } - } -}; - -template -class GemmConv2DTransposeGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - const Tensor* input = context.Input("Input"); - const Tensor* output_grad = - context.Input(framework::GradVarName("Output")); - - // For filter, we do not use const pointer b/c we will do reshape, - // but we should avoid modifying its value. - Tensor filter = *context.Input("Filter"); - - Tensor* input_grad = - context.Output(framework::GradVarName("Input")); - Tensor* filter_grad = - context.Output(framework::GradVarName("Filter")); - - std::vector strides = context.Attr>("strides"); - // Actually, no paddings and groups allowed in conv transpose. - std::vector paddings = context.Attr>("paddings"); - - const int batch_size = input->dims()[0]; - const int m = input->dims()[1]; - const int h = input->dims()[2]; - const int w = input->dims()[3]; - - const int k_h = filter.dims()[2]; - const int k_w = filter.dims()[3]; - - const int c = output_grad->dims()[1]; // output channels - const int o_h = output_grad->dims()[2]; - const int o_w = output_grad->dims()[3]; - - // Only im2col functor required for bp to get to the right shape - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - im2col; - - // use col_shape in the im2col and col2im calculation - DDim col_shape = {c, k_h, k_w, h, w}; - - // use col_matrix_shape in the gemm calculation - DDim col_matrix_shape_f = {c * h * w, k_h * k_w}; - - Tensor col; - col.mutable_data(col_shape, context.GetPlace()); - // col_matrix shares the same piece of data with col, - // but will be reshaped into a two-dimensional matrix shape - // to call the matrix multiplication interface. - - DDim output_shape = {c, o_h, o_w}; - DDim input_matrix_shape = {m, h * w}; - - DDim filter_matrix_shape = {m, c * k_h * k_w}; - filter.Resize(filter_matrix_shape); - - // convolution transpose grad on input: - // im2col + gemm (similar to conv-forward) - // input need to compute gradient - if (input_grad) { - Tensor col_matrix; - col_matrix.ShareDataWith(col); - DDim col_matrix_shape = {c * k_h * k_w, h * w}; - col_matrix.Resize(col_matrix_shape); - - input_grad->mutable_data(context.GetPlace()); - auto t = framework::EigenVector::Flatten(*input_grad); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); - - for (int i = 0; i < batch_size; i++) { - // batch with size (c, o_h * o_w) - Tensor output_grad_batch = - output_grad->Slice(i, i + 1).Resize(output_shape); - // filter of size (m, c * k_h * k_w) - - // batch with size (m, h, w) - Tensor input_grad_batch = - input_grad->Slice(i, i + 1).Resize(input_matrix_shape); - - // im2col: dy from (c, o_h, o_w) -> (c * k_h * k_w, h * w) - im2col(context.device_context(), output_grad_batch, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], paddings[1]); - - // gemm: dx = filter * dy - // (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, c, h) - math::matmul(context.device_context(), filter, false, - col_matrix, false, T(1.0), &input_grad_batch, - T(0.0)); - } - } - - // filter gradient required - if (filter_grad) { - Tensor col_matrix_f; - col_matrix_f.ShareDataWith(col); - DDim col_matrix_shape_f = {c * h * w, k_h * k_w}; - col_matrix_f.Resize(col_matrix_shape_f); - - filter_grad->mutable_data(context.GetPlace()); - Tensor filter_grad_ = *filter_grad; - filter_grad_.Resize(filter_matrix_shape); - auto t = framework::EigenVector::Flatten(filter_grad_); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); - - for (int i = 0; i < batch_size; ++i) { - // batch with size (c, o_h, o_w) - Tensor output_grad_batch = - output_grad->Slice(i, i + 1).Resize(output_shape); - // input batch - Tensor in_batch = input->Slice(i, i + 1).Resize(input_matrix_shape); - - // im2col: (c * h * w, k_h * k_w) - im2col(context.device_context(), output_grad_batch, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], paddings[1]); - - // gemm: d_filter = x * y_grad^T - // (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, c, h) - math::matmul(context.device_context(), in_batch, false, - col_matrix_f, true, T(1.0), &filter_grad_, - T(1.0)); - } - } - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 62190ebc217be49f549cedfb2de24b9d138fff48..97f31bf22d7072d89bd043045045dcb5bb5518b8 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/conv2d_op.h" +#include "paddle/operators/conv_op.h" namespace paddle { namespace operators { @@ -38,10 +38,11 @@ class CudnnConvOpMaker : public Conv2DOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad, - ops::Conv2DOpGrad); -REGISTER_OP_CPU_KERNEL( - conv_cudnn, ops::GemmConv2DKernel); +REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad, + ops::ConvOpGrad); + +REGISTER_OP_CPU_KERNEL(conv_cudnn, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( conv_cudnn_grad, - ops::GemmConvGrad2DKernel); + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_cudnn_op.cu b/paddle/operators/conv_cudnn_op.cu index e2eb157f40c0039f87c41d28f8732cd4901a046d..d115850e2b651e20d82ad6028648c6a88439c9d7 100644 --- a/paddle/operators/conv_cudnn_op.cu +++ b/paddle/operators/conv_cudnn_op.cu @@ -15,7 +15,7 @@ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/memory/memory.h" -#include "paddle/operators/conv2d_op.h" +#include "paddle/operators/conv_op.h" #include "paddle/platform/assert.h" #include "paddle/platform/cudnn_helper.h" diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a6f65f10165929316f971d195f3790fd9e7ed376 --- /dev/null +++ b/paddle/operators/conv_op.cc @@ -0,0 +1,209 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/conv_op.h" + +namespace paddle { +namespace operators { + +void ConvOp::InferShape(framework::InferShapeContext* ctx) const { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of ConvOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Filter"), + "Input(Filter) of ConvOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Output"), + "Output(Output) of ConvOp should not be null."); + + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + std::vector strides = ctx->Attrs().Get>("strides"); + std::vector paddings = ctx->Attrs().Get>("paddings"); + int groups = ctx->Attrs().Get("groups"); + int input_channels = in_dims[1]; + int output_channels = filter_dims[0]; + + PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, + "Conv intput should be 4-D or 5-D tensor."); + PADDLE_ENFORCE_EQ( + in_dims.size(), filter_dims.size(), + "Conv input dimension and filter dimension should be the same."); + PADDLE_ENFORCE( + in_dims.size() - strides.size() == 2U, + "Conv input dimension and strides dimension should be consistent."); + PADDLE_ENFORCE_EQ( + paddings.size(), strides.size(), + "Conv paddings dimension and Conv strides dimension should be the same."); + PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, + "The number of input channels should be equal to filter " + "channels * groups."); + PADDLE_ENFORCE_EQ( + output_channels % groups, 0, + "The number of output channels should be divided by groups."); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < paddings.size(); ++i) { + output_shape.push_back(OutputSize(in_dims[i + 2], filter_dims[i + 2], + paddings[i], strides[i])); + } + ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); +} + +Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "Input", + "(Tensor) The input tensor of convolution operator. " + "The format of input tensor is NCHW, where N is batch size, C is the " + "number of channels, H is the height of the feature, " + "and W is the width of the feature."); + AddInput("Filter", + "(Tensor) The filter tensor of convolution operator. " + "The format of the filter tensor is MCHW, where M is the number of " + "output image channels, C is the number of input image channels, " + "H is the height of the filter, and W is the width of the filter. " + "If the groups attribute is greater than 1, C equals the number of " + "input image channels divided by the groups."); + AddOutput("Output", + "(Tensor) The output tensor of convolution operator. " + "The format of output tensor is also NCHW."); + AddAttr>("strides", "strides of convolution operator.") + .SetDefault({1, 1}); + AddAttr>("paddings", "paddings of convolution operator.") + .SetDefault({0, 0}); + AddAttr( + "groups", + "(int default:1), the group size of convolution operator. " + "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " + "when group=2, the first half of the filters is only connected to the " + "first half of the input channels, while the second half of the filters " + "is only connected to the second half of the input channels.") + .SetDefault(1); + AddComment(R"DOC( +Convolution Operator. + +The convolution operation calculates the output based on the input, filter +and strides, paddings, groups parameters. The size of each dimension of the +parameters is checked in the infer-shape. +Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch +size, C is the number of channels, H is the height of the feature, and W is +the width of the feature. Parameters(ksize, strides, paddings) are two elements. +These two elements represent height and width, respectively. +The input(X) size and output(Out) size may be different. + +Example: + Input: + Input shape: (N, C_in, H_in, W_in) + Filter shape: (C_out, C_in, H_f, W_f) + Output: + Output shape: (N, C_out, H_out, W_out) + where + H_out = (H_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; + W_out = (W_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; +)DOC"); +} + +Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "Input", + "(Tensor) The input tensor of convolution operator. " + "The format of input tensor is NCDHW. Where N is batch size, C is the " + "number of channels, D is the depth of the feature, H is the height of " + "the feature, " + "and W is the width of the feature."); + AddInput("Filter", + "(Tensor) The filter tensor of convolution operator. " + "The format of the filter tensor is MCDHW, where M is the number of " + "output image channels, C is the number of input image channels, " + "D is the depth of the filter, H is the height of the filter, and W " + "is the width of the filter." + "If the groups attribute is greater than 1, C equals the number of " + "input image channels divided by the groups."); + AddOutput("Output", + "(Tensor) The output tensor of convolution operator." + "The format of output tensor is also NCDHW."); + AddAttr>( + "strides", + "(vector, default:{0, 0, 0}), the strides of convolution operator.") + .SetDefault({1, 1, 1}); + AddAttr>( + "paddings", + "(vector, default:{0, 0, 0}), the paddings of convolution operator.") + .SetDefault({0, 0, 0}); + AddAttr( + "groups", + "(int default:1), the group size of convolution operator. " + "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " + "when group=2, the first half of the filters is only connected to the " + "first half of the input channels, while the second half of the filters " + "is only connected to the second half of the input channels.") + .SetDefault(1); + + AddComment(R"DOC( +Convolution3D Operator. + +The convolution operation calculates the output based on the input, filter +and strides, paddings, groups parameters. The size of each dimension of the +parameters is checked in the infer-shape. +Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch +size, C is the number of channels,D is the depth of the feature, H is the height of +the feature, and W is the width of the feature. Parameters(ksize, strides, paddings) +are three elements. These three elements represent depth, height and width, respectively. +The input(X) size and output(Out) size may be different. + +Example: + Input: + Input shape: (N, C_in, D_in, H_in, W_in) + Filter shape: (C_out, C_in, D_f, H_f, W_f) + Output: + Output shape: (N, C_out, D_out, H_out, W_out) + where + D_out = (D_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; + H_out = (H_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; + W_out = (W_in - filter_size[2] + 2 * paddings[2]) / strides[2] + 1; +)DOC"); +} + +void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const { + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + if (ctx->HasOutput(framework::GradVarName("Input"))) { + ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); + } + if (ctx->HasOutput(framework::GradVarName("Filter"))) { + ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); + } +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad, + ops::ConvOpGrad); +namespace ops = paddle::operators; +REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad, + ops::ConvOpGrad); + +REGISTER_OP_CPU_KERNEL(conv2d, + ops::GemmConvKernel); +REGISTER_OP_CPU_KERNEL( + conv2d_grad, ops::GemmConvGradKernel); + +REGISTER_OP_CPU_KERNEL(conv3d, + ops::GemmConvKernel); +REGISTER_OP_CPU_KERNEL( + conv3d_grad, ops::GemmConvGradKernel); diff --git a/paddle/operators/conv2d_op.cu b/paddle/operators/conv_op.cu similarity index 64% rename from paddle/operators/conv2d_op.cu rename to paddle/operators/conv_op.cu index c697c9466d34c29af6976f3a4d2d0a24ba778ceb..8e6f9da455b7291049aee57189dae15b8bcc2150 100644 --- a/paddle/operators/conv2d_op.cu +++ b/paddle/operators/conv_op.cu @@ -12,11 +12,16 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/conv2d_op.h" +#include "paddle/operators/conv_op.h" namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(conv2d, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv2d, ops::GemmConv2DKernel); + conv2d_grad, ops::GemmConvGradKernel); + +REGISTER_OP_GPU_KERNEL(conv3d, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv2d_grad, ops::GemmConvGrad2DKernel); + conv3d_grad, ops::GemmConvGradKernel); diff --git a/paddle/operators/conv2d_op.h b/paddle/operators/conv_op.h similarity index 54% rename from paddle/operators/conv2d_op.h rename to paddle/operators/conv_op.h index 0621389a79eee6b5e75b1eab309b49f8aa4a97ca..7c1729213bf3f5f3987afbf2d51d5b5339ae521d 100644 --- a/paddle/operators/conv2d_op.h +++ b/paddle/operators/conv_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/operators/math/im2col.h" #include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/vol2col.h" namespace paddle { namespace operators { @@ -40,14 +41,20 @@ class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { framework::OpAttrChecker* op_checker); }; -class Conv2DOp : public framework::OperatorWithKernel { +class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Conv3DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + +class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; }; -class Conv2DOpGrad : public framework::OperatorWithKernel { +class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -55,7 +62,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel { }; template -class GemmConv2DKernel : public framework::OpKernel { +class GemmConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("Input"); @@ -70,51 +77,78 @@ class GemmConv2DKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); int groups = context.Attr("groups"); - int batch_size = input->dims()[0]; - int input_channels = input->dims()[1]; - int filter_height = filter.dims()[filter.dims().size() - 2]; - int filter_width = filter.dims()[filter.dims().size() - 1]; - int output_channels = output->dims()[1]; - int output_height = output->dims()[2]; - int output_width = output->dims()[3]; - - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - im2col; + const int batch_size = static_cast(input->dims()[0]); + + // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + filter_shape_vec.erase(filter_shape_vec.begin(), + filter_shape_vec.begin() + 2); + + // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + std::vector output_shape_vec(framework::vectorize(output->dims())); + output_shape_vec.erase(output_shape_vec.begin(), + output_shape_vec.begin() + 2); + // use col_shape in the im2col calculation - framework::DDim col_shape = {input_channels / groups, filter_height, - filter_width, output_height, output_width}; + // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, + // o_h, o_w} + std::vector col_shape_vec; + col_shape_vec.push_back(input->dims()[1] / groups); + col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), + filter_shape_vec.end()); + col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), + output_shape_vec.end()); + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + // use col_matrix_shape in the gemm calculation - framework::DDim col_matrix_shape = { - input_channels / groups * filter_height * filter_width, - output_height * output_width}; + // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d * + // o_h * o_w) + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + Tensor col; col.mutable_data(col_shape, context.GetPlace()); // col_matrix shares the same piece of data with col, // but will be reshaped into a two-dimensional matrix shape // to call the matrix multiplication interface. - Tensor col_matrix = col; + Tensor col_matrix; + col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); - framework::DDim input_shape = {input->dims()[1], input->dims()[2], - input->dims()[3]}; + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + framework::DDim filter_matrix_shape = {filter.dims()[0], filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); - framework::DDim output_matrix_shape = {output_channels, - output_height * output_width}; - // convolution operator: im2col + gemm - int in_step = input_channels / groups; - int out_step = output_channels / groups; + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + for (int i = 0; i < batch_size; i++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); for (int g = 0; g < groups; g++) { - // im2col Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - im2col(context.device_context(), in_slice, col, strides[0], strides[1], - paddings[0], paddings[0], paddings[1], paddings[1]); + + if (filter_shape_vec.size() == 2) { + // im2col + math::Im2ColFunctor im2col; + im2col(context.device_context(), in_slice, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + } else if (filter_shape_vec.size() == 3) { + // vol2col + math::Vol2ColFunctor vol2col; + vol2col(context.device_context(), in_slice, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } // gemm Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); @@ -127,7 +161,7 @@ class GemmConv2DKernel : public framework::OpKernel { }; template -class GemmConvGrad2DKernel : public framework::OpKernel { +class GemmConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("Input"); @@ -137,64 +171,79 @@ class GemmConvGrad2DKernel : public framework::OpKernel { context.Output(framework::GradVarName("Input")); Tensor* filter_grad = context.Output(framework::GradVarName("Filter")); - // The filter and filter_grad will be reshaped in the calculations, // so here use an assignment operation, // that avoids modifying the variable in the Scope. Tensor filter = *context.Input("Filter"); + if (!input_grad && !filter_grad) return; + std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); int groups = context.Attr("groups"); - int batch_size = input->dims()[0]; - int input_channels = input->dims()[1]; - int filter_height = filter.dims()[filter.dims().size() - 2]; - int filter_width = filter.dims()[filter.dims().size() - 1]; - int output_channels = output_grad->dims()[1]; - int output_height = output_grad->dims()[2]; - int output_width = output_grad->dims()[3]; - - paddle::operators::math::Col2ImFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - col2im; - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - im2col; - // use col_shape in the im2col and col2im calculation - framework::DDim col_shape = {input_channels / groups, filter_height, - filter_width, output_height, output_width}; + const int batch_size = static_cast(input->dims()[0]); + + // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + filter_shape_vec.erase(filter_shape_vec.begin(), + filter_shape_vec.begin() + 2); + + // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + std::vector output_shape_vec( + framework::vectorize(output_grad->dims())); + output_shape_vec.erase(output_shape_vec.begin(), + output_shape_vec.begin() + 2); + + // use col_shape in the im2col calculation + // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, + // o_h, o_w} + std::vector col_shape_vec; + col_shape_vec.push_back(input->dims()[1] / groups); + col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), + filter_shape_vec.end()); + col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), + output_shape_vec.end()); + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + // use col_matrix_shape in the gemm calculation - framework::DDim col_matrix_shape = { - input_channels / groups * filter_height * filter_width, - output_height * output_width}; - Tensor col; - col.mutable_data(col_shape, context.GetPlace()); - // col_matrix shares the same piece of data with col, - // but will be reshaped into a two-dimensional matrix shape - // to call the matrix multiplication interface. - Tensor col_matrix = col; - col_matrix.Resize(col_matrix_shape); + // size: (i_c/g * k_h * k_w, o_h * o_w) + // or + // (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w) + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); - framework::DDim input_shape = {input->dims()[1], input->dims()[2], - input->dims()[3]}; - framework::DDim output_matrix_shape = { - output_grad->dims()[1], - output_grad->dims()[2] * output_grad->dims()[3]}; + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); framework::DDim filter_matrix_shape = {filter.dims()[0], filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); - // convolution backward input operator: gemm + col2im - // convolution backward weight operator: im2col + gemm - int in_step = input_channels / groups; - int out_step = output_channels / groups; + framework::DDim output_matrix_shape = { + output_grad->dims()[1], + output_grad->numel() / + (output_grad->dims()[0] * output_grad->dims()[1])}; + + // convolution backward input operator: gemm + col2im(or col2vol) + // convolution backward weight operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output_grad->dims()[1]) / groups; + + Tensor col; + // col_matrix shares the same piece of data with col, + // but will be reshaped into a two-dimensional matrix shape + // to call the matrix multiplication interface. + Tensor col_matrix; + col.mutable_data(col_shape, context.GetPlace()); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + + math::SetConstant set_zero; if (input_grad) { input_grad->mutable_data(context.GetPlace()); - auto t = framework::EigenVector::Flatten(*input_grad); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); + set_zero(context.device_context(), input_grad, static_cast(0)); for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = @@ -208,13 +257,22 @@ class GemmConvGrad2DKernel : public framework::OpKernel { math::matmul(context.device_context(), filter_slice, true, out_grad_slice, false, T(1.0), &col_matrix, T(0.0)); - // col2im Tensor in_grad_slice = in_grad_batch.Slice(g * in_step, (g + 1) * in_step); - col2im(context.device_context(), in_grad_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + + if (filter_shape_vec.size() == 2) { + math::Col2ImFunctor col2im; + col2im(context.device_context(), in_grad_slice, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + + } else if (filter_shape_vec.size() == 3) { + math::Col2VolFunctor col2vol; + col2vol(context.device_context(), in_grad_slice, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } } } } @@ -223,8 +281,7 @@ class GemmConvGrad2DKernel : public framework::OpKernel { filter_grad->mutable_data(context.GetPlace()); Tensor filter_grad_ = *filter_grad; filter_grad_.Resize(filter_matrix_shape); - auto t = framework::EigenVector::Flatten(filter_grad_); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); + set_zero(context.device_context(), filter_grad, static_cast(0)); for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = @@ -235,9 +292,18 @@ class GemmConvGrad2DKernel : public framework::OpKernel { Tensor out_grad_slice = out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - im2col(context.device_context(), in_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + + if (filter_shape_vec.size() == 2) { + math::Im2ColFunctor im2col; + im2col(context.device_context(), in_slice, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + } else if (filter_shape_vec.size() == 3) { + math::Vol2ColFunctor vol2col; + vol2col(context.device_context(), in_slice, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } // gemm Tensor filter_grad_slice = @@ -250,6 +316,5 @@ class GemmConvGrad2DKernel : public framework::OpKernel { } } }; - } // namespace operators } // namespace paddle diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..50081779a5ea3c81884007d4e4b7832dc4ea2bdd --- /dev/null +++ b/paddle/operators/conv_transpose_op.cc @@ -0,0 +1,203 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/conv_transpose_op.h" + +namespace paddle { +namespace operators { + +void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of ConvTransposeOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Filter"), + "Input(Filter) of ConvTransposeOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Output"), + "Output(Output) of ConvTransposeOp should not be null."); + + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + std::vector strides = ctx->Attrs().Get>("strides"); + std::vector paddings = ctx->Attrs().Get>("paddings"); + + for (size_t i = 0; i < paddings.size(); ++i) { + PADDLE_ENFORCE_EQ(paddings[i], 0, + "No Padding allowed in conv transpose op."); + } + + PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, + "ConvTransposeOp intput should be 4-D or 5-D tensor."); + PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(), + "ConvTransposeOp input dimension and filter dimension " + "should be the same."); + PADDLE_ENFORCE(in_dims.size() - strides.size() == 2U, + "ConvTransposeOp input dimension and strides dimension should " + "be consistent."); + PADDLE_ENFORCE_EQ(paddings.size(), strides.size(), + "ConvTransposeOp paddings dimension and Conv strides " + "dimension should be the same."); + PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0], + "In ConvTransposeOp, The input channel should be the same " + "as the number of filters."); + + std::vector output_shape({in_dims[0], filter_dims[1]}); + for (size_t i = 0; i < paddings.size(); ++i) { + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] + + filter_dims[i + 2]); + } + ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); +} + +Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( + framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "Input", + "(Tensor) The input tensor of convolution transpose operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of input channels, H is the height of the feature, and " + "W is the width of the feature."); + AddInput("Filter", + "(Tensor) The filter tensor of convolution transpose operator. " + "The format of the filter tensor is CMHW, where C is the number of " + "output image channels, M is the number of input image channels, " + "H is the height of the filter, and W is the width of the filter. " + "We enforce groups number == 1 and padding == 0 in " + "the convolution transpose scenario."); + AddOutput("Output", + "(Tensor) The output tensor of convolution transpose operator. " + "The format of output tensor is also NCHW."); + AddAttr>( + "strides", + "(vector defalut:{1, 1}), strides of convolution transpose operator.") + .SetDefault({1, 1}); + AddAttr>( + "paddings", + "(vector defalut:{0, 0}), paddings of convolution transpose operator.") + .SetDefault({0, 0}); + AddComment(R"DOC( +Convolution2D Transpose Operator. + +The convolution transpose operation calculates the output based on the input, filter +and strides, paddings, groups parameters. The size of each dimension of the +parameters is checked in the infer-shape. + +Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch +size, C is the number of channels, H is the height of the feature, and +W is the width of the feature. Parameters(ksize, strides, paddings) are two elements. +These two elements represent height and width, respectively. +The input(X) size and output(Out) size may be different. +Example: + Input: + Input shape: (N, C_in, H_in, W_in) + Filter shape: (C_in, C_out, H_f, W_f) + Output: + Output shape: (N, C_out, H_out, W_out) + where + H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; + W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; +)DOC"); +} + +Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( + framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Input", + "(Tensor) The input tensor of convolution transpose operator." + "The format of input tensor is NCDHW. Where N is batch size, C is " + "the number of channels, D is the depth of the feature, H is the " + "height of the feature, and " + "W is the width of the feature."); + AddInput("Filter", + "(Tensor) The filter tensor of convolution transpose operator." + "The format of the filter tensor is CMDHW, where C is the number of " + "output image channels, M is the number of input image channels, D " + "is the depth of the filter, H is the height of the filter, and " + "W is the width of the filter." + "We enforce groups number == 1 and padding == 0 in " + "the convolution3d transpose scenario."); + AddOutput("Output", + "(Tensor) The output tensor of convolution transpose operator." + "The format of output tensor is also NCDHW." + "Where N is batch size, C is " + "the number of channels, D is the depth of the feature, H is the " + "height of the feature, and W is the width of the feature."); + AddAttr>( + "strides", + "(vector defalut:{1, 1, 1}), strides of convolution transpose operator.") + .SetDefault({1, 1, 1}); + AddAttr>( + "paddings", + "(vector defalut:{0, 0, 0}), paddings of convolution transpose operator.") + .SetDefault({0, 0, 0}); + AddComment(R"DOC( +Convolution3D Transpose Operator. + +The convolution transpose operation calculates the output based on the input, filter +and strides, paddings, groups parameters. The size of each dimension of the +parameters is checked in the infer-shape. + +Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch +size, C is the number of channels, D is the depth of the feature, +H is the height of the feature, and W is the width of the feature. +Parameters(ksize, strides, paddings) are three elements. +These three elements represent depth, height and width, respectively. +The input(X) size and output(Out) size may be different. +Example: + Input: + Input shape: (N, C_in, D_in, H_in, W_in) + Filter shape: (C_in, C_out, D_f, H_f, W_f) + Output: + Output shape: (N, C_out, D_out, H_out, W_out) + where + D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; + H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; + W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + filter_size[2]; +)DOC"); +} + +void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const { + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + if (ctx->HasOutput(framework::GradVarName("Input"))) { + ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); + } + if (ctx->HasOutput(framework::GradVarName("Filter"))) { + ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); + } +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker, + conv2d_transpose_grad, ops::ConvTransposeOpGrad); + +REGISTER_OP_CPU_KERNEL( + conv2d_transpose, + ops::GemmConvTransposeKernel); +REGISTER_OP_CPU_KERNEL( + conv2d_transpose_grad, + ops::GemmConvTransposeGradKernel); + +REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker, + conv3d_transpose_grad, ops::ConvTransposeOpGrad); + +REGISTER_OP_CPU_KERNEL( + conv3d_transpose, + ops::GemmConvTransposeKernel); +REGISTER_OP_CPU_KERNEL( + conv3d_transpose_grad, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv2d_transpose_op.cu b/paddle/operators/conv_transpose_op.cu similarity index 63% rename from paddle/operators/conv2d_transpose_op.cu rename to paddle/operators/conv_transpose_op.cu index 931ac9eed294c4fe7c726d8cc2c4d9a39ec12828..401cddb379ced134b800d2a078fe130a2850fbb2 100644 --- a/paddle/operators/conv2d_transpose_op.cu +++ b/paddle/operators/conv_transpose_op.cu @@ -12,13 +12,20 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/conv2d_transpose_op.h" +#include "paddle/operators/conv_transpose_op.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( conv2d_transpose, - ops::GemmConv2DTransposeKernel); + ops::GemmConvTransposeKernel); REGISTER_OP_GPU_KERNEL( conv2d_transpose_grad, - ops::GemmConv2DTransposeGradKernel); + ops::GemmConvTransposeGradKernel); + +REGISTER_OP_GPU_KERNEL( + conv3d_transpose, + ops::GemmConvTransposeKernel); +REGISTER_OP_GPU_KERNEL( + conv3d_transpose_grad, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6c1a6220d784abf89ec789f94d9cff9e5414db04 --- /dev/null +++ b/paddle/operators/conv_transpose_op.h @@ -0,0 +1,293 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/im2col.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/vol2col.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using DDim = framework::DDim; + +// Define Op classes in .h file so that other conv transpose +// operator implementations can reuse the code. +class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Conv2DTransposeOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + +class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Conv3DTransposeOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + +class ConvTransposeOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override; +}; + +class ConvTransposeOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override; +}; + +template +class GemmConvTransposeKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* input = context.Input("Input"); + // The filter will be reshaped, so it should not be constant pointer + Tensor filter = *context.Input("Filter"); + Tensor* output = context.Output("Output"); + + std::vector strides = context.Attr>("strides"); + // TODO(Zhuoyuan): Paddings can be added in future. + // groups will alway be disabled in conv2dtranspose. + + const int batch_size = static_cast(input->dims()[0]); + + // input_shape_vec: {h, w} or {d, h, w} + std::vector input_shape_vec = framework::vectorize(input->dims()); + input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2); + + // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + std::vector filter_shape_vec = framework::vectorize(filter.dims()); + filter_shape_vec.erase(filter_shape_vec.begin(), + filter_shape_vec.begin() + 2); + + // use col_shape in the im2col and col2im (or vol2col and col2vol) + // calculation + // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w} + std::vector col_shape_vec; + col_shape_vec.push_back(output->dims()[1]); + col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), + filter_shape_vec.end()); + col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(), + input_shape_vec.end()); + DDim col_shape(framework::make_ddim(col_shape_vec)); + + // use col_matrix_shape in the gemm calculation + // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) + DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + + Tensor col; + col.mutable_data(col_shape, context.GetPlace()); + // col_matrix shares the same piece of data with col, + // but will be reshaped into a two-dimensional matrix shape + // to call the matrix multiplication interface. + Tensor col_matrix; + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + + // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w) + DDim output_shape = + framework::slice_ddim(output->dims(), 1, output->dims().size()); + + // input matrix size: (m, h * w) or (m, d * h * w) + DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]}; + + // filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w) + DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]}; + filter.Resize(filter_matrix_shape); + + output->mutable_data(context.GetPlace()); + math::SetConstant set_zero; + set_zero(context.device_context(), output, static_cast(0)); + + // convolution transpose: gemm + col2im or col2vol (similar to conv-backward + // on input) + for (int i = 0; i < batch_size; i++) { + // batch with size (m, h * w) or (m, d * h * w) + Tensor input_batch = input->Slice(i, i + 1).Resize(input_matrix_shape); + + // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w) + Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape); + + // col_matrix = filter * input_batch + // of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) + math::matmul(context.device_context(), filter, true, + input_batch, false, static_cast(1.0), + &col_matrix, static_cast(0.0)); + + if (filter_shape_vec.size() == 2) { + // col2im: col_matrix -> dy + // from (c * k_h * k_w, h * w) to (c, o_h, o_w) + math::Col2ImFunctor col2im; + + col2im(context.device_context(), output_batch, col, strides[0], + strides[1], 0, 0, 0, 0); + } else if (filter_shape_vec.size() == 3) { + // col2vol: col_matrix -> dy + // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w) + math::Col2VolFunctor col2vol; + col2vol(context.device_context(), output_batch, col, strides[0], + strides[1], strides[2], 0, 0, 0); + } + } + } +}; + +template +class GemmConvTransposeGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* input = context.Input("Input"); + const Tensor* output_grad = + context.Input(framework::GradVarName("Output")); + // For filter, we do not use const pointer b/c we will do reshape, + // but we should avoid modifying its value. + Tensor filter = *context.Input("Filter"); + Tensor* input_grad = + context.Output(framework::GradVarName("Input")); + Tensor* filter_grad = + context.Output(framework::GradVarName("Filter")); + + if ((!input_grad) && (!filter_grad)) return; + + std::vector strides = context.Attr>("strides"); + // Actually, no paddings and groups allowed in conv transpose. + std::vector paddings = context.Attr>("paddings"); + + const int batch_size = static_cast(input->dims()[0]); + + // input_shape_vec: {h, w} or {d, h, w} + std::vector input_shape_vec = framework::vectorize(input->dims()); + input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2); + + // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + std::vector filter_shape_vec = framework::vectorize(filter.dims()); + filter_shape_vec.erase(filter_shape_vec.begin(), + filter_shape_vec.begin() + 2); + + // use col_shape in the im2col and col2im (or vol2col and col2vol) + // calculation + // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w} + std::vector col_shape_vec; + col_shape_vec.push_back(output_grad->dims()[1]); + col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), + filter_shape_vec.end()); + col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(), + input_shape_vec.end()); + DDim col_shape(framework::make_ddim(col_shape_vec)); + + // use col_matrix_shape in the gemm calculation + // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) + DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + + // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w) + DDim output_shape = framework::slice_ddim(output_grad->dims(), 1, + output_grad->dims().size()); + + // input matrix size: (m, h * w) or (m, d * h * w) + DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]}; + + // filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w) + DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]}; + filter.Resize(filter_matrix_shape); + + // convolution transpose grad on input: + // im2col + gemm (similar to conv-forward) + // input need to compute gradient + if (input_grad || filter_grad) { + Tensor col; + col.mutable_data(col_shape, context.GetPlace()); + // col_matrix shares the same piece of data with col, + // but will be reshaped into a two-dimensional matrix shape + // to call the matrix multiplication interface. + Tensor col_matrix; + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + + Tensor filter_grad_; + math::SetConstant set_zero; + + if (input_grad) { + input_grad->mutable_data(context.GetPlace()); + set_zero(context.device_context(), input_grad, static_cast(0)); + } + if (filter_grad) { // filter size (m, c, k_h, k_w) + filter_grad->mutable_data(context.GetPlace()); + set_zero(context.device_context(), filter_grad, static_cast(0)); + filter_grad_ = *filter_grad; + filter_grad_.Resize(filter_matrix_shape); + } + + for (int i = 0; i < batch_size; i++) { + // batch with size (c, o_h * o_w) + Tensor output_grad_batch = + output_grad->Slice(i, i + 1).Resize(output_shape); + + if (filter_shape_vec.size() == 2) { + // im2col: dy -> col matrix + // from (c, o_h, o_w) to (c * k_h * k_w, h * w) + math::Im2ColFunctor im2col; + im2col(context.device_context(), output_grad_batch, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + } else if (filter_shape_vec.size() == 3) { + // vol2col: dy -> col_matrix + // from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w) + math::Vol2ColFunctor vol2col; + vol2col(context.device_context(), output_grad_batch, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } + + if (input_grad) { + // batch with size (m, h, w) + Tensor input_grad_batch = + input_grad->Slice(i, i + 1).Resize(input_matrix_shape); + // gemm: dx = filter * dy + // (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, h * w) + // or + // (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m, + // d, h, w) + math::matmul(context.device_context(), filter, false, + col_matrix, false, static_cast(1.0), + &input_grad_batch, static_cast(0.0)); + } + if (filter_grad) { + // input batch + Tensor in_batch = input->Slice(i, i + 1).Resize(input_matrix_shape); + // gemm: d_filter = x * dy^T + // (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, k_h * k_w) + // or + // (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d * + // k_h * k_w) + math::matmul(context.device_context(), in_batch, false, + col_matrix, true, static_cast(1.0), + &filter_grad_, static_cast(1.0)); + } + } + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 24df1fcadac75315890635f4d3aaa7146c1cc27b..9d41879b27a24f83090f5abf1325eca5f9488d00 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -114,21 +114,17 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { "where N is the batch size and D is the number of classes. " "This input is a probability computed by the previous operator, " "which is almost always the result of a softmax operator."); - AddInput( - "Label", - "(Tensor, default Tensor), the ground truth which is " - "a 2-D tensor. " - "When soft_label is set to false, Label is a Tensor with shape " - "[N x 1]. " - "When soft_label is set to true, Label is a Tensor " - "with shape [N x K]."); + AddInput("Label", + "(Tensor), the ground truth which is a 2-D tensor. When " + "soft_label is set to false, Label is a Tensor with shape " + "[N x 1]. When soft_label is set to true, Label is a " + "Tensor with shape [N x K]."); AddOutput("Y", - "(Tensor, default Tensor), a 2-D tensor " - "with shape [N x 1]. The cross entropy loss."); - AddAttr( - "soft_label", - "(bool, default false), a flag to indicate whether to interpretate " - "the given labels as soft labels.") + "(Tensor, default Tensor), a 2-D tensor with shape " + "[N x 1]. The cross entropy loss."); + AddAttr("soft_label", + "(bool, default false), a flag indicating whether to " + "interpretate the given labels as soft labels.") .SetDefault(false); AddComment(R"DOC( CrossEntropy Operator. diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 3f02214f30f886c5a03ad438fd795d44a0b8dddc..232d88e26bd2fb17f52c2dc3626d47c2615b1d42 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -34,15 +34,18 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { std::vector shape_int64(shape.size(), 0); std::transform(shape.begin(), shape.end(), shape_int64.begin(), [](int a) { return static_cast(a); }); - auto dims = framework::make_ddim(shape_int64); + auto output_dim = framework::make_ddim(shape_int64); - int dim_idx = ctx->Attrs().Get("dim_idx"); - PADDLE_ENFORCE_GE(dim_idx, 0); - PADDLE_ENFORCE_GT(static_cast(shape.size()), dim_idx); - PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), dim_idx); + int input_dim_idx = ctx->Attrs().Get("input_dim_idx"); + PADDLE_ENFORCE_GE(input_dim_idx, 0); + PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), input_dim_idx); - dims[dim_idx] = ctx->GetInputDim("Input")[dim_idx]; - ctx->SetOutputDim("Out", dims); + int output_dim_idx = ctx->Attrs().Get("output_dim_idx"); + PADDLE_ENFORCE_GE(output_dim_idx, 0); + PADDLE_ENFORCE_GT(static_cast(shape.size()), output_dim_idx); + + output_dim[output_dim_idx] = ctx->GetInputDim("Input")[input_dim_idx]; + ctx->SetOutputDim("Out", output_dim); } protected: @@ -69,8 +72,11 @@ class FillConstantBatchSizeLikeOpMaker "(Tensor) Tensor of specified shape will be filled " "with the specified value"); AddAttr>("shape", "(vector) The shape of the output"); - AddAttr("dim_idx", - "(int, default 0) The index of batch size dimension") + AddAttr("input_dim_idx", + "(int, default 0) the index of input's batch size dimension") + .SetDefault(0); + AddAttr("output_dim_idx", + "(int, default 0) the index of output's batch size dimension") .SetDefault(0); AddAttr("value", "(float, default 0) The value to be filled") .SetDefault(0.0f); @@ -86,9 +92,10 @@ Fill up a variable with specified constant value. } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(fill_constant_batch_size_like, - ops::FillConstantBatchSizeLikeOp, - ops::FillConstantBatchSizeLikeOpMaker); +REGISTER_OPERATOR(fill_constant_batch_size_like, + ops::FillConstantBatchSizeLikeOp, + paddle::framework::EmptyGradOpMaker, + ops::FillConstantBatchSizeLikeOpMaker); REGISTER_OP_CPU_KERNEL( fill_constant_batch_size_like, ops::FillConstantBatchSizeLikeOpKernel, diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index ee2219cd03313beae352c18adbe7e029fabbb6d3..f60425051cc93b77ef9e383f90a62eb9dfed44de 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -35,7 +35,9 @@ class FillConstantOp : public framework::OperatorWithKernel { protected: framework::DataType IndicateDataType( const framework::ExecutionContext &ctx) const override { - return static_cast(ctx.Attr("data_type")); + int data_type = ctx.Attr("data_type"); + VLOG(10) << " FillConstant data_type = " << data_type; + return static_cast(data_type); } }; @@ -71,4 +73,5 @@ REGISTER_OP_WITHOUT_GRADIENT(fill_constant, ops::FillConstantOp, REGISTER_OP_CPU_KERNEL( fill_constant, ops::FillConstantOpKernel, ops::FillConstantOpKernel, - ops::FillConstantOpKernel); + ops::FillConstantOpKernel, + ops::FillConstantOpKernel); diff --git a/paddle/operators/fill_constant_op.cu b/paddle/operators/fill_constant_op.cu index a57b11c6cba77ad7d258c47a8ebf887f359f9522..bca402a8b988b570a083e9ce253342304f4b8946 100644 --- a/paddle/operators/fill_constant_op.cu +++ b/paddle/operators/fill_constant_op.cu @@ -20,4 +20,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( fill_constant, ops::FillConstantOpKernel, ops::FillConstantOpKernel, - ops::FillConstantOpKernel); + ops::FillConstantOpKernel, + ops::FillConstantOpKernel); diff --git a/paddle/operators/gru_op.cc b/paddle/operators/gru_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5aa03f8916a67222fb0ca5781533766063e52683 --- /dev/null +++ b/paddle/operators/gru_op.cc @@ -0,0 +1,220 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/gru_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class GRUOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(%s) of GRUOp should not be null.", "Input"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(%s) of GRUOp should not be null.", "Weight"); + PADDLE_ENFORCE(ctx->HasOutput("BatchGate"), + "Output(%s) of GRUOp should not be null.", "BatchGate"); + PADDLE_ENFORCE(ctx->HasOutput("BatchResetHiddenPrev"), + "Output(%s) of GRUOp should not be null.", + "BatchResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasOutput("BatchHidden"), + "Output(%s) of GRUOp should not be null.", "BatchHidden"); + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), + "Output(%s) of GRUOp should not be null.", "Hidden"); + auto input_dims = ctx->GetInputDim("Input"); + auto weight_dims = ctx->GetInputDim("Weight"); + int input_size = input_dims[1]; + int frame_size = weight_dims[0]; + PADDLE_ENFORCE_EQ(input_size, frame_size * 3, + "The input_size must be 3 times of frame_size in GRUOp."); + PADDLE_ENFORCE_EQ( + weight_dims[1], frame_size * 3, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + if (ctx->HasInput("H0")) { + auto h0_dims = ctx->GetInputDim("H0"); + PADDLE_ENFORCE_EQ(h0_dims[1], frame_size, + "The width of H0 must be equal to frame_size."); + } + if (ctx->HasInput("Bias")) { + auto bias_dims = ctx->GetInputDim("Bias"); + int bias_height = bias_dims[0]; + int bias_width = bias_dims[1]; + PADDLE_ENFORCE_EQ(bias_height, 1, + "The shape of Bias must be [1, frame_size * 3]."); + PADDLE_ENFORCE_EQ(bias_width, frame_size * 3, + "The shape of Bias must be [1, frame_size * 3]."); + } + ctx->SetOutputDim("BatchGate", input_dims); + ctx->SetOutputDim("BatchResetHiddenPrev", {input_dims[0], frame_size}); + ctx->SetOutputDim("BatchHidden", {input_dims[0], frame_size}); + ctx->SetOutputDim("Hidden", {input_dims[0], frame_size}); + ctx->ShareLoD("Input", "Hidden"); + } +}; + +class GRUOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Input", + "(LoDTensor) The first input is a LodTensor, which supports " + "variable-time length input sequence. The underlying tensor in " + "this LoDTenosr is a matrix with shape (T X 3D), where, T is the " + "total time steps in this mini-batch, D is the hidden size."); + AddInput("H0", + "(Tensor, optional) The initial hidden state is an optional " + "input. This is a tensor with shape (N x D), where N is the " + "batch size, D is the hidden size.") + .AsDispensable(); + AddInput( + "Weight", + "(Tensor) The learnable hidden-hidden weight matrix with shape " + "(D x 3D), where D is the hidden size. The elements continuous in " + "memory can be divided into two parts. The first part are weights of " + "the update gate and reset gate with shape (D x 2D), and the second " + "part are weights of output candidate with shape (D x D)."); + AddInput("Bias", + "(Tensor, optional) Bias vector with shape (1 x 3D) concating " + "bias of the update gate, reset gate and output candidate.") + .AsDispensable(); + AddOutput("BatchGate", + "(LoDTensor) To compute with batches, sequence data will be " + "reorganized into several successive batches each containing " + "data from the same time step. The LoDTensor BatchGate contains " + "the update gate, reset gate and output candidate values " + "organized in batches. The LoD size is 2. The first LoD contains " + "the batch offsets and the second LoD contains the indexes in " + "the raw sequence data.") + .AsIntermediate(); + AddOutput( + "BatchResetHiddenPrev", + "(LoDTensor) The reseted hidden state LoDTensor organized in batches. " + "This LoDTensor is a matrix with shape (T X D) and has the same LoD " + "with `BatchGate`.") + .AsIntermediate(); + AddOutput( + "BatchHidden", + "(LoDTensor) The hidden state LoDTensor organized in batches. " + "This LoDTensor is a matrix with shape (T X D) and has the same LoD " + "with `BatchGate`.") + .AsIntermediate(); + AddOutput( + "Hidden", + "(LoDTensor) the hidden state LoDTensor organized in sequences. " + "This LoDTensor is a matrix with shape (T X D) and has the same LoD " + "with `BatchGate`."); + AddAttr("activation", + "(string, default tanh) " + "The activation type used for output candidate {h}_t.") + .SetDefault("tanh"); + AddAttr( + "gate_activation", + "(string, default sigmoid) " + "The activation type used in update gate and reset gate.") + .SetDefault("sigmoid"); + AddAttr("is_reverse", + "(bool, defalut: False) " + "whether to compute reversed GRU.") + .SetDefault(false); + AddComment(R"DOC( +GRU Operator implements part calculations of the complete GRU as following: + +\f[ +update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\ +reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\ +output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\ +output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t) +\f] + +@note To implement the complete GRU, fully-connected operator must be used +before to feed xu, xr and xc as the Input of GRU operator. +)DOC"); + } +}; + +class GRUGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(%s) of GRUGradOp should not be null.", "Input"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(%s) of GRUGradOp should not be null.", "Weight"); + PADDLE_ENFORCE(ctx->HasInput("BatchGate"), + "Input(%s) of GRUGradOp should not be null.", "BatchGate"); + PADDLE_ENFORCE(ctx->HasInput("BatchResetHiddenPrev"), + "Input(%s) of GRUGradOp should not be null.", + "BatchResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("BatchHidden"), + "Input(%s) of GRUOp should not be null.", "BatchHidden"); + PADDLE_ENFORCE(ctx->HasInput("Hidden"), + "Input(%s) of GRUGradOp should not be null.", "Hidden"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), + "Input(%s@GRAD) of GRUGradOp should not be null.", "Hidden"); + auto input_dims = ctx->GetInputDim("Input"); + auto weight_dims = ctx->GetInputDim("Weight"); + int input_size = input_dims[1]; + int frame_size = weight_dims[0]; + int weight_height = weight_dims[0]; + int weight_width = weight_dims[1]; + PADDLE_ENFORCE_EQ(input_size, frame_size * 3, + "The input_size must be 3 times of frame_size in GRUOp."); + PADDLE_ENFORCE_EQ( + weight_height, frame_size, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + PADDLE_ENFORCE_EQ( + weight_width, frame_size * 3, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + if (ctx->HasInput("H0")) { + auto h0_dims = ctx->GetInputDim("H0"); + PADDLE_ENFORCE_EQ(h0_dims[1], frame_size, + "The width of H0 must be equal to frame_size."); + auto h0_grad_name = framework::GradVarName("H0"); + if (ctx->HasOutput(h0_grad_name)) + ctx->SetOutputDim(h0_grad_name, h0_dims); + } + if (ctx->HasInput("Bias")) { + auto bias_dims = ctx->GetInputDim("Bias"); + int bias_height = bias_dims[0]; + int bias_width = bias_dims[1]; + PADDLE_ENFORCE_EQ(bias_height, 1, + "The shape of Bias must be [1, frame_size * 3]."); + PADDLE_ENFORCE_EQ(bias_width, frame_size * 3, + "The shape of Bias must be [1, frame_size * 3]."); + auto bias_grad_name = framework::GradVarName("Bias"); + if (ctx->HasOutput(bias_grad_name)) + ctx->SetOutputDim(bias_grad_name, bias_dims); + } + auto input_grad_name = framework::GradVarName("Input"); + if (ctx->HasOutput(input_grad_name)) + ctx->SetOutputDim(input_grad_name, input_dims); + auto weight_grad_name = framework::GradVarName("Weight"); + if (ctx->HasOutput(weight_grad_name)) + ctx->SetOutputDim(weight_grad_name, weight_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gru, ops::GRUOp, ops::GRUOpMaker, gru_grad, ops::GRUGradOp); +REGISTER_OP_CPU_KERNEL(gru, ops::GRUKernel, + ops::GRUKernel); +REGISTER_OP_CPU_KERNEL(gru_grad, + ops::GRUGradKernel, + ops::GRUGradKernel); diff --git a/paddle/operators/gru_op.cu b/paddle/operators/gru_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..35538c74b4bf678f8068999bfadb2589a1671be0 --- /dev/null +++ b/paddle/operators/gru_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/gru_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gru, ops::GRUKernel, + ops::GRUKernel); +REGISTER_OP_GPU_KERNEL(gru_grad, + ops::GRUGradKernel, + ops::GRUGradKernel); diff --git a/paddle/operators/gru_op.h b/paddle/operators/gru_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ba90ec9816c40a6a49065ac6efcee6b93dffce90 --- /dev/null +++ b/paddle/operators/gru_op.h @@ -0,0 +1,231 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/operators/math/gru_compute.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/sequence2batch.h" + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; + +template +using EigenMatrix = framework::EigenMatrix; + +template +class GRUKernel : public framework::OpKernel { + public: + void BatchCompute(const framework::ExecutionContext& context) const { + auto* input = context.Input("Input"); + auto* h0 = context.Input("H0"); + const T* h0_data = h0 ? h0->data() : nullptr; + auto* weight = context.Input("Weight"); + const T* weight_data = weight->data(); + auto* bias = context.Input("Bias"); + auto* batch_gate = context.Output("BatchGate"); + batch_gate->mutable_data(context.GetPlace()); + auto* batch_reset_hidden_prev = + context.Output("BatchResetHiddenPrev"); + batch_reset_hidden_prev->mutable_data(context.GetPlace()); + auto* batch_hidden = context.Output("BatchHidden"); + batch_hidden->mutable_data(context.GetPlace()); + auto* hidden = context.Output("Hidden"); + hidden->mutable_data(context.GetPlace()); + + context.ShareLoD("Input", "Hidden"); + + auto hidden_dims = hidden->dims(); + + bool is_reverse = context.Attr("is_reverse"); + math::LoDTensor2BatchFunctor to_batch; + to_batch(context.device_context(), *input, *batch_gate, true, is_reverse); + + int frame_size = hidden_dims[1]; + int batch_size = hidden_dims[0]; + auto g = EigenMatrix::From(*batch_gate); + auto place = context.GetEigenDevice(); + if (bias) { + auto b = EigenMatrix::From(*bias); + g.device(place) = g + + b.reshape(Eigen::array({{1, frame_size * 3}})) + .broadcast(Eigen::array({{batch_size, 1}})); + } + + math::hl_gru_value gru_value; + gru_value.gateWeight = const_cast(weight_data); + gru_value.stateWeight = + const_cast(weight_data + 2 * frame_size * frame_size); + gru_value.prevOutValue = const_cast(h0_data); + auto batch_starts = batch_gate->lod()[0]; + size_t num_batch = batch_starts.size() - 1; + for (size_t n = 0; n < num_batch; n++) { + int bstart = static_cast(batch_starts[n]); + int bend = static_cast(batch_starts[n + 1]); + int cur_batch_size = bend - bstart; + + Tensor gate_t = batch_gate->Slice(bstart, bend); + Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend); + Tensor hidden_t = batch_hidden->Slice(bstart, bend); + gru_value.outputValue = hidden_t.data(); + gru_value.gateValue = gate_t.data(); + gru_value.resetOutputValue = reset_hidden_prev_t.data(); + math::GRUUnitFunctor::compute( + context.device_context(), gru_value, frame_size, cur_batch_size, + math::ActiveType(context.Attr("activation")), + math::ActiveType(context.Attr("gate_activation"))); + gru_value.prevOutValue = gru_value.outputValue; + } + + math::Batch2LoDTensorFunctor to_seq; + batch_hidden->set_lod(batch_gate->lod()); + to_seq(context.device_context(), *batch_hidden, *hidden); + } + + void Compute(const framework::ExecutionContext& context) const override { + BatchCompute(context); + } +}; + +template +class GRUGradKernel : public framework::OpKernel { + public: + void BatchCompute(const framework::ExecutionContext& context) const { + auto* h0 = context.Input("H0"); + const T* h0_data = h0 ? h0->data() : nullptr; + auto* weight = context.Input("Weight"); + const T* weight_data = weight->data(); + auto* batch_gate = context.Input("BatchGate"); + auto* batch_reset_hidden_prev = + context.Input("BatchResetHiddenPrev"); + auto* batch_hidden = context.Input("BatchHidden"); + auto* hidden = context.Input("Hidden"); + auto* hidden_grad = + context.Input(framework::GradVarName("Hidden")); + auto* input_grad = + context.Output(framework::GradVarName("Input")); + auto* h0_grad = context.Output(framework::GradVarName("H0")); + auto* weight_grad = + context.Output(framework::GradVarName("Weight")); + auto* bias_grad = context.Output(framework::GradVarName("Bias")); + + auto gate_dims = batch_gate->dims(); + auto hidden_dims = hidden->dims(); + int frame_size = hidden_dims[1]; + + math::LoDTensor2BatchFunctor to_batch; + LoDTensor batch_hidden_grad, batch_gate_grad, batch_reset_hidden_prev_grad; + batch_hidden_grad.mutable_data(hidden_dims, context.GetPlace()); + batch_gate_grad.mutable_data(gate_dims, context.GetPlace()); + batch_reset_hidden_prev_grad.mutable_data(hidden_dims, + context.GetPlace()); + math::SetConstant zero; + zero(context.device_context(), &batch_hidden_grad, static_cast(0.0)); + zero(context.device_context(), &batch_gate_grad, static_cast(0.0)); + zero(context.device_context(), &batch_reset_hidden_prev_grad, + static_cast(0.0)); + + bool is_reverse = context.Attr("is_reverse"); + batch_hidden_grad.set_lod(batch_hidden->lod()); + to_batch(context.device_context(), *hidden_grad, batch_hidden_grad, false, + is_reverse); + + math::hl_gru_value gru_value; + gru_value.gateWeight = const_cast(weight_data); + gru_value.stateWeight = + const_cast(weight_data + 2 * frame_size * frame_size); + + math::hl_gru_grad gru_grad; + if (weight_grad) { + gru_grad.gateWeightGrad = + weight_grad->mutable_data(context.GetPlace()); + zero(context.device_context(), weight_grad, static_cast(0.0)); + gru_grad.stateWeightGrad = + weight_grad->data() + 2 * frame_size * frame_size; + } else { + gru_grad.gateWeightGrad = nullptr; + gru_grad.stateWeightGrad = nullptr; + } + + auto batch_starts = batch_hidden_grad.lod()[0]; + size_t num_batch = batch_starts.size() - 1; + for (int n = static_cast(num_batch) - 1; n >= 0; n--) { + int bstart = static_cast(batch_starts[n]); + int bend = static_cast(batch_starts[n + 1]); + int cur_batch_size = bend - bstart; + + Tensor gate_t = batch_gate->Slice(bstart, bend); + gru_value.gateValue = gate_t.data(); + Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend); + gru_value.resetOutputValue = reset_hidden_prev_t.data(); + + Tensor hidden_grad_t = batch_hidden_grad.Slice(bstart, bend); + gru_grad.outputGrad = hidden_grad_t.data(); + Tensor gate_grad_t = batch_gate_grad.Slice(bstart, bend); + gru_grad.gateGrad = gate_grad_t.data(); + Tensor reset_hidden_prev_grad_t = + batch_reset_hidden_prev_grad.Slice(bstart, bend); + gru_grad.resetOutputGrad = reset_hidden_prev_grad_t.data(); + if (n == 0) { + gru_value.prevOutValue = const_cast(h0_data); + if (h0_grad) { + T* h0_grad_data = h0_grad->mutable_data(context.GetPlace()); + zero(context.device_context(), h0_grad, static_cast(0.0)); + gru_grad.prevOutGrad = h0_grad_data; + } else { + gru_grad.prevOutGrad = nullptr; + } + } else { + int bstart_pre = static_cast(batch_starts[n - 1]); + Tensor hidden_prev_t = batch_hidden->Slice(bstart_pre, bstart); + gru_value.prevOutValue = hidden_prev_t.data(); + Tensor hidden_prev_grad_t = batch_hidden_grad.Slice(bstart_pre, bstart); + gru_grad.prevOutGrad = hidden_prev_grad_t.data(); + } + + math::GRUUnitGradFunctor::compute( + context.device_context(), gru_value, gru_grad, frame_size, + cur_batch_size, + math::ActiveType(context.Attr("activation")), + math::ActiveType(context.Attr("gate_activation"))); + } + if (input_grad) { + input_grad->mutable_data(context.GetPlace()); + math::Batch2LoDTensorFunctor to_seq; + batch_gate_grad.set_lod(batch_gate->lod()); + to_seq(context.device_context(), batch_gate_grad, *input_grad); + } + if (bias_grad) { + bias_grad->mutable_data(context.GetPlace()); + auto d_b = EigenMatrix::From(*bias_grad); + auto d_g = EigenMatrix::From(batch_gate_grad); + auto place = context.GetEigenDevice(); + d_b.device(place) = d_g.sum(Eigen::array({{0}})); + } + } + + void Compute(const framework::ExecutionContext& context) const override { + BatchCompute(context); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/increment_op.cc b/paddle/operators/increment_op.cc index c3e9308fe0ad6a90ce5e9097d078dfe3a3e1c20c..deb02bf2bf82a22a2b59b1ee16c222a162863144 100644 --- a/paddle/operators/increment_op.cc +++ b/paddle/operators/increment_op.cc @@ -31,7 +31,6 @@ class IncrementOp : public framework::OperatorWithKernel { } }; -template class IncrementOpMaker : public framework::OpProtoAndCheckerMaker { public: IncrementOpMaker(framework::OpProto *proto, @@ -39,10 +38,10 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "(Tensor) The input tensor of increment operator"); AddOutput("Out", "(Tensor) The output tensor of increment operator."); - AddAttr("step", - "(float, default 1.0) " - "The step size by which the " - "input tensor will be incremented.") + AddAttr("step", + "(float, default 1.0) " + "The step size by which the " + "input tensor will be incremented.") .SetDefault(1.0); AddComment(R"DOC( Increment Operator. @@ -73,7 +72,10 @@ class IncrementGradOpMaker : public framework::SingleGradOpDescMaker { namespace ops = paddle::operators; -REGISTER_OPERATOR(increment, ops::IncrementOp, ops::IncrementOpMaker, +REGISTER_OPERATOR(increment, ops::IncrementOp, ops::IncrementOpMaker, ops::IncrementGradOpMaker); -REGISTER_OP_CPU_KERNEL(increment, - ops::IncrementKernel); +REGISTER_OP_CPU_KERNEL( + increment, ops::IncrementKernel, + ops::IncrementKernel, + ops::IncrementKernel, + ops::IncrementKernel); diff --git a/paddle/operators/increment_op.cu b/paddle/operators/increment_op.cu index 659c380d147a36650452bea23b30cbcf1ff516ee..f97a6c468522f033687bd83ae5b1a1bc7d86fa80 100644 --- a/paddle/operators/increment_op.cu +++ b/paddle/operators/increment_op.cu @@ -16,4 +16,7 @@ REGISTER_OP_GPU_KERNEL( increment, - paddle::operators::IncrementKernel); + paddle::operators::IncrementKernel, + paddle::operators::IncrementKernel, + paddle::operators::IncrementKernel, + paddle::operators::IncrementKernel); diff --git a/paddle/operators/increment_op.h b/paddle/operators/increment_op.h index 342e254fc453555c70923efbca02fdfd014af015..3d53256dd1a277d7face8b43860d6672d7a68cfb 100644 --- a/paddle/operators/increment_op.h +++ b/paddle/operators/increment_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { -template +template class IncrementKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& context) const { @@ -27,7 +27,7 @@ class IncrementKernel : public framework::OpKernel { auto* in = context.Input("X"); tensor->mutable_data(in->place()); - auto step = static_cast(context.Attr("step")); + auto step = static_cast(context.Attr("step")); auto eigen_out = framework::EigenVector::Flatten(*tensor); auto eigen_in = framework::EigenVector::Flatten(*in); diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index 40cc177d0f19c2359626ef972e787a0b1c5580f8..90bc9f4f922e7aa09523bad8ffb3ef477dd89857 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -8,20 +8,24 @@ if(WITH_GPU) nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) + nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function) nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) + nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions) else() cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) cc_library(softmax SRCS softmax.cc DEPS operator) cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator) cc_library(pooling SRCS pooling.cc DEPS device_context) + cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function) cc_library(vol2col SRCS vol2col.cc DEPS device_context) cc_library(context_project SRCS context_project.cc DEPS device_context) cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) + cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function) endif() cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/detail/gru_cpu_kernel.h b/paddle/operators/math/detail/gru_cpu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..51af140cf4d5e6581765bea00033fa53d383230d --- /dev/null +++ b/paddle/operators/math/detail/gru_cpu_kernel.h @@ -0,0 +1,424 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/operators/math/detail/activation_functions.h" +#include "paddle/operators/math/gru_compute.h" + +namespace paddle { +namespace operators { +namespace math { +namespace detail { + +#ifndef __NVCC__ + +template +void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, + T *gateValue, T *resetOutputValue, + T *prevOutputValue, int frameSize, + activation_mode_t active_gate) { + T rValueUpdateGate; + T rValueResetGate; + T rValueResetOutput; + T rPrevOut = 0; + T *updateGate = gateValue; + T *resetGate = gateValue + frameSize; + + for (int i = 0; i < frameSize; i++) { + rValueUpdateGate = updateGate[i]; + rValueResetGate = resetGate[i]; + if (prevOutputValue) { + rPrevOut = prevOutputValue[i]; + } + + opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut, + rValueResetOutput, active_gate); + + updateGate[i] = rValueUpdateGate; + resetGate[i] = rValueResetGate; + resetOutputValue[i] = rValueResetOutput; + } +} + +template +void hl_naive_gru_forward_final_output(OpFinalOutput opFinalOutput, + T *gateValue, T *prevOutputValue, + T *outputValue, int frameSize, + activation_mode_t active_node) { + T rValueUpdateGate; + T rValueFrameState; + T rPrevOut = 0; + T rOutput; + T *updateGate = gateValue; + T *frameState = gateValue + frameSize * 2; + + for (int i = 0; i < frameSize; i++) { + rValueUpdateGate = updateGate[i]; + rValueFrameState = frameState[i]; + if (prevOutputValue) { + rPrevOut = prevOutputValue[i]; + } + + opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput, + active_node); + + frameState[i] = rValueFrameState; + outputValue[i] = rOutput; + } +} + +template +void hl_avx_gru_forward_reset_output(OpResetOutput opResetOutput, T *gateValue, + T *resetOutputValue, T *prevOutputValue, + int frameSize, + activation_mode_t active_gate) { +#ifdef __AVX__ + __m256 rValueUpdateGate; + __m256 rValueResetGate; + __m256 rValueResetOutput; + __m256 rPrevOut = _mm256_set1_ps(0.0f); + __m256 *updateGate = (__m256 *)gateValue; + __m256 *resetGate = (__m256 *)(gateValue + frameSize); + + for (int i = 0; i < frameSize / 8; i++) { + rValueUpdateGate = updateGate[i]; + rValueResetGate = resetGate[i]; + if (prevOutputValue) { + rPrevOut = ((__m256 *)prevOutputValue)[i]; + } + + opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut, + rValueResetOutput, active_gate); + + updateGate[i] = rValueUpdateGate; + resetGate[i] = rValueResetGate; + ((__m256 *)resetOutputValue)[i] = rValueResetOutput; + } +#endif +} + +template +void hl_avx_gru_forward_final_output(OpFinalOutput opFinalOutput, T *gateValue, + T *prevOutputValue, T *outputValue, + int frameSize, + activation_mode_t active_node) { +#ifdef __AVX__ + __m256 rValueUpdateGate; + __m256 rValueFrameState; + __m256 rPrevOut = _mm256_set1_ps(0.0f); + __m256 rOutput; + __m256 *updateGate = (__m256 *)gateValue; + __m256 *frameState = (__m256 *)(gateValue + frameSize * 2); + + for (int i = 0; i < frameSize / 8; i++) { + rValueUpdateGate = updateGate[i]; + rValueFrameState = frameState[i]; + if (prevOutputValue) { + rPrevOut = ((__m256 *)prevOutputValue)[i]; + } + + opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput, + active_node); + + frameState[i] = rValueFrameState; + ((__m256 *)outputValue)[i] = rOutput; + } +#endif +} + +template +inline void forward_reset_output(OpResetOutput opResetOutput, + hl_gru_value value, int frameSize, + int batchSize, activation_mode_t active_gate) { + for (int b = 0; b < batchSize; b++) { + if (OpResetOutput::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) { + hl_avx_gru_forward_reset_output( + opResetOutput, value.gateValue, value.resetOutputValue, + value.prevOutValue, frameSize, active_gate); + } else { + hl_naive_gru_forward_reset_output( + opResetOutput, value.gateValue, value.resetOutputValue, + value.prevOutValue, frameSize, active_gate); + } + + value.gateValue += frameSize * 3; + value.resetOutputValue += frameSize; + if (value.prevOutValue) { + value.prevOutValue += frameSize; + } + } +} + +template +inline void forward_final_output(OpFinalOutput opFinalOutput, + hl_gru_value value, int frameSize, + int batchSize, activation_mode_t active_node) { + for (int b = 0; b < batchSize; b++) { + if (OpFinalOutput::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) { + hl_avx_gru_forward_final_output(opFinalOutput, value.gateValue, + value.prevOutValue, value.outputValue, + frameSize, active_node); + } else { + hl_naive_gru_forward_final_output(opFinalOutput, value.gateValue, + value.prevOutValue, value.outputValue, + frameSize, active_node); + } + + value.gateValue += frameSize * 3; + value.outputValue += frameSize; + if (value.prevOutValue) { + value.prevOutValue += frameSize; + } + } +} + +template +void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, T *gateValue, + T *gateGrad, T *prevOutValue, + T *prevOutGrad, T *outputGrad, + int frameSize, + activation_mode_t active_node) { + T rUpdateGateValue; + T rUpdateGateGrad; + T rFrameStateValue; + T rFrameStateGrad; + T rOutGrad; + T rPrevOutValue = 0; + T rPrevOutGrad = 0; + T *updateGateValue = gateValue; + T *updateGateGrad = gateGrad; + T *frameStateValue = gateValue + frameSize * 2; + T *frameStateGrad = gateGrad + frameSize * 2; + + for (int i = 0; i < frameSize; i++) { + rUpdateGateValue = updateGateValue[i]; + rFrameStateValue = frameStateValue[i]; + rOutGrad = outputGrad[i]; + if (prevOutValue) { + rPrevOutValue = prevOutValue[i]; + } + if (prevOutGrad) { + rPrevOutGrad = prevOutGrad[i]; + } + + opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue, + rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad, + active_node); + + updateGateGrad[i] = rUpdateGateGrad; + frameStateGrad[i] = rFrameStateGrad; + if (prevOutGrad) { + prevOutGrad[i] = rPrevOutGrad; + } + } +} + +template +void hl_naive_gru_backward_reset_grad(OpResetGrad opResetGrad, T *gateValue, + T *gateGrad, T *prevOutValue, + T *prevOutGrad, T *resetOutputGrad, + int frameSize, + activation_mode_t active_gate) { + T rUpdateGateValue; + T rUpdateGateGrad; + T rResetGateValue; + T rResetGateGrad; + T rResetOutputGrad = 0; + T rPrevOutValue = 0; + T rPrevOutGrad = 0; + T *updateGateValue = gateValue; + T *updateGateGrad = gateGrad; + T *resetGateValue = gateValue + frameSize; + T *resetGateGrad = gateGrad + frameSize; + + for (int i = 0; i < frameSize; i++) { + rUpdateGateValue = updateGateValue[i]; + rUpdateGateGrad = updateGateGrad[i]; + rResetGateValue = resetGateValue[i]; + + if (prevOutValue && prevOutGrad) { + rResetOutputGrad = resetOutputGrad[i]; + } + if (prevOutValue) { + rPrevOutValue = prevOutValue[i]; + } + if (prevOutGrad) { + rPrevOutGrad = prevOutGrad[i]; + } + + opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue, + rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad, + active_gate); + + updateGateGrad[i] = rUpdateGateGrad; + resetGateGrad[i] = rResetGateGrad; + if (prevOutGrad) { + prevOutGrad[i] = rPrevOutGrad; + } + } +} + +template +void hl_avx_gru_backward_state_grad(OpStateGrad opStateGrad, T *gateValue, + T *gateGrad, T *prevOutValue, + T *prevOutGrad, T *outputGrad, + int frameSize, + activation_mode_t active_node) { +#ifdef __AVX__ + __m256 rUpdateGateValue; + __m256 rUpdateGateGrad; + __m256 rFrameStateValue; + __m256 rFrameStateGrad; + __m256 rOutGrad; + __m256 rPrevOutValue = _mm256_set1_ps(0.0f); + __m256 rPrevOutGrad = _mm256_set1_ps(0.0f); + __m256 *updateGateValue = (__m256 *)gateValue; + __m256 *updateGateGrad = (__m256 *)gateGrad; + __m256 *frameStateValue = (__m256 *)(gateValue + frameSize * 2); + __m256 *frameStateGrad = (__m256 *)(gateGrad + frameSize * 2); + + for (int i = 0; i < frameSize / 8; i++) { + rUpdateGateValue = updateGateValue[i]; + rFrameStateValue = frameStateValue[i]; + rOutGrad = ((__m256 *)outputGrad)[i]; + if (prevOutValue) { + rPrevOutValue = ((__m256 *)prevOutValue)[i]; + } + if (prevOutGrad) { + rPrevOutGrad = ((__m256 *)prevOutGrad)[i]; + } + + opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue, + rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad, + active_node); + + updateGateGrad[i] = rUpdateGateGrad; + frameStateGrad[i] = rFrameStateGrad; + if (prevOutGrad) { + ((__m256 *)prevOutGrad)[i] = rPrevOutGrad; + } + } +#endif +} + +template +void hl_avx_gru_backward_reset_grad(OpResetGrad opResetGrad, T *gateValue, + T *gateGrad, T *prevOutValue, + T *prevOutGrad, T *resetOutputGrad, + int frameSize, + activation_mode_t active_gate) { +#ifdef __AVX__ + __m256 rUpdateGateValue; + __m256 rUpdateGateGrad; + __m256 rResetGateValue; + __m256 rResetGateGrad; + __m256 rResetOutputGrad = _mm256_set1_ps(0.0f); + __m256 rPrevOutValue = _mm256_set1_ps(0.0f); + __m256 rPrevOutGrad = _mm256_set1_ps(0.0f); + __m256 *updateGateValue = (__m256 *)gateValue; + __m256 *updateGateGrad = (__m256 *)gateGrad; + __m256 *resetGateValue = (__m256 *)(gateValue + frameSize); + __m256 *resetGateGrad = (__m256 *)(gateGrad + frameSize); + + for (int i = 0; i < frameSize / 8; i++) { + rUpdateGateValue = updateGateValue[i]; + rUpdateGateGrad = updateGateGrad[i]; + rResetGateValue = resetGateValue[i]; + + if (prevOutValue && prevOutGrad) { + rResetOutputGrad = ((__m256 *)resetOutputGrad)[i]; + } + if (prevOutValue) { + rPrevOutValue = ((__m256 *)prevOutValue)[i]; + } + if (prevOutGrad) { + rPrevOutGrad = ((__m256 *)prevOutGrad)[i]; + } + + opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue, + rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad, + active_gate); + + updateGateGrad[i] = rUpdateGateGrad; + resetGateGrad[i] = rResetGateGrad; + if (prevOutGrad) { + ((__m256 *)prevOutGrad)[i] = rPrevOutGrad; + } + } +#endif +} + +template +inline void backward_state_grad(OpStateGrad opStateGrad, hl_gru_value value, + hl_gru_grad grad, int frameSize, + int batchSize, activation_mode_t active_node) { + for (int b = 0; b < batchSize; b++) { + if (OpStateGrad::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) { + hl_avx_gru_backward_state_grad( + opStateGrad, value.gateValue, grad.gateGrad, value.prevOutValue, + grad.prevOutGrad, grad.outputGrad, frameSize, active_node); + } else { + hl_naive_gru_backward_state_grad( + opStateGrad, value.gateValue, grad.gateGrad, value.prevOutValue, + grad.prevOutGrad, grad.outputGrad, frameSize, active_node); + } + + value.gateValue += frameSize * 3; + if (value.prevOutValue) { + value.prevOutValue += frameSize; + } + + grad.gateGrad += frameSize * 3; + grad.outputGrad += frameSize; + if (grad.prevOutGrad) { + grad.prevOutGrad += frameSize; + } + } +} + +template +inline void backward_reset_grad(OpResetGrad opResetGrad, hl_gru_value value, + hl_gru_grad grad, int frameSize, + int batchSize, activation_mode_t active_gate) { + for (int b = 0; b < batchSize; b++) { + if (OpResetGrad::avx && !(frameSize & (8 - 1)) && (sizeof(T) == 4)) { + hl_avx_gru_backward_reset_grad( + opResetGrad, value.gateValue, grad.gateGrad, value.prevOutValue, + grad.prevOutGrad, grad.resetOutputGrad, frameSize, active_gate); + } else { + hl_naive_gru_backward_reset_grad( + opResetGrad, value.gateValue, grad.gateGrad, value.prevOutValue, + grad.prevOutGrad, grad.resetOutputGrad, frameSize, active_gate); + } + + value.gateValue += frameSize * 3; + if (value.prevOutValue) { + value.prevOutValue += frameSize; + } + + grad.gateGrad += frameSize * 3; + grad.resetOutputGrad += frameSize; + if (grad.prevOutGrad) { + grad.prevOutGrad += frameSize; + } + } +} + +#endif + +} // namespace detail +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/detail/gru_gpu_kernel.h b/paddle/operators/math/detail/gru_gpu_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..6441c648b048422c110872a85aa8cb719f11a8d7 --- /dev/null +++ b/paddle/operators/math/detail/gru_gpu_kernel.h @@ -0,0 +1,203 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/operators/math/detail/activation_functions.h" +#include "paddle/operators/math/gru_compute.h" +#include "paddle/platform/cuda_helper.h" +#include "paddle/platform/device_context.h" + +#include + +namespace paddle { +namespace operators { +namespace math { +namespace detail { + +/* + * threads(framePerBlock, batchPerBlock) + * grid(frameBlocks, batchBlocks) + */ +template +__global__ void KeGruForwardResetOutput(OpResetOutput opResetOutput, + T *gateValue, T *resetOutputValue, + T *prevOutputValue, int frameSize, + int batchSize, + activation_mode_t active_gate) { + const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; + if (frameIdx >= frameSize) return; + + int batchIdx = 0; + if (isBatch) { + batchIdx = blockIdx.y * blockDim.y + threadIdx.y; + if (batchIdx >= batchSize) return; + gateValue += batchIdx * 3 * frameSize; + resetOutputValue += batchIdx * frameSize; + } + + T rPrevOut = 0; + T rValueResetOutput; + T rValueUpdateGate = gateValue[frameIdx + frameSize * 0]; + T rValueResetGate = gateValue[frameIdx + frameSize * 1]; + + if (prevOutputValue) { + if (isBatch) prevOutputValue += batchIdx * frameSize; + rPrevOut = prevOutputValue[frameIdx]; + } + + opResetOutput(rValueUpdateGate, rValueResetGate, rPrevOut, rValueResetOutput, + active_gate); + + gateValue[frameIdx + frameSize * 0] = rValueUpdateGate; + gateValue[frameIdx + frameSize * 1] = rValueResetGate; + resetOutputValue[frameIdx] = rValueResetOutput; +} + +/* + * threads(framePerBlock, batchPerBlock) + * grid(frameBlocks, batchBlocks) + */ +template +__global__ void KeGruForwardFinalOutput(OpFinalOutput opFinalOutput, + T *gateValue, T *prevOutputValue, + T *outputValue, int frameSize, + int batchSize, + activation_mode_t active_node) { + const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; + if (frameIdx >= frameSize) return; + int batchIdx = 0; + if (isBatch) { + batchIdx = blockIdx.y * blockDim.y + threadIdx.y; + if (batchIdx >= batchSize) return; + gateValue += batchIdx * 3 * frameSize; + outputValue += batchIdx * frameSize; + } + + T rOutput; + T rPrevOut = 0; + T rValueUpdateGate = gateValue[frameIdx + frameSize * 0]; + T rValueFrameState = gateValue[frameIdx + frameSize * 2]; + + if (prevOutputValue) { + if (isBatch) prevOutputValue += batchIdx * frameSize; + rPrevOut = prevOutputValue[frameIdx]; + } + + opFinalOutput(rValueUpdateGate, rValueFrameState, rPrevOut, rOutput, + active_node); + + gateValue[frameIdx + frameSize * 2] = rValueFrameState; + outputValue[frameIdx] = rOutput; +} + +/* + * threads(framePerBlock, batchPerBlock) + * grid(frameBlocks, batchBlocks) + */ +template +__global__ void KeGruBackwardStateGrad(OpStateGrad opStateGrad, T *gateValue, + T *gateGrad, T *prevOutValue, + T *prevOutGrad, T *outputGrad, + int frameSize, int batchSize, + activation_mode_t active_node) { + const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; + if (frameIdx >= frameSize) return; + int batchIdx = 0; + if (isBatch) { + batchIdx = blockIdx.y * blockDim.y + threadIdx.y; + if (batchIdx >= batchSize) return; + gateValue += batchIdx * 3 * frameSize; + gateGrad += batchIdx * 3 * frameSize; + outputGrad += batchIdx * frameSize; + } + + T rUpdateGateGrad; + T rFrameStateGrad; + T rPrevOutValue = 0; + T rPrevOutGrad = 0; + T rUpdateGateValue = gateValue[frameIdx + frameSize * 0]; + T rFrameStateValue = gateValue[frameIdx + frameSize * 2]; + T rOutGrad = outputGrad[frameIdx]; + + if (prevOutValue && prevOutGrad) { + if (isBatch) prevOutValue += batchIdx * frameSize; + rPrevOutValue = prevOutValue[frameIdx]; + + if (isBatch) prevOutGrad += batchIdx * frameSize; + rPrevOutGrad = prevOutGrad[frameIdx]; + } + + opStateGrad(rUpdateGateValue, rUpdateGateGrad, rFrameStateValue, + rFrameStateGrad, rPrevOutValue, rPrevOutGrad, rOutGrad, + active_node); + + gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad; + gateGrad[frameIdx + frameSize * 2] = rFrameStateGrad; + if (prevOutGrad) { + prevOutGrad[frameIdx] = rPrevOutGrad; + } +} + +/* + * threads(framePerBlock, batchPerBlock) + * grid(frameBlocks, batchBlocks) + */ +template +__global__ void KeGruBackwardResetGrad(OpResetGrad opResetGrad, T *gateValue, + T *gateGrad, T *prevOutValue, + T *prevOutGrad, T *resetOutputGrad, + int frameSize, int batchSize, + activation_mode_t active_gate) { + const int frameIdx = blockIdx.x * blockDim.x + threadIdx.x; + if (frameIdx >= frameSize) return; + int batchIdx = 0; + if (isBatch) { + batchIdx = blockIdx.y * blockDim.y + threadIdx.y; + if (batchIdx >= batchSize) return; + gateValue += batchIdx * 3 * frameSize; + gateGrad += batchIdx * 3 * frameSize; + resetOutputGrad += batchIdx * frameSize; + } + + T rResetGateGrad; + T rPrevOutValue = 0; + T rPrevOutGrad = 0; + T rResetOutputGrad = 0; + T rUpdateGateValue = gateValue[frameIdx + frameSize * 0]; + T rUpdateGateGrad = gateGrad[frameIdx + frameSize * 0]; + T rResetGateValue = gateValue[frameIdx + frameSize * 1]; + + if (prevOutValue && prevOutGrad) { + if (isBatch) prevOutValue += batchIdx * frameSize; + if (isBatch) prevOutGrad += batchIdx * frameSize; + rPrevOutValue = prevOutValue[frameIdx]; + rPrevOutGrad = prevOutGrad[frameIdx]; + rResetOutputGrad = resetOutputGrad[frameIdx]; + } + + opResetGrad(rUpdateGateValue, rUpdateGateGrad, rResetGateValue, + rResetGateGrad, rPrevOutValue, rPrevOutGrad, rResetOutputGrad, + active_gate); + + gateGrad[frameIdx + frameSize * 0] = rUpdateGateGrad; + gateGrad[frameIdx + frameSize * 1] = rResetGateGrad; + if (prevOutGrad) { + prevOutGrad[frameIdx] = rPrevOutGrad; + } +} +} // namespace detail +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/detail/gru_kernel.h b/paddle/operators/math/detail/gru_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..8a681d8d8bced72e1296f863489f6ccbc7913167 --- /dev/null +++ b/paddle/operators/math/detail/gru_kernel.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/detail/activation_functions.h" +#include "paddle/platform/hostdevice.h" + +#include + +// TODO(guosheng): refine code style in gru_kernel +namespace paddle { +namespace operators { +namespace math { +namespace detail { + +namespace forward { + +template +class gru_resetOutput { + public: + HOSTDEVICE void operator()(T &valueUpdateGate, T &valueResetGate, T &prevOut, + T &valueResetOutput, activation_mode_t actGate) { + valueUpdateGate = activation(valueUpdateGate, actGate); + valueResetGate = activation(valueResetGate, actGate); + valueResetOutput = prevOut * valueResetGate; + } +#ifndef __NVCC__ +#ifndef __AVX__ + static const bool avx = false; +#else + static const bool avx = true; + HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &valueResetGate, + __m256 &prevOut, __m256 &valueResetOutput, + activation_mode_t actGate) { + valueUpdateGate = activation(valueUpdateGate, actGate); + valueResetGate = activation(valueResetGate, actGate); + valueResetOutput = _mm256_mul_ps(prevOut, valueResetGate); + } +#endif +#endif +}; + +template +class gru_finalOutput { + public: + HOSTDEVICE void operator()(T &valueUpdateGate, T &valueFrameState, T &prevOut, + T &valueOutput, activation_mode_t actInput) { + valueFrameState = activation(valueFrameState, actInput); + valueOutput = prevOut - (valueUpdateGate * prevOut) + + (valueUpdateGate * valueFrameState); + } +#ifndef __NVCC__ +#ifndef __AVX__ + static const bool avx = false; +#else + static const bool avx = true; + HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &valueFrameState, + __m256 &prevOut, __m256 &valueOutput, + activation_mode_t actInput) { + valueFrameState = activation(valueFrameState, actInput); + valueOutput = _mm256_add_ps( + _mm256_sub_ps(prevOut, _mm256_mul_ps(valueUpdateGate, prevOut)), + _mm256_mul_ps(valueUpdateGate, valueFrameState)); + } +#endif +#endif +}; +} // namespace forward + +namespace backward { + +template +class gru_stateGrad { + public: + HOSTDEVICE void operator()(T &valueUpdateGate, T &gradUpdateGate, + T &valueFrameState, T &gradFrameState, + T &valuePrevOut, T &gradPrevOut, T &gradOutput, + activation_mode_t actInput) { + gradUpdateGate = (gradOutput * valueFrameState); + gradUpdateGate -= (gradOutput * valuePrevOut); + gradPrevOut -= (gradOutput * valueUpdateGate); + gradPrevOut += gradOutput; + gradFrameState = + activation(gradOutput * valueUpdateGate, valueFrameState, actInput); + } +#ifndef __NVCC__ +#ifndef __AVX__ + static const bool avx = false; +#else + static const bool avx = true; + HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &gradUpdateGate, + __m256 &valueFrameState, __m256 &gradFrameState, + __m256 &valuePrevOut, __m256 &gradPrevOut, + __m256 &gradOutput, activation_mode_t actInput) { + gradUpdateGate = _mm256_mul_ps(gradOutput, valueFrameState); + gradUpdateGate = + _mm256_sub_ps(gradUpdateGate, _mm256_mul_ps(gradOutput, valuePrevOut)); + gradPrevOut = _mm256_add_ps( + _mm256_sub_ps(gradPrevOut, _mm256_mul_ps(gradOutput, valueUpdateGate)), + gradOutput); + gradFrameState = activation(_mm256_mul_ps(gradOutput, valueUpdateGate), + valueFrameState, actInput); + } +#endif +#endif +}; + +template +class gru_resetGrad { + public: + HOSTDEVICE void operator()(T &valueUpdateGate, T &gradUpdateGate, + T &valueResetGate, T &gradResetGate, + T &valuePrevOut, T &gradPrevOut, + T &gradResetOutput, activation_mode_t actGate) { + gradResetGate = (gradResetOutput * valuePrevOut); + gradPrevOut += (gradResetOutput * valueResetGate); + gradUpdateGate = activation(gradUpdateGate, valueUpdateGate, actGate); + gradResetGate = activation(gradResetGate, valueResetGate, actGate); + } +#ifndef __NVCC__ +#ifndef __AVX__ + static const bool avx = false; +#else + static const bool avx = true; + HOSTDEVICE void operator()(__m256 &valueUpdateGate, __m256 &gradUpdateGate, + __m256 &valueResetGate, __m256 &gradResetGate, + __m256 &valuePrevOut, __m256 &gradPrevOut, + __m256 &gradResetOutput, + activation_mode_t actGate) { + gradResetGate = _mm256_mul_ps(gradResetOutput, valuePrevOut); + gradPrevOut = _mm256_add_ps(gradPrevOut, + _mm256_mul_ps(gradResetOutput, valueResetGate)); + gradUpdateGate = activation(gradUpdateGate, valueUpdateGate, actGate); + gradResetGate = activation(gradResetGate, valueResetGate, actGate); + } +#endif +#endif +}; + +} // namespace backward + +} // namespace detail +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/gru_compute.cc b/paddle/operators/math/gru_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..125af449d3f700e24be5e4b7615c3b0e03fd4e5b --- /dev/null +++ b/paddle/operators/math/gru_compute.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/gru_compute.h" +#include "paddle/operators/math/detail/gru_cpu_kernel.h" +#include "paddle/operators/math/detail/gru_kernel.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template +struct GRUUnitFunctor { + static void compute(const platform::DeviceContext &context, + hl_gru_value value, int frameSize, int batchSize, + activation_mode_t active_node, + activation_mode_t active_gate) { +#ifndef __NVCC__ + if (value.prevOutValue) { + math::gemm( + context, false, false, batchSize, frameSize * 2, frameSize, 1, + value.prevOutValue, frameSize, value.gateWeight, frameSize * 2, 1, + value.gateValue, frameSize * 3); + } + + detail::forward_reset_output(detail::forward::gru_resetOutput(), value, + frameSize, batchSize, active_gate); + + if (value.prevOutValue) { + math::gemm( + context, false, false, batchSize, frameSize, frameSize, 1, + value.resetOutputValue, frameSize, value.stateWeight, frameSize, 1, + value.gateValue + frameSize * 2, frameSize * 3); + } + + detail::forward_final_output(detail::forward::gru_finalOutput(), value, + frameSize, batchSize, active_node); +#endif + } +}; + +template +struct GRUUnitGradFunctor { + static void compute(const platform::DeviceContext &context, + hl_gru_value value, hl_gru_grad grad, int frameSize, + int batchSize, activation_mode_t active_node, + activation_mode_t active_gate) { +#ifndef __NVCC__ + detail::backward_state_grad(detail::backward::gru_stateGrad(), value, + grad, frameSize, batchSize, active_node); + + if (value.prevOutValue && grad.prevOutGrad) { + math::gemm( + context, false, true, batchSize, frameSize, frameSize, 1, + grad.gateGrad + frameSize * 2, frameSize * 3, value.stateWeight, + frameSize, 0, grad.resetOutputGrad, frameSize); + + if (grad.stateWeightGrad) { + math::gemm( + context, true, false, frameSize, frameSize, batchSize, 1, + value.resetOutputValue, frameSize, grad.gateGrad + frameSize * 2, + frameSize * 3, 1, grad.stateWeightGrad, frameSize); + } + } + + detail::backward_reset_grad(detail::backward::gru_resetGrad(), value, + grad, frameSize, batchSize, active_gate); + + if (grad.prevOutGrad && value.prevOutValue) { + math::gemm( + context, false, true, batchSize, frameSize, frameSize * 2, 1, + grad.gateGrad, frameSize * 3, value.gateWeight, frameSize * 2, 1, + grad.prevOutGrad, frameSize); + + if (grad.gateWeightGrad) { + math::gemm( + context, true, false, frameSize, frameSize * 2, batchSize, 1, + value.prevOutValue, frameSize, grad.gateGrad, frameSize * 3, 1, + grad.gateWeightGrad, frameSize * 2); + } + } +#endif + } +}; + +template struct GRUUnitFunctor; +template struct GRUUnitFunctor; +template struct GRUUnitGradFunctor; +template struct GRUUnitGradFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/gru_compute.cu b/paddle/operators/math/gru_compute.cu new file mode 100644 index 0000000000000000000000000000000000000000..7b9e54ac029f6aa00553338435684097d6d02b25 --- /dev/null +++ b/paddle/operators/math/gru_compute.cu @@ -0,0 +1,178 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/detail/gru_gpu_kernel.h" +#include "paddle/operators/math/detail/gru_kernel.h" +#include "paddle/operators/math/gru_compute.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template +struct GRUUnitFunctor { + static void compute(const platform::DeviceContext &context, + hl_gru_value value, int frameSize, int batchSize, + activation_mode_t active_node, + activation_mode_t active_gate) { + auto stream = + reinterpret_cast(context).stream(); + dim3 threads; + dim3 grid; + if (batchSize == 1) { + int framePerBlock = frameSize <= 1024 ? frameSize : 1024; + int frameBlocks = (frameSize + 1024 - 1) / 1024; + threads = dim3(framePerBlock, 1); + grid = dim3(frameBlocks, 1); + } else { + threads = dim3(32, 32); + grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); + } + + if (value.prevOutValue) { + math::gemm( + context, false, false, batchSize, frameSize * 2, frameSize, 1, + value.prevOutValue, frameSize, value.gateWeight, frameSize * 2, 1, + value.gateValue, frameSize * 3); + } + + if (batchSize == 1) { + detail::KeGruForwardResetOutput, + /* isBatch= */ false, + T><<>>( + detail::forward::gru_resetOutput(), value.gateValue, + value.resetOutputValue, value.prevOutValue, frameSize, batchSize, + active_gate); + } else { + detail::KeGruForwardResetOutput, + /* isBatch= */ true, + T><<>>( + detail::forward::gru_resetOutput(), value.gateValue, + value.resetOutputValue, value.prevOutValue, frameSize, batchSize, + active_gate); + } + + if (value.prevOutValue) { + math::gemm( + context, false, false, batchSize, frameSize, frameSize, 1, + value.resetOutputValue, frameSize, value.stateWeight, frameSize, 1, + value.gateValue + frameSize * 2, frameSize * 3); + } + + if (batchSize == 1) { + detail::KeGruForwardFinalOutput, + /* isBatch= */ false, + T><<>>( + detail::forward::gru_finalOutput(), value.gateValue, + value.prevOutValue, value.outputValue, frameSize, batchSize, + active_node); + } else { + detail::KeGruForwardFinalOutput, + /* isBatch= */ true, + T><<>>( + detail::forward::gru_finalOutput(), value.gateValue, + value.prevOutValue, value.outputValue, frameSize, batchSize, + active_node); + } + } +}; + +template +struct GRUUnitGradFunctor { + static void compute(const platform::DeviceContext &context, + hl_gru_value value, hl_gru_grad grad, int frameSize, + int batchSize, activation_mode_t active_node, + activation_mode_t active_gate) { + auto stream = + reinterpret_cast(context).stream(); + dim3 threads; + dim3 grid; + if (batchSize == 1) { + int framePerBlock = frameSize <= 1024 ? frameSize : 1024; + int frameBlocks = (frameSize + 1024 - 1) / 1024; + threads = dim3(framePerBlock, 1); + grid = dim3(frameBlocks, 1); + } else { + threads = dim3(32, 32); + grid = dim3((frameSize + 32 - 1) / 32, (batchSize + 32 - 1) / 32); + } + + if (batchSize == 1) { + detail::KeGruBackwardStateGrad< + detail::backward::gru_stateGrad, + /* isBatch= */ false><<>>( + detail::backward::gru_stateGrad(), value.gateValue, grad.gateGrad, + value.prevOutValue, grad.prevOutGrad, grad.outputGrad, frameSize, + batchSize, active_node); + } else { + detail::KeGruBackwardStateGrad< + detail::backward::gru_stateGrad, + /* isBatch= */ true><<>>( + detail::backward::gru_stateGrad(), value.gateValue, grad.gateGrad, + value.prevOutValue, grad.prevOutGrad, grad.outputGrad, frameSize, + batchSize, active_node); + } + + if (value.prevOutValue && grad.prevOutGrad) { + math::gemm( + context, false, true, batchSize, frameSize, frameSize, 1, + grad.gateGrad + frameSize * 2, frameSize * 3, value.stateWeight, + frameSize, 0, grad.resetOutputGrad, frameSize); + + if (grad.stateWeightGrad) { + math::gemm( + context, true, false, frameSize, frameSize, batchSize, 1, + value.resetOutputValue, frameSize, grad.gateGrad + frameSize * 2, + frameSize * 3, 1, grad.stateWeightGrad, frameSize); + } + } + + if (batchSize == 1) { + detail::KeGruBackwardResetGrad< + detail::backward::gru_resetGrad, + /* isBatch= */ false><<>>( + detail::backward::gru_resetGrad(), value.gateValue, grad.gateGrad, + value.prevOutValue, grad.prevOutGrad, grad.resetOutputGrad, frameSize, + batchSize, active_gate); + } else { + detail::KeGruBackwardResetGrad< + detail::backward::gru_resetGrad, + /* isBatch= */ true><<>>( + detail::backward::gru_resetGrad(), value.gateValue, grad.gateGrad, + value.prevOutValue, grad.prevOutGrad, grad.resetOutputGrad, frameSize, + batchSize, active_gate); + } + + if (grad.prevOutGrad && value.prevOutValue) { + math::gemm( + context, false, true, batchSize, frameSize, frameSize * 2, 1, + grad.gateGrad, frameSize * 3, value.gateWeight, frameSize * 2, 1, + grad.prevOutGrad, frameSize); + + if (grad.gateWeightGrad) { + math::gemm( + context, true, false, frameSize, frameSize * 2, batchSize, 1, + value.prevOutValue, frameSize, grad.gateGrad, frameSize * 3, 1, + grad.gateWeightGrad, frameSize * 2); + } + } + } +}; + +template struct GRUUnitFunctor; +template struct GRUUnitFunctor; +template struct GRUUnitGradFunctor; +template struct GRUUnitGradFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/gru_compute.h b/paddle/operators/math/gru_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..1475fb38104f353857dfd968e46af98a6d52c52a --- /dev/null +++ b/paddle/operators/math/gru_compute.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/operators/math/lstm_compute.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace operators { +namespace math { + +// TODO(guosheng): refine code style in gru_compute +template +struct hl_gru_value { + T *gateWeight; + T *stateWeight; + T *gateValue; + T *resetOutputValue; + T *outputValue; + T *prevOutValue; +}; + +template +struct hl_gru_grad { + T *gateWeightGrad; + T *stateWeightGrad; + T *gateGrad; + T *resetOutputGrad; + T *outputGrad; + T *prevOutGrad; +}; + +template +struct GRUUnitFunctor { + static void compute(const platform::DeviceContext &context, + hl_gru_value value, int frameSize, int batchSize, + activation_mode_t active_node, + activation_mode_t active_gate); +}; + +template +struct GRUUnitGradFunctor { + static void compute(const platform::DeviceContext &context, + hl_gru_value value, hl_gru_grad grad, int frameSize, + int batchSize, activation_mode_t active_node, + activation_mode_t active_gate); +}; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/sequence_pooling.cc b/paddle/operators/math/sequence_pooling.cc new file mode 100644 index 0000000000000000000000000000000000000000..5913c99fdb01100d0de44ab317124550fa626528 --- /dev/null +++ b/paddle/operators/math/sequence_pooling.cc @@ -0,0 +1,103 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/sequence_pooling.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { +namespace math { + +template +class MaxSeqPoolFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::LoDTensor& input, framework::Tensor* output, + framework::Tensor* index) { + auto in_dims = input.dims(); + auto out_dims = output->dims(); + auto idx_dims = index->dims(); + PADDLE_ENFORCE_GT(in_dims.size(), 1); + PADDLE_ENFORCE_GT(out_dims.size(), 1); + for (int64_t i = 1; i < in_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]); + } + PADDLE_ENFORCE_EQ(idx_dims, out_dims); + + auto starts = input.lod()[0]; + const T* in_data = input.data(); + T* out_data = output->data(); + int* max_index = index->data(); + + int64_t num_seq = out_dims[0]; + int64_t dim = output->numel() / num_seq; + for (int64_t i = 0; i < num_seq; ++i) { + for (int64_t k = 0; k < dim; ++k) { + out_data[i * dim + k] = in_data[starts[i] * dim + k]; + max_index[i * dim + k] = starts[i]; + } + for (size_t j = starts[i] + 1; j < starts[i + 1]; ++j) { + for (int64_t k = 0; k < dim; ++k) { + if (in_data[j * dim + k] > out_data[i * dim + k]) { + out_data[i * dim + k] = in_data[j * dim + k]; + max_index[i * dim + k] = j; + } + } + } + } + } +}; + +template +class MaxSeqPoolGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& out_grad, + const framework::Tensor& index, + framework::LoDTensor* in_grad) { + auto og_dims = out_grad.dims(); + auto ig_dims = in_grad->dims(); + auto idx_dims = index.dims(); + PADDLE_ENFORCE_GT(og_dims.size(), 1); + PADDLE_ENFORCE_GT(ig_dims.size(), 1); + for (int64_t i = 1; i < og_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]); + } + PADDLE_ENFORCE_EQ(idx_dims, og_dims); + + const T* og_data = out_grad.data(); + const int* max_index = index.data(); + T* ig_data = in_grad->data(); + + SetConstant set_zero; + set_zero(context, in_grad, static_cast(0.0)); + int64_t num_seq = og_dims[0]; + int64_t dim = out_grad.numel() / num_seq; + for (int64_t i = 0; i < num_seq; ++i) { + for (int64_t j = 0; j < dim; ++j) { + int step_id = max_index[i * dim + j]; + ig_data[step_id * dim + j] = og_data[i * dim + j]; + } + } + } +}; + +template class MaxSeqPoolFunctor; +template class MaxSeqPoolFunctor; +template class MaxSeqPoolGradFunctor; +template class MaxSeqPoolGradFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/sequence_pooling.cu b/paddle/operators/math/sequence_pooling.cu new file mode 100644 index 0000000000000000000000000000000000000000..5ed951402fecba66a8960f4d024bf3785dac51c7 --- /dev/null +++ b/paddle/operators/math/sequence_pooling.cu @@ -0,0 +1,136 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/sequence_pooling.h" + +namespace paddle { +namespace operators { +namespace math { + +#define FLT_MAX __FLT_MAX__ + +template +__global__ void KeMaxSequencePool(const T* input, const size_t* starts, + T* output, int* index, int64_t num_seq, + int64_t dim) { + int dim_idx = threadIdx.x; + int seq_id = blockIdx.x; + if (seq_id >= num_seq) return; + size_t start = starts[seq_id]; + size_t end = starts[seq_id + 1]; + + for (int64_t i = dim_idx; i < dim; i += blockDim.x) { + T max_val = static_cast(-FLT_MAX); + int max_id = -1; + for (size_t step_id = start; step_id < end; step_id++) { + if (max_val < input[step_id * dim + i]) { + max_val = input[step_id * dim + i]; + max_id = step_id; + } + } + output[seq_id * dim + i] = max_val; + index[seq_id * dim + i] = max_id; + } +} + +template +class MaxSeqPoolFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::LoDTensor& input, framework::Tensor* output, + framework::Tensor* index) { + auto in_dims = input.dims(); + auto out_dims = output->dims(); + auto idx_dims = index->dims(); + PADDLE_ENFORCE_GT(in_dims.size(), static_cast(1)); + PADDLE_ENFORCE_GT(out_dims.size(), 1); + for (int64_t i = 1; i < in_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]); + } + PADDLE_ENFORCE_EQ(idx_dims, out_dims); + + auto starts = input.lod()[0]; + const T* in_data = input.data(); + T* out_data = output->data(); + int* max_index = index->data(); + + int64_t num_seq = out_dims[0]; + int64_t dim = output->numel() / num_seq; + + dim3 threads(256, 1); + dim3 grid(num_seq, 1); + auto stream = + reinterpret_cast(context).stream(); + KeMaxSequencePool<<>>( + in_data, starts.data(), out_data, max_index, num_seq, dim); + } +}; + +template +__global__ void KeMaxSequencePoolGrad(const T* out_grad, const int* max_index, + T* in_grad, int64_t num_seq, + int64_t dim) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + int col_idx = idx % dim; + if (idx < num_seq * dim) { + int step_id = max_index[idx]; + in_grad[step_id * dim + col_idx] = out_grad[idx]; + } +} + +template +class MaxSeqPoolGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& out_grad, + const framework::Tensor& index, + framework::LoDTensor* in_grad) { + auto og_dims = out_grad.dims(); + auto idx_dims = index.dims(); + auto ig_dims = in_grad->dims(); + PADDLE_ENFORCE_GT(og_dims.size(), static_cast(1)); + PADDLE_ENFORCE_GT(ig_dims.size(), static_cast(1)); + for (int64_t i = 1; i < og_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]); + } + PADDLE_ENFORCE_EQ(idx_dims, og_dims); + + const T* og_data = out_grad.data(); + const int* max_index = index.data(); + T* ig_data = in_grad->data(); + + SetConstant set_zero; + set_zero(context, in_grad, static_cast(0.0)); + int64_t num_seq = og_dims[0]; + int64_t dim = out_grad.numel() / num_seq; + + unsigned int blocks = (num_seq * dim + 128 - 1) / 128; + dim3 threads(128, 1); + dim3 grid(blocks, 1); + auto stream = + reinterpret_cast(context).stream(); + KeMaxSequencePoolGrad<<>>( + og_data, max_index, ig_data, num_seq, dim); + } +}; + +template class MaxSeqPoolFunctor; +template class MaxSeqPoolFunctor; +template class MaxSeqPoolGradFunctor; +template class MaxSeqPoolGradFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/sequence_pooling.h b/paddle/operators/math/sequence_pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..35dfe26de1a87a064410401244914d4e2a94176e --- /dev/null +++ b/paddle/operators/math/sequence_pooling.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { +namespace math { + +#define FLT_MAX __FLT_MAX__ + +template +class MaxSeqPoolFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::LoDTensor& input, framework::Tensor* output, + framework::Tensor* index); +}; + +template +class MaxSeqPoolGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& out_grad, + const framework::Tensor& index, + framework::LoDTensor* in_grad); +}; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/positive_negative_pair_op.cc b/paddle/operators/positive_negative_pair_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..afbb63cc6053e2968750c163f9ac194d0f5b93e0 --- /dev/null +++ b/paddle/operators/positive_negative_pair_op.cc @@ -0,0 +1,177 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/positive_negative_pair_op.h" + +namespace paddle { +namespace operators { + +class PositiveNegativePairOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE( + ctx->HasInput("Score"), + "Input(Score) of PositiveNegativePairOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("Label"), + "Input(Label) of PositiveNegativePairOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("QueryID"), + "Input(QueryID) of PositiveNegativePairOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("PositivePair"), + "Output(PositivePair) of PositiveNegativePairOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("NegativePair"), + "Output(NegativePair) of PositiveNegativePairOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("NeutralPair"), + "Output(NeutralPair) of PositiveNegativePairOp should not be null."); + auto scalar_dim = framework::make_ddim({1}); + if (ctx->HasInput("AccumulatePositivePair") || + ctx->HasInput("AccumulateNegativePair") || + ctx->HasInput("AccumulateNeutralPair")) { + PADDLE_ENFORCE(ctx->HasInput("AccumulatePositivePair") && + ctx->HasInput("AccumulateNegativePair") && + ctx->HasInput("AccumulateNeutralPair"), + "All optional inputs(AccumulatePositivePair, " + "AccumulateNegativePair, AccumulateNeutralPair) of " + "PositiveNegativePairOp are required if one of them is " + "specified."); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("AccumulatePositivePair"), scalar_dim, + "Shape of AccumulatePositivePair should be {1}."); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("AccumulateNegativePair"), scalar_dim, + "Shape of AccumulateNegativePair should be {1}."); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("AccumulateNeutralPair"), scalar_dim, + "Shape of AccumulateNeutralPair should be {1}."); + } + + auto score_dim = ctx->GetInputDim("Score"); + auto label_dim = ctx->GetInputDim("Label"); + auto query_dim = ctx->GetInputDim("QueryID"); + PADDLE_ENFORCE_EQ(score_dim.size(), 2, "Score should be a 2-D tensor."); + PADDLE_ENFORCE_EQ(label_dim.size(), 2, "Label should be a 2-D tensor."); + PADDLE_ENFORCE_EQ( + label_dim[0], score_dim[0], + "Tensor Score and Label should have the same height (batch size)."); + PADDLE_ENFORCE_EQ(label_dim[1], 1, + "The width of Label should be 1, i.e. each item should " + "have a scalar label."); + PADDLE_ENFORCE(query_dim == label_dim, + "QueryID should have the same shape as Label."); + if (ctx->HasInput("Weight")) { + PADDLE_ENFORCE(ctx->GetInputDim("Weight") == label_dim, + "Weight should have the same shape as Label."); + } + int column = ctx->Attrs().Get("column"); + auto depth = score_dim[1]; + PADDLE_ENFORCE(column < depth && column >= -depth, + "Attribute column should be in the range of [-%l, %l)", + depth, depth); + + ctx->SetOutputDim("PositivePair", scalar_dim); + ctx->SetOutputDim("NegativePair", scalar_dim); + ctx->SetOutputDim("NeutralPair", scalar_dim); + } + + protected: + framework::DataType IndicateDataType( + const framework::ExecutionContext &ctx) const override { + return framework::ToDataType(ctx.Input("Score")->type()); + } +}; + +class PositiveNegativePairOpMaker : public framework::OpProtoAndCheckerMaker { + public: + PositiveNegativePairOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Score", + "(Tensor, float) Model Score on an item (with " + "respect to QueryID). It's a 2-D tensor with shape [batch_size, " + "depth], where the column specified by the attribute \"column\" " + "is used as item score."); + AddInput("Label", + "(Tensor, float) Label of an item (with repsect to " + "QueryId). It's a 2-D tensor with shape [batch_size, 1]."); + AddInput("QueryID", + "(Tensor, int64) Query ID that indicates the context. Its shape " + "should be the same as Label."); + AddInput( + "AccumulatePositivePair", + "(float) Optional. The accumulated number of positive pairs over a " + "stream of data. If provided, the output PositivePair will be " + "initialized with this number rather than 0. it won't be modified " + "in place.") + .AsDispensable(); + AddInput( + "AccumulateNegativePair", + "(float) Optional. The accumulated number of negative pairs over a " + "stream of data. If provided, the output NegativePair will be " + "initialized with this number rather than 0. it won't be modified " + "in place.") + .AsDispensable(); + AddInput("AccumulateNeutralPair", + "(float) Optional. The accumulated number of neutral pairs over a " + "stream of data. If provided, the output NeutralPair will be " + "initialized with this number rather than 0. it won't be modified " + "in place.") + .AsDispensable(); + AddInput("Weight", + "(float) Optional. Weight of current item. If specified, its " + "shape should be the same as Label, and the meaning of the output " + "changes from numbers of pairs to the total sum of pairs' " + "weights. Weight of a pair of items is the average of their " + "weights.") + .AsDispensable(); + AddOutput("PositivePair", + "(float) Number of positive pairs, i.e. the pairs of " + "items that are ranked correctly."); + AddOutput("NegativePair", + "(float) Number of negative pairs, i.e. the pairs of " + "items that are ranked incorrectly."); + AddOutput("NeutralPair", + "(float) Number of neutral pairs, i.e. the pairs of items " + "that have the same score.") + .AsDispensable(); + AddAttr( + "column", + "(int, default -1) The column position of Score used to rank items in " + "descending order. It must be in the range of [-rank(Score), " + "rank(Score)). " + "If `dim < 0`, the dim to reduce is `rank + dim`. " + "Noting that reducing on the first dim will make the LoD info lost.") + .SetDefault(0); + AddComment(R"DOC( + PositiveNegativePairOp can be used to evaluate Learning To Rank(LTR) + model performance. + Within some context, e.g. the "query", a LTR model generates scores + for a list of items, which gives a partial order of the items. + PositiveNegativePairOp takes a list of reference rank order + (Input("Label")) and the model generated scores (Input(Score)) as + inputs and counts the pairs that ranked correctly and incorrectly. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair, + ops::PositiveNegativePairOp, + ops::PositiveNegativePairOpMaker); +REGISTER_OP_CPU_KERNEL( + positive_negative_pair, + ops::PositiveNegativePairKernel, + ops::PositiveNegativePairKernel); diff --git a/paddle/operators/positive_negative_pair_op.h b/paddle/operators/positive_negative_pair_op.h new file mode 100644 index 0000000000000000000000000000000000000000..2efd3777e04c17b27c07bccde524de5785af35fe --- /dev/null +++ b/paddle/operators/positive_negative_pair_op.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/utils/Logging.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; + +template +class PositiveNegativePairKernel : public framework::OpKernel { + public: + struct PredictionResult { + PredictionResult(T score, T label, T weight) + : score(score), label(label), weight(weight) {} + T score; + T label; + T weight; + }; + + void Compute(const framework::ExecutionContext& context) const override { + auto score_t = context.Input("Score"); + auto label_t = context.Input("Label"); + auto query_t = context.Input("QueryID"); + auto acc_positive_t = context.Input("AccumulatePositivePair"); + auto acc_negative_t = context.Input("AccumulateNegativePair"); + auto acc_neutral_t = context.Input("AccumulateNeutralPair"); + auto positive_t = context.Output("PositivePair"); + auto negative_t = context.Output("NegativePair"); + auto neutral_t = context.Output("NeutralPair"); + auto weight_t = context.Input("Weight"); + + auto score = score_t->data(); + auto label = label_t->data(); + auto query = query_t->data(); + const T* weight = nullptr; + if (weight_t != nullptr) { + weight = weight_t->data(); + } + T* positive = positive_t->mutable_data(context.GetPlace()); + T* negative = negative_t->mutable_data(context.GetPlace()); + T* neutral = neutral_t->mutable_data(context.GetPlace()); + + auto score_dim = score_t->dims(); + auto batch_size = score_dim[0]; + auto width = score_dim[1]; + auto column = context.Attr("column"); + if (column < 0) { + column += width; + } + + // construct document instances for each query: Query => List[, ...] + std::unordered_map> predictions; + for (auto i = 0; i < batch_size; ++i) { + if (predictions.find(query[i]) == predictions.end()) { + predictions.emplace( + std::make_pair(query[i], std::vector())); + } + predictions[query[i]].emplace_back(score[i * width + column], label[i], + weight_t != nullptr ? weight[i] : 1.0); + } + + // for each query, accumulate pair counts + T pos = 0, neg = 0, neu = 0; + if (acc_positive_t != nullptr && acc_negative_t != nullptr && + acc_neutral_t != nullptr) { + pos = acc_positive_t->data()[0]; + neg = acc_negative_t->data()[0]; + neu = acc_neutral_t->data()[0]; + } + auto evaluate_one_list = [&pos, &neg, + &neu](std::vector vec) { + for (auto ite1 = vec.begin(); ite1 != vec.end(); ++ite1) { + for (auto ite2 = ite1 + 1; ite2 != vec.end(); ++ite2) { + if (ite1->label == ite2->label) { // labels are equal, ignore. + continue; + } + T w = (ite1->weight + ite2->weight) * 0.5; + if (ite1->score == ite2->score) { + neu += w; + } + (ite1->score - ite2->score) * (ite1->label - ite2->label) > 0.0 + ? pos += w + : neg += w; + } + } + }; + for (auto prediction : predictions) { + evaluate_one_list(prediction.second); + } + *positive = pos; + *negative = neg; + *neutral = neu; + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc index 63050a4ec2f16800ae6f8ee077fd0c1daa8eff87..710f280017fa5e188b187a3e91b27e2bedc65d10 100644 --- a/paddle/operators/sequence_pool_op.cc +++ b/paddle/operators/sequence_pool_op.cc @@ -27,6 +27,11 @@ class SequencePoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of SequencePoolOp should not be null."); ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + if (ctx->Attrs().Get("pooltype") == "MAX") { + PADDLE_ENFORCE(ctx->HasOutput("MaxIndex"), + "Output(MaxIndex) of SequencePoolOp should not be null."); + ctx->SetOutputDim("MaxIndex", ctx->GetInputDim("X")); + } } }; @@ -35,10 +40,14 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { SequencePoolOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "(LoDTensor), the variable-length input of SequencePoolOp"); + AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp"); AddOutput("Out", - "(Tensor), output of SequencePoolOp, which does not contain LoD " + "(Tensor) The output of SequencePoolOp does not contain LoD " "infomation."); + AddOutput("MaxIndex", + "(Tensor) This tensor is used for the sequence max-pooling " + "to record the max indexes.") + .AsIntermediate(); AddAttr( "pooltype", "(int, default AVERAGE) the pooling pooltype of SequencePoolOp.") @@ -96,6 +105,12 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { } ctx->SetOutputDim(framework::GradVarName("X"), x_dims); } + + protected: + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + return framework::ToDataType(ctx.Input("X")->type()); + } }; } // namespace operators diff --git a/paddle/operators/sequence_pool_op.h b/paddle/operators/sequence_pool_op.h index e0e0493fe0ef7e1963ce5c2e3f37c164a605809b..2b8a25c2414c20efaffedfc8603697b3a104634f 100644 --- a/paddle/operators/sequence_pool_op.h +++ b/paddle/operators/sequence_pool_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/sequence_pooling.h" namespace paddle { namespace operators { @@ -34,7 +35,7 @@ class SequencePoolKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); std::string pooltype = context.Attr("pooltype"); auto dims = in->dims(); @@ -53,6 +54,16 @@ class SequencePoolKernel : public framework::OpKernel { auto lod_level_0 = lod[0]; out->mutable_data(context.GetPlace()); + + if (pooltype == "MAX") { + math::MaxSeqPoolFunctor max_pool; + auto* index = context.Output("MaxIndex"); + index->Resize({dims}); + index->mutable_data(context.GetPlace()); + max_pool(context.device_context(), *in, out, index); + return; + } + auto place = context.GetEigenDevice(); for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { Tensor in_t = in->Slice(static_cast(lod_level_0[i]), @@ -69,8 +80,6 @@ class SequencePoolKernel : public framework::OpKernel { } else if (pooltype == "SQRT") { out_e.device(place) = in_e.sum(Eigen::array({{0}})) / std::sqrt(static_cast(h)); - } else if (pooltype == "MAX") { - out_e.device(place) = in_e.maximum(Eigen::array({{0}})); } else if (pooltype == "LAST") { out_e.device(place) = in_e.chip(h - 1, 0); } else if (pooltype == "FIRST") { @@ -87,8 +96,8 @@ class SequencePoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); + auto* out_g = context.Input(framework::GradVarName("Out")); auto* in_g = context.Output(framework::GradVarName("X")); - auto* out_g = context.Input(framework::GradVarName("Out")); std::string pooltype = context.Attr("pooltype"); auto dims = in->dims(); @@ -96,6 +105,14 @@ class SequencePoolGradKernel : public framework::OpKernel { int64_t w = in->numel() / dims[0]; in_g->mutable_data(context.GetPlace()); + + if (pooltype == "MAX") { + math::MaxSeqPoolGradFunctor max_pool_grad; + auto* index = context.Input("MaxIndex"); + max_pool_grad(context.device_context(), *out_g, *index, in_g); + return; + } + if (pooltype == "LAST" || pooltype == "FIRST") { // set X@Grad be zero at first when pooltype is LAST/FIRST math::SetConstant functor; @@ -118,20 +135,6 @@ class SequencePoolGradKernel : public framework::OpKernel { } else if (pooltype == "SQRT") { in_g_e.device(place) = (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); - } else if (pooltype == "MAX") { - auto in_t = - in->Slice(static_cast(lod[i]), static_cast(lod[i + 1])); - Eigen::Map> - in_t_map(in_t.data(), h, w); - int row_id; - Eigen::array extents{{1, 1}}; - for (int col_id = 0; col_id < w; col_id++) { - in_t_map.col(col_id).maxCoeff(&row_id); - Eigen::array in_offsets{{row_id, col_id}}; - Eigen::array out_offsets{{0, col_id}}; - in_g_e.slice(in_offsets, extents).device(place) = - out_g_e.slice(out_offsets, extents); - } } else if (pooltype == "LAST") { in_g_e.chip(h - 1, 0).device(place) = out_g_e; } else if (pooltype == "FIRST") { diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index a006e0a595057d019b443d22cea4bdf171a6ee0b..c6b94f5cc947ccb86315fd9058b8c57d1a996927 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -4,13 +4,13 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/softmax_with_cross_entropy_op.h" #include @@ -30,12 +30,10 @@ class SoftmaxWithCrossEntropyOpMaker "which is a 2-D tensor with shape [N x K]. N is the batch_size, " "and K is the class number."); AddInput("Label", - "(Tensor, default: Tensor), The ground truth which is a 2-D " - "tensor. " - "If softLabel is set to false, Label is a Tensor with shape " - "[N x 1]." - "If softLabel is set to true, Label is a Tensor " - "with shape [N x K]."); + "(Tensor) The ground truth which is a 2-D tensor. If soft_label " + "is set to false, Label is a Tensor with shape [N x 1]. If " + "soft_label is set to true, Label is a Tensor with " + "shape [N x K]."); AddOutput( "Softmax", "(Tensor, default: Tensor), A 2-D tensor with shape [N x K]. " @@ -62,7 +60,7 @@ Because this operator performs a softmax on logits internally, it expects unscaled logits. This operator should not be used with the output of softmax operator since that would produce incorrect results. -When the attribute softLabel is set false, this operators expects mutually +When the attribute soft_label is set false, this operators expects mutually exclusive hard labels, each sample in a batch is in exactly one class with a probability of 1.0. Each sample in the batch will have a single label. @@ -198,6 +196,8 @@ REGISTER_OPERATOR(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, REGISTER_OPERATOR(softmax_with_cross_entropy_grad, ops::SoftmaxWithCrossEntropyOpGrad); REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy, - ops::SoftmaxWithCrossEntropyKernel); + ops::SoftmaxWithCrossEntropyKernel, + ops::SoftmaxWithCrossEntropyKernel); REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad, - ops::SoftmaxWithCrossEntropyGradKernel); + ops::SoftmaxWithCrossEntropyGradKernel, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu index 7602918bb39312db3c4d1a4064801712ef94ec72..b1faddac3fd21aaf817caf9d3e57e664f4e0e2d5 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -4,13 +4,13 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU @@ -24,7 +24,7 @@ using Tensor = framework::Tensor; namespace { template __global__ void CrossEntropyGrad(T* logit_grad, const T* loss_grad, - const int* labels, const int batch_size, + const int64_t* labels, const int batch_size, const int class_num) { int tid = blockIdx.x * blockDim.x + threadIdx.x; int sample_idx = tid / class_num; @@ -50,7 +50,7 @@ __global__ void SoftCrossEntropyGradientKernel(T* logit_grad, int ids = blockIdx.x * blockDim.x + threadIdx.x; if (ids < batch_size * class_num) { int row_ids = ids / class_num; - logit_grad[ids] = logit_grad[ids] * (loss_grad[row_ids] - labels[ids]); + logit_grad[ids] = loss_grad[row_ids] * (logit_grad[ids] - labels[ids]); } } } // namespace @@ -104,7 +104,7 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { .stream()>>>(logit_grad_data, loss_grad_data, label_data, batch_size, class_num); } else { - const int* label_data = labels->data(); + const int64_t* label_data = labels->data(); CrossEntropyGrad<<< grid, block, 0, reinterpret_cast( context.device_context()) @@ -119,6 +119,8 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy, - ops::SoftmaxWithCrossEntropyCUDAKernel); + ops::SoftmaxWithCrossEntropyCUDAKernel, + ops::SoftmaxWithCrossEntropyCUDAKernel); REGISTER_OP_GPU_KERNEL(softmax_with_cross_entropy_grad, - ops::SoftmaxWithCrossEntropyGradCUDAKernel); + ops::SoftmaxWithCrossEntropyGradCUDAKernel, + ops::SoftmaxWithCrossEntropyGradCUDAKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index 7f3f9e23aa9455437cfa893363b3e59a0699dbea..c4ab3f74b4b07d13957d99e01aa4868fac719f61 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -4,13 +4,13 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" @@ -60,25 +60,25 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel { logit_grad->ShareDataWith(*context.Input("Softmax")); const int class_num = logit_grad->dims()[1]; + auto out_grad_mat = EigenMatrix::From(*out_grad); + auto logit_grad_mat = EigenMatrix::From(*logit_grad); + if (context.Attr("soft_label")) { - auto out_grad_mat = EigenMatrix::From(*out_grad); - auto logit_grad_mat = EigenMatrix::From(*logit_grad); auto lbl_mat = EigenMatrix::From(*labels); - logit_grad_mat.device(context.GetEigenDevice()) = - logit_grad_mat * - (out_grad_mat.broadcast(Eigen::DSizes(1, class_num)) - - lbl_mat); + out_grad_mat.broadcast(Eigen::DSizes(1, class_num)) * + (logit_grad_mat - lbl_mat); } else { + logit_grad_mat.device(context.GetEigenDevice()) = + logit_grad_mat * + out_grad_mat.broadcast(Eigen::DSizes(1, class_num)); + const int batch_size = logit_grad->dims()[0]; - const int* label_data = labels->data(); - const T* out_grad_data = out_grad->data(); + const int64_t* label_data = labels->data(); T* logit_grad_data = logit_grad->data(); - + const T* out_grad_data = out_grad->data(); for (int i = 0; i < batch_size; ++i) { - int index = i * class_num + label_data[i]; - logit_grad_data[index] = - out_grad_data[i] * (logit_grad_data[index] - 1.); + logit_grad_data[i * class_num + label_data[i]] -= out_grad_data[i]; } } } diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index d9d3dd6e37a8ffd7aa7a2e6f47a1c225474f630b..b1e58952fdba35183822ad2f9a51d5bcc5e6ad6a 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -24,10 +24,16 @@ class SumOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null"); - auto x_dims = ctx->GetInputsDim("X"); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of SumOp should not be null."); + if (ctx->IsRuntime() && + ctx->GetOutputsVarType("Out")[0] == + framework::VarDesc::LOD_TENSOR_ARRAY) { + return; // skip runtime infershape when is tensor array; + } + auto x_dims = ctx->GetInputsDim("X"); size_t N = x_dims.size(); PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1."); @@ -39,6 +45,28 @@ class SumOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", in_dim); ctx->ShareLoD("X", /*->*/ "Out"); } + + protected: + framework::DataType IndicateDataType( + const framework::ExecutionContext& ctx) const override { + auto x_vars = ctx.MultiInputVar("X"); + if (x_vars[0]->IsType()) { + return framework::ToDataType( + x_vars[0]->Get().type()); + } else if (x_vars[0]->IsType()) { + return framework::ToDataType( + x_vars[0]->Get().value().type()); + } else if (x_vars[0]->IsType()) { + auto& array = x_vars[0]->Get(); + for (auto& each : array) { + if (each.numel() != 0) { + return framework::ToDataType(each.type()); + } + } + } + PADDLE_THROW("Unexpected branch. Input type is %s", + x_vars[0]->Type().name()); + } }; class SumOpMaker : public framework::OpProtoAndCheckerMaker { @@ -63,18 +91,32 @@ class SumOpVarTypeInference : public framework::VarTypeInference { void operator()(const framework::OpDescBind& op_desc, framework::BlockDescBind* block) const override { auto& inputs = op_desc.Input("X"); - auto default_var_type = framework::VarDesc::SELECTED_ROWS; + auto var_type = framework::VarDesc::SELECTED_ROWS; bool any_input_is_lod_tensor = std::any_of( inputs.begin(), inputs.end(), [block](const std::string& name) { return block->Var(name)->GetType() == framework::VarDesc::LOD_TENSOR; }); - if (any_input_is_lod_tensor) { - default_var_type = framework::VarDesc::LOD_TENSOR; + + auto is_tensor_array = [block](const std::string& name) { + return block->Var(name)->GetType() == + framework::VarDesc::LOD_TENSOR_ARRAY; + }; + + bool any_input_is_tensor_array = + std::any_of(inputs.begin(), inputs.end(), is_tensor_array); + bool all_inputs_are_tensor_array = + std::all_of(inputs.begin(), inputs.end(), is_tensor_array); + + if (any_input_is_tensor_array) { + PADDLE_ENFORCE(all_inputs_are_tensor_array); + var_type = framework::VarDesc::LOD_TENSOR_ARRAY; + } else if (any_input_is_lod_tensor) { + var_type = framework::VarDesc::LOD_TENSOR; } auto out_var_name = op_desc.Output("Out").front(); - block->Var(out_var_name)->SetType(default_var_type); + block->Var(out_var_name)->SetType(var_type); } }; diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index ad441a598040aca71a72c9c03d477934d14e9a8b..4ca15611392b3117aa6c92cba95911eb8bebeb15 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -11,6 +11,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" +#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/math/math_function.h" #include "paddle/operators/math/selected_rows_functor.h" @@ -28,7 +29,7 @@ using EigenVector = framework::EigenVector; template class SumKernel : public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& context) const override { + void Compute(const framework::ExecutionContext &context) const override { auto in_vars = context.MultiInputVar("X"); int N = in_vars.size(); auto out_var = context.OutputVar("Out"); @@ -36,7 +37,7 @@ class SumKernel : public framework::OpKernel { bool in_place = out_var == in_vars[0]; if (out_var->IsType()) { - auto* out = context.Output("Out"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); auto result = EigenVector::Flatten(*out); @@ -51,11 +52,11 @@ class SumKernel : public framework::OpKernel { // If in_place, just skip the first tensor for (int i = in_place ? 1 : 0; i < N; i++) { if (in_vars[i]->IsType()) { - auto& in_t = in_vars[i]->Get(); + auto &in_t = in_vars[i]->Get(); auto in = EigenVector::Flatten(in_t); result.device(place) = result + in; } else if (in_vars[i]->IsType()) { - auto& in_t = in_vars[i]->Get(); + auto &in_t = in_vars[i]->Get(); functor(context.device_context(), in_t, out); } else { PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); @@ -63,8 +64,8 @@ class SumKernel : public framework::OpKernel { } } else if (out_var->IsType()) { PADDLE_ENFORCE(!in_place, "SelectedRows not support inplace sum now"); - auto* out = context.Output("Out"); - auto* out_value = out->mutable_value(); + auto *out = context.Output("Out"); + auto *out_value = out->mutable_value(); // Runtime InferShape size_t first_dim = 0; @@ -88,9 +89,36 @@ class SumKernel : public framework::OpKernel { offset, out); offset += in_vars[i]->Get().value().numel(); } + } else if (out_var->IsType()) { + auto &out_array = *out_var->GetMutable(); + for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) { + PADDLE_ENFORCE(in_vars[i]->IsType(), + "Only support all inputs are TensorArray"); + auto &in_array = in_vars[i]->Get(); + + for (size_t i = 0; i < in_array.size(); ++i) { + if (in_array[i].numel() != 0) { + if (i >= out_array.size()) { + out_array.resize(i + 1); + } + if (out_array[i].numel() == 0) { + out_array[i].CopyFrom(in_array[i], in_array[i].place(), + context.device_context()); + out_array[i].set_lod(in_array[i].lod()); + } else { + PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); + auto in = EigenVector::Flatten(in_array[i]); + auto result = EigenVector::Flatten(out_array[i]); + result.device(context.GetEigenDevice()) = result + in; + } + } + } + } + } else { + PADDLE_THROW("Unexpected branch, output variable type is %s", + out_var->Type().name()); } } }; - } // namespace operators } // namespace paddle diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..50824032ca0e23b6f961928103ea4aa74b6ac23a --- /dev/null +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -0,0 +1,218 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { +class ArrayOpBase : public framework::OperatorBase { + public: + ArrayOpBase(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override {} + + protected: + size_t GetOffset(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const { + auto *i = scope.FindVar(Input("I")); + PADDLE_ENFORCE(i != nullptr, "I must be set"); + auto &i_tensor = i->Get(); + PADDLE_ENFORCE_EQ(i_tensor.numel(), 1); + size_t offset; + if (platform::is_gpu_place(i_tensor.place())) { + // FIXME: Avoid copy from GPU to CPU + framework::Tensor t; + t.CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx); + dev_ctx.Wait(); + offset = static_cast(*t.data()); + } else { + offset = static_cast(*i_tensor.data()); + } + return offset; + } +}; + +class WriteToArrayOp : public ArrayOpBase { + public: + WriteToArrayOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : ArrayOpBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto *x = scope.FindVar(Input("X")); + PADDLE_ENFORCE(x != nullptr, "X must be set"); + auto &x_tensor = x->Get(); + size_t offset = GetOffset(scope, dev_ctx); + auto *out = + scope.FindVar(Output("Out"))->GetMutable(); + if (offset >= out->size()) { + out->resize(offset + 1); + } + auto *out_tensor = &out->at(offset); + out_tensor->CopyFrom(x_tensor, dev_ctx.GetPlace(), dev_ctx); + out_tensor->set_lod(x_tensor.lod()); + } +}; + +class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + WriteToArrayOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(LoDTensor) the tensor will be written to tensor array"); + AddInput( + "I", + "(Tensor) the subscript index in tensor array. The number of element " + "should be 1"); + AddOutput("Out", "(TensorArray) the tensor array will be written"); + AddComment(R"DOC(Write a LoDTensor to a LoDTensor array. + +Assume T is LoDTensor, i is the subscript of the array, and A is the array. The +equation is + +A[i] = T +)DOC"); + } +}; + +class WriteToArrayInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("I"), "Must set the subscript index"); + PADDLE_ENFORCE_EQ(framework::product(context->GetInputDim("I")), 1, + "The number of element of subscript index must be 1"); + PADDLE_ENFORCE(context->HasInput("X"), NotHasXError()); + PADDLE_ENFORCE(context->HasOutput("Out"), NotHasOutError()); + context->SetOutputDim("Out", context->GetInputDim("X")); + } + + protected: + virtual const char *NotHasXError() const { return "Must set the lod tensor"; } + + virtual const char *NotHasOutError() const { + return "Must set the lod tensor array"; + } +}; + +class WriteToArrayInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDescBind &op_desc, + framework::BlockDescBind *block) const override { + for (auto &out_var : op_desc.OutputArgumentNames()) { + VLOG(10) << "Set Variable " << out_var << " as LOD_TENSOR_ARRAY"; + block->Var(out_var)->SetType(framework::VarDesc::LOD_TENSOR_ARRAY); + } + } +}; + +class ReadFromArrayOp : public ArrayOpBase { + public: + ReadFromArrayOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : ArrayOpBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto *x = scope.FindVar(Input("X")); + PADDLE_ENFORCE(x != nullptr, "X must be set"); + auto &x_array = x->Get(); + auto *out = scope.FindVar(Output("Out")); + PADDLE_ENFORCE(out != nullptr, "Out must be set"); + auto *out_tesnor = out->GetMutable(); + size_t offset = GetOffset(scope, dev_ctx); + PADDLE_ENFORCE_LT(offset, x_array.size()); + out_tesnor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); + out_tesnor->set_lod(x_array[offset].lod()); + } +}; + +class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + ReadFromArrayProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(TensorArray) the array will be read from."); + AddInput("I", + "(Tensor) the subscript index in tensor array. The number of " + "element should be 1"); + AddOutput("Out", "(LoDTensor) the tensor will be read from."); + AddComment(R"DOC(Read a LoDTensor from a LoDTensor Array + +Assume T is LoDTensor, i is th e subscript of the array, and A is the array. The +equation is + +T = A[i] +)DOC"); + } +}; + +class ReadFromArrayInferShape : public WriteToArrayInferShape { + protected: + const char *NotHasXError() const override { + return "The input array X must be set"; + } + const char *NotHasOutError() const override { + return "The output tensor out must be set"; + } +}; + +class WriteToArrayGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDescBind(); + grad_op->SetType("read_from_array"); + grad_op->SetInput("I", Input("I")); + grad_op->SetInput("X", OutputGrad("Out")); + grad_op->SetOutput("Out", InputGrad("X")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +class ReadFromArrayGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDescBind(); + grad_op->SetType("write_to_array"); + grad_op->SetInput("I", Input("I")); + grad_op->SetInput("X", OutputGrad("Out")); + grad_op->SetOutput("Out", InputGrad("X")); + grad_op->SetAttrMap(Attrs()); + return std::unique_ptr(grad_op); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(write_to_array, ops::WriteToArrayOp, + ops::WriteToArrayInferShape, ops::WriteToArrayOpProtoMaker, + ops::WriteToArrayGradMaker, ops::WriteToArrayInferVarType); +REGISTER_OPERATOR(read_from_array, ops::ReadFromArrayOp, + ops::ReadFromArrayInferShape, ops::ReadFromArrayProtoMaker, + ops::ReadFromArrayGradMaker); diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index 926fee47e1f86efa60dc40a2727edb06499bec4f..25fc35311fc63988c64a445d72fc6255e49e8d4b 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -1,5 +1,3 @@ -include_directories(${CMAKE_CURRENT_BINARY_DIR}) - set(OPITMIZER_SRCS adadelta_optimizer.cc adagrad_optimizer.cc @@ -9,11 +7,6 @@ set(OPITMIZER_SRCS sgd_optimizer.cc ) -add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS}) -add_dependencies(paddle_optimizer paddle_proto ${external_project_dependencies}) - - -if(WITH_TESTING) - add_simple_unittest(serialization_test) - add_simple_unittest(parameter_optimizer_test) -endif() +cc_library(paddle_optimizer STATIC SRCS ${OPITMIZER_SRCS} DEPS paddle_proto glog) +cc_test(serialization_test SRCS serialization_test.cc DEPS paddle_proto) +cc_test(parameter_optimizer_test SRCS parameter_optimizer_test.cc DEPS paddle_optimizer) diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc index 34913c405075ed72af30ed056f74e8b4d7482488..5cc7c47d4486c3d149c37fd6e312780f3d44eda8 100644 --- a/paddle/optimizer/adadelta_optimizer.cc +++ b/paddle/optimizer/adadelta_optimizer.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include "adadelta_optimizer.h" #include #include diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h index bc634ee46d60abc9ffc4a31abac5c2f8edaf7aba..6aab1ad553b15ebbd2d04c9323c5e56e1b8f60f5 100644 --- a/paddle/optimizer/adadelta_optimizer.h +++ b/paddle/optimizer/adadelta_optimizer.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #pragma once #include "parameter_optimizer.h" diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc index d915ffb8705eaa96bc96b8071a2c534d4d472273..c981996bab1b2e7ae5d6e2d858a73efde12e32f3 100644 --- a/paddle/optimizer/adagrad_optimizer.cc +++ b/paddle/optimizer/adagrad_optimizer.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include #include "adagrad_optimizer.h" diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h index b2935f8aff87f710f508c5c5757dd36526ca63f9..447b7c7547d5bad7436df6f3b3582b4a219f08c8 100644 --- a/paddle/optimizer/adagrad_optimizer.h +++ b/paddle/optimizer/adagrad_optimizer.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #pragma once #include "parameter_optimizer.h" diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc index 18e5896a22dc8a3c6292293fffc36ca9e3737b4c..6dc2d749708d0e2a7f36734d89eec30d4576842e 100644 --- a/paddle/optimizer/adam_optimizer.cc +++ b/paddle/optimizer/adam_optimizer.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include "adam_optimizer.h" #include diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h index d25cdc0731f65e9875d2fbf67783cce62d88af60..37ab53afc37a5f749a2909de12c7871ed926583f 100644 --- a/paddle/optimizer/adam_optimizer.h +++ b/paddle/optimizer/adam_optimizer.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #pragma once #include "parameter_optimizer.h" diff --git a/paddle/optimizer/optimizer.cc b/paddle/optimizer/optimizer.cc index a2af139d012433214b825bd68289708098b76da8..faa23764522cef03bae1359adbf58d10ee7809ac 100644 --- a/paddle/optimizer/optimizer.cc +++ b/paddle/optimizer/optimizer.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include "optimizer.h" #include #include @@ -6,8 +20,8 @@ #include "parameter_optimizer.h" -using namespace paddle; -using namespace paddle::optimizer; +using paddle::optimizer::ParameterOptimizer; +using paddle::optimizer::Tensor; template struct EnumToType {}; @@ -15,22 +29,21 @@ struct EnumToType {}; template struct TypeToEnum {}; -#define MATCH_ENUM_TYPE(TYPE, ENUM) \ - template <> \ - struct TypeToEnum { \ - static paddle_element_type v() { return ENUM; }; \ - static constexpr TYPE value = ENUM; \ - }; \ - template <> \ - struct EnumToType { \ - typedef TYPE Type; \ +#define MATCH_ENUM_TYPE(TYPE, ENUM) \ + template <> \ + struct TypeToEnum { \ + static paddle_element_type v() { return ENUM; } \ + static constexpr TYPE value = ENUM; \ + }; \ + template <> \ + struct EnumToType { \ + typedef TYPE Type; \ } MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32); MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32); MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64); MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64); -// TODO(zhihong): only implement below type, need to fix MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32); MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64); diff --git a/paddle/optimizer/optimizer.h b/paddle/optimizer/optimizer.h index aabf7a458dd30092ed1e522c4d88c6cfe63fcce1..e6fa12a4d250ccb078358704b0131942ea6ab039 100644 --- a/paddle/optimizer/optimizer.h +++ b/paddle/optimizer/optimizer.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #pragma once #include diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/optimizer/parameter_optimizer.cc index db0714635f9366b0404019688daf4708b4a0052f..da92c2d01cc2a27d1fadd51a338d23b01e0cb0bc 100644 --- a/paddle/optimizer/parameter_optimizer.cc +++ b/paddle/optimizer/parameter_optimizer.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include #include "adadelta_optimizer.h" #include "adagrad_optimizer.h" diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h index 8319f84e1b820adf5cc0006045f2e13dffa91797..99d0416e751c4ca6695d6ed77396e18d48fc86b8 100644 --- a/paddle/optimizer/parameter_optimizer.h +++ b/paddle/optimizer/parameter_optimizer.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #pragma once #include diff --git a/paddle/optimizer/parameter_optimizer_test.cpp b/paddle/optimizer/parameter_optimizer_test.cc similarity index 98% rename from paddle/optimizer/parameter_optimizer_test.cpp rename to paddle/optimizer/parameter_optimizer_test.cc index c99b2254ac6974343206c237377b2440ba8efdf8..f29e5317120642e3790a6f6c1976bdda67093a0c 100644 --- a/paddle/optimizer/parameter_optimizer_test.cpp +++ b/paddle/optimizer/parameter_optimizer_test.cc @@ -110,7 +110,7 @@ public: int s = 0; float* newp = (float*)opts_[i]->get_weight(&s); - EXPECT_EQ(s, kSize); + EXPECT_EQ(static_cast(s), kSize); for (size_t j = 0; j < kSize; ++j) { EXPECT_EQ(newp[j], (*p)[j]); } diff --git a/paddle/optimizer/serialization_test.cpp b/paddle/optimizer/serialization_test.cc similarity index 100% rename from paddle/optimizer/serialization_test.cpp rename to paddle/optimizer/serialization_test.cc diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index 1090419083c8b8cf60eca02791ef673287f4a9a4..c150144ac24b8375d08691a98be680b6bf5d1e7f 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #include "sgd_optimizer.h" #include "serialization.h" diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h index 6e1a0f0d3f9ecfeb51ccb355d65985a2e6388fb0..0b1da0aa27d98e8d6a8d9fd7a1ebe355acb2a1f4 100644 --- a/paddle/optimizer/sgd_optimizer.h +++ b/paddle/optimizer/sgd_optimizer.h @@ -1,3 +1,17 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + #pragma once #include "parameter_optimizer.h" @@ -15,7 +29,6 @@ public: nesterov_(n) { if (momentum_ != 0.0) { size_t size = parameter->size(); - // TODO: fix it with align aware allocator bind to Tensor momentums_ = new Tensor(size); } } diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 5462e6c6c7a840f75f8c15195c6d6910f30ef733..5a1ff9b7976abbe4a37f8366181d9d1ae78ea4a0 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -97,6 +97,15 @@ namespace pybind { using namespace paddle::framework; // NOLINT +template +static py::bytes SerializeMessage(T &self) { + // Check IsInitialized in Python + std::string retv; + PADDLE_ENFORCE(self.Proto()->SerializePartialToString(&retv), + "Cannot serialize message"); + return retv; +} + // Bind Methods void BindProgramDesc(py::module &m) { py::class_(m, "ProgramDesc", "") @@ -132,17 +141,7 @@ void BindProgramDesc(py::module &m) { .def("block", &ProgramDescBind::MutableBlock, py::return_value_policy::reference) .def("num_blocks", &ProgramDescBind::Size) - .def("serialize_to_string", - [](ProgramDescBind &program_desc) -> py::bytes { - const ProgramDesc *desc = program_desc.Proto(); - PADDLE_ENFORCE(desc->IsInitialized(), - "ProgramDesc has not been initialized."); - std::string res; - PADDLE_ENFORCE( - desc->SerializeToString(&res), - "Serialize ProgramDesc Error. This could be a bug of Paddle."); - return res; - }) + .def("serialize_to_string", SerializeMessage) .def("parse_from_string", [](ProgramDescBind &program_desc, const std::string &data) { ProgramDesc *desc = program_desc.Proto(); @@ -181,16 +180,7 @@ void BindBlockDesc(py::module &m) { py::return_value_policy::reference) .def("op_size", &BlockDescBind::OpSize) .def("op", &BlockDescBind::Op, py::return_value_policy::reference) - .def("serialize_to_string", [](BlockDescBind &block_desc) -> py::bytes { - const BlockDesc *desc = block_desc.Proto(); - PADDLE_ENFORCE(desc->IsInitialized(), - "BlockDesc has not been initialized."); - std::string res; - PADDLE_ENFORCE( - desc->SerializeToString(&res), - "Serialize BlockDesc Error. This could be a bug of Paddle."); - return res; - }); + .def("serialize_to_string", SerializeMessage); } void BindVarDsec(py::module &m) { @@ -219,17 +209,7 @@ void BindVarDsec(py::module &m) { .def("set_lod_level", &VarDescBind::SetLoDLevel) .def("type", &VarDescBind::GetType) .def("set_type", &VarDescBind::SetType) - .def("serialize_to_string", - [](VarDescBind &var_desc) -> py::bytes { - const VarDesc *desc = var_desc.Proto(); - PADDLE_ENFORCE(desc->IsInitialized(), - "VarDesc has not been initialized."); - std::string res; - PADDLE_ENFORCE( - desc->SerializeToString(&res), - "Serialize VarDesc Error. This could be a bug of Paddle."); - return res; - }) + .def("serialize_to_string", SerializeMessage) .def("persistable", &VarDescBind::Persistable) .def("set_persistable", &VarDescBind::SetPersistable); @@ -274,16 +254,7 @@ void BindOpDesc(py::module &m) { .def("check_attrs", &OpDescBind::CheckAttrs) .def("infer_shape", &OpDescBind::InferShape) .def("infer_var_type", &OpDescBind::InferVarType) - .def("serialize_to_string", [](OpDescBind &op_desc) -> py::bytes { - const OpDesc *desc = op_desc.Proto(); - PADDLE_ENFORCE(desc->IsInitialized(), - "OpDesc has not been initialized."); - std::string res; - PADDLE_ENFORCE( - desc->SerializeToString(&res), - "Serialize OpDesc Error. This could be a bug of Paddle."); - return res; - }); + .def("serialize_to_string", SerializeMessage); } } // namespace pybind diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 5bdf8c833522564e6b1027ca5dad8c0bb481cdc4..53e68648e6c1718cc4d4d9fe074e8deb418d3b1d 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -2,171 +2,185 @@ set -xe -# Set BASE_IMAGE according to env variables -if [[ ${WITH_GPU} == "ON" ]]; then - BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" -else - BASE_IMAGE="ubuntu:16.04" -fi - -DOCKERFILE_GPU_ENV="" -DOCKERFILE_CUDNN_DSO="" -if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then - DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" - DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so" -fi - -mkdir -p /paddle/build -cd /paddle/build - -# build script will not fail if *.deb does not exist -rm *.deb 2>/dev/null || true -# delete previous built whl packages -rm -rf /paddle/paddle/dist 2>/dev/null || true - -cat </dev/null || true + # delete previous built whl packages + rm -rf /paddle/paddle/dist 2>/dev/null || true -if [ ${WITH_TESTING:-ON} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then -cat < /paddle/build/Dockerfile < -ENV HOME /root +function gen_dockerfile() { + + cat <> /paddle/build/Dockerfile < /paddle/build/Dockerfile < + ENV HOME /root EOF -fi - -if [[ ${WITH_GPU} == "ON" ]]; then - NCCL_DEPS="apt-get install -y libnccl-dev &&" -else - NCCL_DEPS="" -fi - -cat >> /paddle/build/Dockerfile <> /paddle/build/Dockerfile < -#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \ - !defined(__aarch64__) +#if defined(__APPLE__) || defined(__OSX__) int fegetexcept(void); int feenableexcept(unsigned int excepts); diff --git a/paddle/utils/arch/osx/Excepts.cpp b/paddle/utils/arch/osx/Excepts.cpp index 42ecaa06d256c9d259a20c648626605d77ce0308..ac444615786fa9f89f96504a31b2289eae7bb643 100644 --- a/paddle/utils/arch/osx/Excepts.cpp +++ b/paddle/utils/arch/osx/Excepts.cpp @@ -14,9 +14,13 @@ limitations under the License. */ #include "paddle/utils/Excepts.h" -#if (defined(__APPLE__) || defined(__OSX__)) && !defined(__arm__) && \ - !defined(__aarch64__) - +#if defined(__APPLE__) || defined(__OSX__) +#if defined(__arm__) || defined(__arm64__) +// TODO(liuyiqun): implement the arm version +int fegetexcept(void) { return -1; } +int feenableexcept(unsigned int excepts) { return -1; } +int fedisableexcept(unsigned int excepts) { return -1; } +#else int fegetexcept(void) { static fenv_t fenv; return fegetenv(&fenv) ? -1 : (fenv.__control & FE_ALL_EXCEPT); @@ -49,5 +53,5 @@ int fedisableexcept(unsigned int excepts) { return (fesetenv(&fenv) ? -1 : old_excepts); } - +#endif #endif diff --git a/paddle/utils/tests/test_StringUtils.cpp b/paddle/utils/tests/test_StringUtils.cpp index fdc914d1bcc3c74e0f05ef475069abc315bdc306..248f58a7f26e26e82b55110930964cee04fb558b 100644 --- a/paddle/utils/tests/test_StringUtils.cpp +++ b/paddle/utils/tests/test_StringUtils.cpp @@ -18,6 +18,6 @@ limitations under the License. */ TEST(StringUtil, to) { ASSERT_NEAR(paddle::str::to("12.45"), 12.45, 1e-5); - ASSERT_DEATH(paddle::str::to("12.45x23"), ".*"); - ASSERT_DEATH(paddle::str::to(""), ".*"); + ASSERT_DEATH_IF_SUPPORTED(paddle::str::to("12.45x23"), ".*"); + ASSERT_DEATH_IF_SUPPORTED(paddle::str::to(""), ".*"); } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 7bd6d59b0096c23bb791b9b50702130057628879..32578ad7799c0a276972ccef7770c2eae8438069 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -44,6 +44,7 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/pad add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND touch stub.cc COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 6e8ac8838bf3012969bf4f597cdf4430693fe313..169e201046a0d7b8c3e85f60946d8c1c762c88f4 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -143,6 +143,7 @@ __all__ = [ 'scale_shift_layer', 'img_conv3d_layer', 'resize_layer', + 'sub_seq_layer', ] @@ -252,6 +253,7 @@ class LayerType(object): SCALE_SHIFT_LAYER = 'scale_shift' RESIZE = 'resize' + SUB_SEQ_LAYER = 'subseq' @staticmethod def is_layer_type(type_name): @@ -6980,3 +6982,58 @@ def resize_layer(input, size, name=None): """ Layer(name=name, type=LayerType.RESIZE, inputs=Input(input.name), size=size) return LayerOutput(name, LayerType.RESIZE, parents=[input], size=input.size) + + +@wrap_act_default(act=LinearActivation()) +@wrap_name_default('sub_seq') +def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): + """ + sub_seq_layer will return sub-sequences from the input sequences. For each + sequence in the input sequence layer, sub_seq_layer will slice it by given + offset and size. Please notice that, number of offset value and size value + both are equal to the number of sequence in the input layer. + + .. code-block:: python + + sub_seq = sub_seq_layer(input=input_seq, offsets=offsets, sizes=sizes) + + :param name: The name of this layer. It is optional. + :type name: basestring + :param input: The input of this layer, which should be sequence. + :type input: LayerOutput + :param offsets: offset indices to slice the input sequence, which should be + sequence type. + :type offsets: LayerOutput + :param sizes: sizes of the sub-sequences, which should be sequence type. + :type sizes: LayerOutput + :param act: Layer activation, default is LinearActivation + :type act: BaseActivation. + :param bias_attr: The Bias Attribute. If the parameter is set to + False or something not type of ParameterAttribute, + no bias is defined. If the parameter is set to + True, the bias is initialized to zero. + :type bias_attr: ParameterAttribute | None | bool | Any + :return: LayerOutput object. + :rtype: LayerOutput + """ + + assert isinstance(input, LayerOutput), ( + 'The first input of sub_seq_layer layer must be a PaddlePaddle layer.') + assert isinstance(offsets, LayerOutput), ( + 'The offset indices for sub_seq_layer, ' + 'must be a PaddlePaddle layer.') + assert isinstance(sizes, LayerOutput), ( + 'The sizes of sub-sequences, must be a PaddlePaddle layer.') + + Layer( + name=name, + type=LayerType.SUB_SEQ_LAYER, + inputs=[input.name, offsets.name, sizes.name], + active_type=act.name, + bias=ParamAttr.to_bias(bias_attr)) + + return LayerOutput( + name, + LayerType.SUB_SEQ_LAYER, + parents=[input, offsets, sizes], + size=input.size) diff --git a/python/paddle/trainer_config_helpers/optimizers.py b/python/paddle/trainer_config_helpers/optimizers.py index c3495ee110bfaf91a47637a52e88b3bb56dce7a9..c3cd4cf8c32e20f3ef86305489fc415397dec1b8 100644 --- a/python/paddle/trainer_config_helpers/optimizers.py +++ b/python/paddle/trainer_config_helpers/optimizers.py @@ -116,7 +116,7 @@ class AdamOptimizer(BaseSGDOptimizer): m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\ v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\ - w & = w - \\frac{\\eta}{\\sqrt{v(w,t) + \\epsilon}} + w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}} :param beta1: the :math:`\\beta_1` in equation. :type beta1: float diff --git a/python/paddle/utils/merge_model.py b/python/paddle/utils/merge_model.py index 48e5087cc281bd3a3d0b4a403372456ebbf39c62..421e953d2775f145800cf7179ec644697a265060 100644 --- a/python/paddle/utils/merge_model.py +++ b/python/paddle/utils/merge_model.py @@ -23,32 +23,32 @@ from paddle.v2.topology import Topology def merge_v2_model(net, param_file, output_file): - '''Integrate the model config and model parameters into one file. - + '''Merge the model config and parameters into one file. + The model configuration file describes the model structure which ends with .py. The parameters file stores the parameters of the model which ends with .tar.gz. - - @param net The output layer of the network. - @param param_file Path of the model parameters(.tar.gz) which is stored by v2 api. + + @param net The output layer of the network for inference. + @param param_file Path of the parameters (.tar.gz) which is stored by v2 api. @param output_file Path of the merged file which will be generated. - + Usage: - from paddle.util.merge_model import merge_v2_model + from paddle.utils.merge_model import merge_v2_model # import your network configuration - from mobilenet import mobile_net - - net = mobile_net(3*224*224, 102) + from example_net import net_conf + + net = net_conf(is_predict=True) param_file = './param_pass_00000.tar.gz' output_file = './output.paddle' - + merge_v2_model(net, param_file, output_file) ''' assert isinstance(net, LayerOutput), \ - "The net should be the output of the network" + "The net should be the output of the network for inference" assert os.path.exists(param_file), \ "The model parameters file %s does not exists " % (param_file) diff --git a/python/paddle/v2/framework/executor.py b/python/paddle/v2/framework/executor.py index 8268d0d8f5126b00365d4e5e76cd98de4c47e670..f5c833190e73a277bef2509e02c4be051768933d 100644 --- a/python/paddle/v2/framework/executor.py +++ b/python/paddle/v2/framework/executor.py @@ -1,5 +1,5 @@ import paddle.v2.framework.core as core -from paddle.v2.framework.framework import Block, Program +from paddle.v2.framework.framework import Block, Program, g_main_program g_scope = core.Scope() @@ -18,7 +18,7 @@ class Executor(object): self.executor = core.Executor(act_places) def run(self, - program, + program=None, feed=None, fetch_list=None, feed_var_name='feed', @@ -29,6 +29,9 @@ class Executor(object): if fetch_list is None: fetch_list = [] + if program is None: + program = g_main_program + if not isinstance(program, Program): raise TypeError() diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index dd23c47961b6b27ffca32dc7fe496d715411c858..8fb3cca91e5f8759b8a83b12428c78d222f382ac 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -12,6 +12,14 @@ def unique_name(prefix): return "_".join([prefix, str(uid)]) +def _debug_string_(proto): + error_fields = list() + if not proto.IsInitialized(error_fields): + raise ValueError("{0} are not initialized\nThe message is {1}".format( + error_fields, proto)) + return proto.__str__() + + class Variable(object): def __init__(self, block, @@ -95,7 +103,7 @@ class Variable(object): def __str__(self): protostr = self.desc.serialize_to_string() proto = framework_pb2.VarDesc.FromString(str(protostr)) - return proto.__str__() + return _debug_string_(proto) __repr__ = __str__ @@ -286,7 +294,7 @@ class Operator(object): def __str__(self): protostr = self.desc.serialize_to_string() proto = framework_pb2.OpDesc.FromString(str(protostr)) - return proto.__str__() + return _debug_string_(proto) __repr__ = __str__ @@ -343,7 +351,7 @@ class Block(object): def __str__(self): protostr = self.desc.serialize_to_string() proto = framework_pb2.BlockDesc.FromString(str(protostr)) - return proto.__str__() + return _debug_string_(proto) __repr__ = __str__ @@ -448,7 +456,7 @@ class Program(object): def __str__(self): protostr = self.desc.serialize_to_string() proto = framework_pb2.ProgramDesc.FromString(str(protostr)) - return proto.__str__() + return _debug_string_(proto) def clone(self): p = Program() diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py index 0739b2d2e2964f7f3565baf068a80c89938d193a..917d3d938862640145591c5084c9f0bc4d7c23bf 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/framework/layers.py @@ -1,6 +1,8 @@ import paddle.v2.framework.core as core -from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, Operator -from paddle.v2.framework.initializer import ConstantInitializer, NormalInitializer +from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \ + Operator +from paddle.v2.framework.initializer import ConstantInitializer, \ + NormalInitializer from paddle.v2.framework.layer_helper import LayerHelper, unique_name import re @@ -372,11 +374,13 @@ def sequence_pool(input, pool_type, **kwargs): helper = LayerHelper('sequence_pool', input=input, **kwargs) dtype = helper.input_dtype() pool_out = helper.create_tmp_variable(dtype) + max_index = helper.create_tmp_variable(dtype) helper.append_op( type="sequence_pool", - inputs={"X": [input]}, - outputs={"Out": [pool_out]}, + inputs={"X": input}, + outputs={"Out": pool_out, + "MaxIndex": max_index}, attrs={"pooltype": pool_type.upper()}) return pool_out @@ -577,25 +581,45 @@ class StaticRNN(object): if self.status != StaticRNN.IN_RNN_BLOCK: raise ValueError("You must invoke {0} in rnn block".format(method)) - def memory(self, init=None, shape=None, dtype=None, init_value=0): + def memory(self, + init=None, + shape=None, + batch_ref=None, + init_value=0.0, + init_batch_dim_idx=0, + ref_batch_dim_idx=1): + ''' + :param init: boot memory, if not set, a shape, batch_ref must be provided + :param shape: shape of the boot memory + :param batch_ref: batch size reference variable + :param init_value: the init value of boot memory + :param init_batch_dim_idx: the index of batch size in init's dimension + :param ref_batch_dim_idx: the index of batch size in batch_ref's dimension + :return: boot memory + ''' self._assert_in_rnn_block_('memory') if init is None: - if shape is None or dtype is None: + if shape is None or batch_ref is None: raise ValueError( - "if init is None, memory at least need shape and dtype") + "if init is None, memory at least need shape and batch_ref") parent_block = self.parent_block() var_name = unique_name("@".join([self.helper.name, "memory_boot"])) boot_var = parent_block.create_var( - name=var_name, shape=shape, dtype=dtype, persistable=False) + name=var_name, + shape=shape, + dtype=batch_ref.data_type, + persistable=False) parent_block.append_op( - type="fill_constant", - inputs={}, + type="fill_constant_batch_size_like", + inputs={'Input': [batch_ref]}, outputs={'Out': [boot_var]}, attrs={ 'value': init_value, - 'shape': [40] + list(boot_var.shape[1:]), - 'data_type': boot_var.data_type + 'shape': boot_var.shape, + 'data_type': boot_var.data_type, + 'input_dim_idx': ref_batch_dim_idx, + 'output_dim_idx': init_batch_dim_idx }) return self.memory(init=boot_var) @@ -749,3 +773,68 @@ def lod_rank_table(x, level=0, main_program=None): outputs={'Out': table}, attrs={'level': level}) return table + + +def fill_constant(shape, dtype, value, main_program=None): + helper = LayerHelper("ones", **locals()) + out = helper.create_tmp_variable(dtype=dtype) + helper.append_op( + type='fill_constant', + inputs={}, + outputs={'Out': [out]}, + attrs={ + 'shape': shape, + 'data_type': out.data_type, + 'value': float(value) + }) + out.stop_gradient = True + return out + + +def ones(shape, dtype, main_program=None): + return fill_constant(value=1.0, **locals()) + + +def zeros(shape, dtype, main_program=None): + return fill_constant(value=0.0, **locals()) + + +def increment(x, value=1.0, main_program=None): + helper = LayerHelper("increment", **locals()) + tmp = helper.create_tmp_variable(dtype=x.data_type) + helper.append_op( + type='increment', + inputs={'X': [x]}, + outputs={'Out': [tmp]}, + attrs={'step': value}) + return tmp + + +def array_write(x, i, array=None, main_program=None): + helper = LayerHelper('array_write', **locals()) + if array is None: + array = helper.create_variable( + name="{0}.out".format(helper.name), + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + dtype=x.data_type) + helper.append_op( + type='write_to_array', + inputs={'X': [x], + 'I': [i]}, + outputs={'Out': [array]}) + return array + + +def array_read(array, i, main_program=None): + helper = LayerHelper('array_read', **locals()) + if not isinstance( + array, + Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: + raise TypeError("array should be tensor array vairable") + out = helper.create_tmp_variable(dtype=array.data_type) + helper.append_op( + type='read_from_array', + inputs={'X': [array], + 'I': [i]}, + outputs={'Out': [out]}) + return out diff --git a/python/paddle/v2/framework/tests/test_array_read_write_op.py b/python/paddle/v2/framework/tests/test_array_read_write_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b2a2ff2b8213305fe039ae494fb933e65a76781a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_array_read_write_op.py @@ -0,0 +1,93 @@ +import unittest +import paddle.v2.framework.core as core +import paddle.v2.framework.layers as layers +from paddle.v2.framework.executor import Executor +from paddle.v2.framework.backward import append_backward_ops +from paddle.v2.framework.framework import g_main_program +import numpy + + +class TestArrayReadWrite(unittest.TestCase): + def test_read_write(self): + x = [ + layers.data( + name='x0', shape=[100]), layers.data( + name='x1', shape=[100]), layers.data( + name='x2', shape=[100]) + ] + + for each_x in x: + each_x.stop_gradient = False + + i = layers.zeros(shape=[1], dtype='int64') + arr = layers.array_write(x=x[0], i=i) + i = layers.increment(x=i) + i.stop_gradient = True + arr = layers.array_write(x=x[1], i=i, array=arr) + i = layers.increment(x=i) + i.stop_gradient = True + arr = layers.array_write(x=x[2], i=i, array=arr) + + i = layers.zeros(shape=[1], dtype='int64') + a0 = layers.array_read(array=arr, i=i) + i = layers.increment(x=i) + i.stop_gradient = True # index should not calculate gradient + a1 = layers.array_read(array=arr, i=i) + i = layers.increment(x=i) + i.stop_gradient = True + a2 = layers.array_read(array=arr, i=i) + + mean_a0 = layers.mean(x=a0) + mean_a1 = layers.mean(x=a1) + mean_a2 = layers.mean(x=a2) + + a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2]) + + mean_x0 = layers.mean(x=x[0]) + mean_x1 = layers.mean(x=x[1]) + mean_x2 = layers.mean(x=x[2]) + + x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2]) + + scope = core.Scope() + cpu = core.CPUPlace() + + exe = Executor(cpu) + + tensor = core.LoDTensor() + tensor.set(numpy.random.random(size=(100, 100)).astype('float32'), cpu) + + outs = map(numpy.array, + exe.run(feed={'x0': tensor, + 'x1': tensor, + 'x2': tensor}, + fetch_list=[a_sum, x_sum], + scope=scope)) + self.assertEqual(outs[0], outs[1]) + + total_sum = layers.sums(input=[a_sum, x_sum]) + total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0) + + append_backward_ops(total_sum_scaled) + + g_vars = map(g_main_program.global_block().var, + [each_x.name + "@GRAD" for each_x in x]) + g_out = [ + item.sum() + for item in map( + numpy.array, + exe.run(feed={'x0': tensor, + 'x1': tensor, + 'x2': tensor}, + fetch_list=g_vars)) + ] + g_out_sum = numpy.array(g_out).sum() + + # since our final gradient is 1 and the neural network are all linear + # with mean_op. + # the input gradient should also be 1 + self.assertAlmostEqual(1.0, g_out_sum, delta=0.1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_conv2d_op.py b/python/paddle/v2/framework/tests/test_conv2d_op.py index f58b96463cf78103b2acb3d80652ef0aa988ad49..04ae7f294c27fdceaaff2e9a7ed854213e643945 100644 --- a/python/paddle/v2/framework/tests/test_conv2d_op.py +++ b/python/paddle/v2/framework/tests/test_conv2d_op.py @@ -61,25 +61,23 @@ class TestConv2dOp(OpTest): def test_check_grad(self): self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.05) + set(['Input', 'Filter']), 'Output', max_relative_error=0.02) def test_check_grad_no_filter(self): self.check_grad( ['Input'], 'Output', - max_relative_error=0.05, + max_relative_error=0.02, no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): self.check_grad( ['Filter'], 'Output', - max_relative_error=0.05, + max_relative_error=0.02, no_grad_set=set(['Input'])) def init_test_case(self): - # self.groups = 1 - # self.op_type = "conv2d" self.pad = [0, 0] self.stride = [1, 1] self.dilations = [1, 1] @@ -103,6 +101,9 @@ class TestWithGroup(TestConv2dOp): self.op_type = "conv2d" +#----------------Conv2dCudnn---------------- + + class TestCudnn(TestConv2dOp): def init_group(self): self.groups = 1 diff --git a/python/paddle/v2/framework/tests/test_conv2d_transpose_op.py b/python/paddle/v2/framework/tests/test_conv2d_transpose_op.py index 999a0bdc629010d96a8db31b317ba7a65bf35773..54349c018c4a53b8767d6cd4f94d99c719dc0237 100644 --- a/python/paddle/v2/framework/tests/test_conv2d_transpose_op.py +++ b/python/paddle/v2/framework/tests/test_conv2d_transpose_op.py @@ -58,36 +58,37 @@ class TestConv2dTransposeOp(OpTest): print 'check output here for', self.op_type self.check_output() - def init_test_case(self): - self.pad = [0, 0] - self.stride = [1, 1] - self.dilations = [1, 1] - self.input_size = [2, 3, 5, 5] # NCHW - f_c = self.input_size[1] - self.filter_size = [f_c, 6, 3, 3] - - def init_op_type(self): - self.op_type = "conv2d_transpose" - def test_check_grad_no_input(self): self.check_grad( ['Filter'], 'Output', - max_relative_error=0.05, + max_relative_error=0.02, no_grad_set=set(['Input'])) def test_check_grad_no_filter(self): self.check_grad( ['Input'], 'Output', - max_relative_error=0.05, + max_relative_error=0.02, no_grad_set=set(['Filter'])) def test_check_grad(self): self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.05) + set(['Input', 'Filter']), 'Output', max_relative_error=0.02) + + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + def init_op_type(self): + self.op_type = "conv2d_transpose" +# ------------ test_cudnn ------------ class TestCudnn(TestConv2dTransposeOp): def init_op_type(self): self.op_type = "conv2d_transpose_cudnn" diff --git a/python/paddle/v2/framework/tests/test_conv3d_op.py b/python/paddle/v2/framework/tests/test_conv3d_op.py new file mode 100644 index 0000000000000000000000000000000000000000..44c192f58d25f8ddaa38d2ba7c7c19b9a5bd7dc1 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_conv3d_op.py @@ -0,0 +1,131 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def conv3d_forward_naive(input, filter, group, conv_param): + in_n, in_c, in_d, in_h, in_w = input.shape + out_c, f_c, f_d, f_h, f_w = filter.shape + assert f_c * group == in_c + assert np.mod(out_c, group) == 0 + sub_out_c = out_c / group + + stride, pad = conv_param['stride'], conv_param['pad'] + out_d = 1 + (in_d + 2 * pad[0] - f_h) / stride[0] + out_h = 1 + (in_h + 2 * pad[1] - f_h) / stride[1] + out_w = 1 + (in_w + 2 * pad[2] - f_w) / stride[2] + out = np.zeros((in_n, out_c, out_d, out_h, out_w)) + + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], ), + (pad[2], )), + mode='constant', + constant_values=0) + for d in range(out_d): + for i in range(out_h): + for j in range(out_w): + for g in range(group): + input_pad_masked = \ + input_pad[:, g * f_c:(g + 1) * f_c, + d * stride[0]:d * stride[0] + f_d, + i * stride[1]:i * stride[1] + f_h, + j * stride[2]:j * stride[2] + f_w] + f_sub = filter[g * sub_out_c:(g + 1) * + sub_out_c, :, :, :, :] + for k in range(sub_out_c): + out[:, g * sub_out_c + k, d, i, j] = \ + np.sum(input_pad_masked * f_sub[k, :, :, :, :], + axis=(1, 2, 3, 4)) + + return out + + +class TestConv3dOp(OpTest): + def setUp(self): + self.init_group() + self.init_op_type() + self.init_test_case() + + conv3d_param = {'stride': self.stride, 'pad': self.pad} + input = np.random.random(self.input_size).astype("float32") + filter = np.random.random(self.filter_size).astype("float32") + output = conv3d_forward_naive(input, filter, self.groups, + conv3d_param).astype("float32") + + self.inputs = {'Input': input, 'Filter': filter} + self.attrs = { + 'strides': self.stride, + 'paddings': self.pad, + 'groups': self.groups + } + self.outputs = {'Output': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.03) + + def test_check_grad_no_filter(self): + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) + + def test_check_grad_no_input(self): + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) + + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 4, 4, 4] # NCDHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3, 3] + + def init_group(self): + self.groups = 1 + + def init_op_type(self): + self.op_type = "conv3d" + + +class TestCase1(TestConv3dOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 4, 4, 4] # NCDHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3, 3] + + def init_group(self): + self.groups = 1 + + def init_op_type(self): + self.op_type = "conv3d" + + +class TestWithGroup1(TestConv3dOp): + def init_group(self): + self.groups = 3 + + def init_op_type(self): + self.op_type = "conv3d" + + +class TestWithGroup2(TestCase1): + def init_group(self): + self.groups = 3 + + def init_op_type(self): + self.op_type = "conv3d" + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py b/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py new file mode 100644 index 0000000000000000000000000000000000000000..132fe7931438a30cf02e4ad2894c0838e48ffc9f --- /dev/null +++ b/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py @@ -0,0 +1,97 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): + # [2, 3, 5, 5, 5] + in_n, in_c, in_d, in_h, in_w = input_.shape + # [3, 6, 3, 3, 3] + f_c, out_c, f_d, f_h, f_w = filter_.shape + assert in_c == f_c + + stride, pad = conv3dtranspose_param['stride'], conv3dtranspose_param['pad'] + out_d = (in_d - 1) * stride[0] + f_d + out_h = (in_h - 1) * stride[1] + f_h + out_w = (in_w - 1) * stride[2] + f_w + + out = np.zeros((in_n, out_c, out_d, out_h, out_w)) + + for n in range(in_n): + for d in range(in_d): + for i in range(in_h): + for j in range(in_w): + input_masked = input_[n, :, d, i, j] # (c) + input_masked = np.reshape(input_masked, (in_c, 1, 1, 1)) + input_masked = np.tile(input_masked, (1, f_d, f_h, f_w)) + + for k in range(out_c): + tmp_out = np.sum(input_masked * filter_[:, k, :, :, :], + axis=0) + d1, d2 = d * stride[0], d * stride[0] + f_d + i1, i2 = i * stride[1], i * stride[1] + f_h + j1, j2 = j * stride[2], j * stride[2] + f_w + out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out + + return out + + +class TestConv3dTransposeOp(OpTest): + def setUp(self): + # init as conv transpose + self.init_op_type() + + # [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7] + self.init_test_case() + + conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad} + input_ = np.random.random(self.input_size).astype("float32") + filter_ = np.random.random(self.filter_size).astype("float32") + output = conv3dtranspose_forward_naive( + input_, filter_, conv3dtranspose_param).astype("float32") + # print 'deconv output py', output, output.shape + + self.inputs = {'Input': input_, 'Filter': filter_} + self.attrs = { + 'strides': self.stride, + 'paddings': self.pad, + # 'dilations': self.dilations + } + self.outputs = {'Output': output} + + def test_check_output(self): + print 'check output here' + self.check_output() + + def test_check_grad(self): + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.02) + + def test_check_grad_no_filter(self): + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter'])) + + def test_check_grad_no_input(self): + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Input'])) + + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.op_type = "conv3d_transpose" + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py b/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py index 319ae52fb38ee122808a6e6b0e44d3a3787d8e53..99de6b5d052b41499800afb6181a235da340bc15 100644 --- a/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py +++ b/python/paddle/v2/framework/tests/test_fill_constant_batch_size_like_op.py @@ -21,9 +21,14 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest): def setUp(self): self.op_type = "fill_constant_batch_size_like" self.inputs = {'Input': np.random.random((219, 232)).astype("float32")} - self.attrs = {'value': 3.5, 'shape': [132, -1, 7], 'dim_idx': 1} - - out = np.random.random((132, 232, 7)).astype("float32") + self.attrs = { + 'value': 3.5, + 'shape': [132, -1, 7], + 'input_dim_idx': 0, + 'output_dim_idx': 1 + } + + out = np.random.random((132, 219, 7)).astype("float32") out.fill(3.5) self.outputs = {'Out': out} diff --git a/python/paddle/v2/framework/tests/test_framework_debug_str.py b/python/paddle/v2/framework/tests/test_framework_debug_str.py new file mode 100644 index 0000000000000000000000000000000000000000..8fdf8f91171ee334fac93c05a4d49056fa0e803d --- /dev/null +++ b/python/paddle/v2/framework/tests/test_framework_debug_str.py @@ -0,0 +1,13 @@ +import unittest +from paddle.v2.framework.framework import Program + + +class TestDebugStringFramework(unittest.TestCase): + def test_debug_str(self): + p = Program() + p.current_block().create_var(name='t', shape=[0, 1]) + self.assertRaises(ValueError, callableObj=p.__str__) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gru_op.py b/python/paddle/v2/framework/tests/test_gru_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b2474cff94c6c71cc62bc8e69a5d83e38d51c511 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gru_op.py @@ -0,0 +1,156 @@ +import unittest +import numpy as np +import math +from op_test import OpTest +from test_lstm_op import identity, sigmoid, tanh, relu + + +class TestGRUOp(OpTest): + batch_size = 9 + frame_size = 5 + activate = { + 'identity': identity, + 'sigmoid': sigmoid, + 'tanh': tanh, + 'relu': relu + } + + @staticmethod + def seq_to_batch(lod, is_reverse): + idx_in_seq_list = [] + seq_starts = lod[0] + seq_lens = [] + for i in range(len(seq_starts) - 1): + seq_lens.append(seq_starts[i + 1] - seq_starts[i]) + sorted_seqs = sorted( + range(len(seq_lens)), lambda x, y: seq_lens[y] - seq_lens[x]) + num_batch = seq_lens[sorted_seqs[0]] + for batch_idx in range(num_batch): + idx_in_seq = [] + for i in range(len(seq_lens)): + if seq_lens[sorted_seqs[i]] <= batch_idx: + break + idx = (seq_starts[sorted_seqs[i] + 1] - 1 - batch_idx + ) if is_reverse else ( + seq_starts[sorted_seqs[i]] + batch_idx) + idx_in_seq.append(idx) + idx_in_seq_list.append(idx_in_seq) + return idx_in_seq_list + + def gru_step(self, x, h_p, w, b): + batch_size = x.shape[0] + frame_size = w.shape[0] + g = x + np.tile(b, (batch_size, 1)) + w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape( + (frame_size, frame_size * 2)) + u_r = self.activate[self.attrs['gate_activation']](np.dot( + h_p, w_u_r) + g[:, :frame_size * 2]) + u = u_r[:, :frame_size] + r = u_r[:, frame_size:frame_size * 2] + r_h_p = r * h_p + w_c = w.flatten()[frame_size * frame_size * 2:].reshape( + (frame_size, frame_size)) + c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + + g[:, frame_size * 2:]) + g = np.hstack((u_r, c)) + h = u * c + (1 - u) * h_p + return g, r_h_p, h + + def gru(self): + input, lod = self.inputs['Input'] + w = self.inputs['Weight'] + b = self.inputs['Bias'] if self.inputs.has_key('Bias') else np.zeros( + (1, self.frame_size * 3)) + batch_gate = self.outputs['BatchGate'] + batch_reset_hidden_prev = self.outputs['BatchResetHiddenPrev'] + batch_hidden = self.outputs['BatchHidden'] + hidden = self.outputs['Hidden'] + idx_in_seq_list = self.idx_in_seq_list + h_p = self.inputs['H0'] if self.inputs.has_key('H0') else np.zeros( + (len(idx_in_seq_list[0]), self.frame_size)) + num_batch = len(idx_in_seq_list) + end_idx = 0 + for batch_idx in range(num_batch): + x = input[idx_in_seq_list[batch_idx]] + g, r_h_p, h = self.gru_step(x, h_p, w, b) + if batch_idx < (num_batch - 1): + h_p = h[:len(idx_in_seq_list[batch_idx + 1])] + start_idx = end_idx + end_idx = start_idx + len(idx_in_seq_list[batch_idx]) + batch_gate[start_idx:end_idx] = g + batch_reset_hidden_prev[start_idx:end_idx] = r_h_p + batch_hidden[start_idx:end_idx] = h + hidden[idx_in_seq_list[batch_idx]] = h + return batch_gate, batch_reset_hidden_prev, hidden + + def set_data(self): + lod = [[0, 2, 6, self.batch_size]] + self.idx_in_seq_list = self.seq_to_batch(lod, self.is_reverse) + batch_size = self.batch_size + frame_size = self.frame_size + input = np.random.rand(batch_size, frame_size * 3).astype('float64') + h0 = np.random.rand(len(self.idx_in_seq_list[0]), + frame_size).astype('float64') + weight = np.random.rand(frame_size, frame_size * 3).astype('float64') + bias = np.random.rand(1, frame_size * 3).astype('float64') + + self.inputs = { + 'Input': (input, lod), + 'H0': h0, + 'Weight': weight, + 'Bias': bias + } + + self.outputs = { + 'BatchGate': np.zeros( + (batch_size, frame_size * 3), dtype='float64'), + 'BatchResetHiddenPrev': np.zeros( + (batch_size, frame_size), dtype='float64'), + 'BatchHidden': np.zeros( + (batch_size, frame_size), dtype='float64'), + 'Hidden': np.zeros( + (batch_size, frame_size), dtype='float64') + } + + def set_confs(self): + self.is_reverse = False + self.attrs = { + 'activation': 'tanh', + 'gate_activation': 'sigmoid', + 'is_reverse': self.is_reverse + } + + def setUp(self): + self.op_type = "gru" + self.set_confs() + self.set_data() + self.gru() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['Input', 'H0', 'Weight', 'Bias'], ['Hidden']) + + +class TestGRUOpNoInitial(TestGRUOp): + def set_data(self): + super(TestGRUOpNoInitial, self).set_data() + self.inputs.pop('H0') + + def test_check_grad(self): + self.check_grad(['Input', 'Weight', 'Bias'], ['Hidden']) + + +class TestGRUOpReverse(TestGRUOp): + def set_confs(self): + self.is_reverse = True + self.attrs = { + 'activation': 'identity', + 'gate_activation': 'sigmoid', + 'is_reverse': self.is_reverse + } + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_huber_loss_op.py b/python/paddle/v2/framework/tests/test_huber_loss_op.py index 003e7d7ed7ccdfc48b0aa8db0a6765b5c93e7c14..a24fcbec6cc4801118ce4ef97eb4692cd2351c28 100644 --- a/python/paddle/v2/framework/tests/test_huber_loss_op.py +++ b/python/paddle/v2/framework/tests/test_huber_loss_op.py @@ -21,7 +21,8 @@ class TestHuberLossOp(OpTest): 'Y': np.random.uniform(0, 1., (samples_num, 1)).astype('float32'), } residual = self.inputs['Y'] - self.inputs['X'] - loss = np.vectorize(huber_loss_forward)(residual, delta) + loss = np.vectorize(huber_loss_forward)(residual, + delta).astype('float32') self.attrs = {'delta': delta} self.outputs = { 'Residual': residual, @@ -43,6 +44,5 @@ class TestHuberLossOp(OpTest): ['X'], 'Out', max_relative_error=0.008, no_grad_set=set('residual')) -# TODO(typhoonzero): should add this back till we fix it -#if __name__ == '__main__': -# unittest.main() +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py b/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py new file mode 100644 index 0000000000000000000000000000000000000000..f6a6c428a26dece01fe2958991edd3edf3a8266e --- /dev/null +++ b/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py @@ -0,0 +1,106 @@ +import unittest +import itertools +import numpy as np +from op_test import OpTest + + +def py_pnpair_op(score, label, query, column=-1, weight=None): + # group by query id + predictions = {} + batch_size = label.shape[0] + if weight is None: + weight = np.ones(shape=(batch_size, 1)).astype('float32') + for s, l, q, w in zip(score, label, query, weight): + s, l, q, w = s[column], l[0], q[0], w[0] + if q not in predictions: + predictions[q] = [] + predictions[q].append((s, l, w)) + + # accumulate statistics + pos, neg, neu = 0, 0, 0 + for _, ranks in predictions.items(): + for e1, e2 in itertools.combinations(ranks, 2): + s1, s2, l1, l2, w1, w2 = e1[0], e2[0], e1[1], e2[1], e1[2], e2[2] + w = (w1 + w2) * 0.5 + if l1 == l2: + continue + if s1 == s2: + neu += w + elif (s1 - s2) * (l1 - l2) > 0: + pos += w + else: + neg += w + + return np.array(pos).astype('float32'), np.array(neg).astype( + 'float32'), np.array(neu).astype('float32') + + +class TestPositiveNegativePairOp(OpTest): + def setUp(self): + self.op_type = 'positive_negative_pair' + batch_size = 20 + max_query_id = 5 + score = np.random.normal(size=(batch_size, 1)).astype('float32') + label = np.random.normal(size=(batch_size, 1)).astype('float32') + query = np.array( + [np.random.randint(max_query_id) for i in range(batch_size)]) + query = np.reshape(query, newshape=(batch_size, 1)).astype('int64') + + pos, neg, neu = py_pnpair_op(score, label, query) + self.inputs = {'Score': score, 'Label': label, 'QueryID': query} + self.attrs = {'column': -1} + self.outputs = { + 'PositivePair': pos, + 'NegativePair': neg, + 'NeutralPair': neu + } + + def test_check_output(self): + self.check_output() + + +class TestPositiveNegativePairOpAccumulateWeight(OpTest): + def setUp(self): + self.op_type = 'positive_negative_pair' + batch_size = 20 + max_query_id = 5 + max_random_num = 2 << 15 + score_dim = 2 + score = np.random.normal(size=(batch_size, 2)).astype('float32') + label = np.random.normal(size=(batch_size, 1)).astype('float32') + weight = np.random.normal(size=(batch_size, 1)).astype('float32') + query = np.array( + [np.random.randint(max_query_id) for i in range(batch_size)]) + query = np.reshape(query, newshape=(batch_size, 1)).astype('int64') + acc_pos = np.reshape( + np.random.randint(max_random_num), newshape=(1)).astype('float32') + acc_neg = np.reshape( + np.random.randint(max_random_num), newshape=(1)).astype('float32') + acc_neu = np.reshape( + np.random.randint(max_random_num), newshape=(1)).astype('float32') + column = np.random.randint(score_dim) + + pos, neg, neu = py_pnpair_op( + score, label, query, column=column, weight=weight) + self.inputs = { + 'Score': score, + 'Label': label, + 'QueryID': query, + 'AccumulatePositivePair': acc_pos, + 'AccumulateNegativePair': acc_neg, + 'AccumulateNeutralPair': acc_neu, + 'Weight': weight + } + self.attrs = {'column': column} + self.outputs = { + 'PositivePair': pos + acc_pos, + 'NegativePair': neg + acc_neg, + 'NeutralPair': neu + acc_neu + } + + def test_check_output(self): + self.check_output() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 001de349d1f62e661335e63f7519c59697a1030c..16100429dd4010eb5c9a3e8896212f39295a4c8a 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -1,9 +1,6 @@ import unittest -import logging - -from op_test import get_numeric_gradient -from paddle.v2.framework.layers import * +import paddle.v2.framework.layers as layers from paddle.v2.framework.framework import Program from paddle.v2.framework.executor import Executor from paddle.v2.framework.backward import append_backward_ops @@ -16,8 +13,8 @@ class PyRNNBase(object): self.x = np.ones(shape=input_shape).astype("float32") self.y = np.zeros(shape=output_shape).astype("float32") - def step(self): - pass + def step(self, step_id, x): + raise NotImplementedError def forward(self): for step_id in range(self.x.shape[0]): @@ -116,30 +113,30 @@ class RecurrentOpTest1(unittest.TestCase): self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - self.output = mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) def create_rnn_op(self): - x = data( + x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], data_type='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False - h_boot = data( + h_boot = layers.data( shape=[self.input_dim], data_type='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False - rnn = StaticRNN(main_program=self.main_program) + rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) - h = scale( - x=elementwise_add( + h = layers.scale( + x=layers.elementwise_add( x=h_pre, y=x_t, **self.p_info), scale=self.py_rnn.scale, **self.p_info) @@ -249,41 +246,41 @@ class RecurrentOpTest2(RecurrentOpTest1): self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - self.output = mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) def create_rnn_op(self): - x = data( + x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], data_type='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False - h_boot = data( + h_boot = layers.data( shape=[self.input_dim], data_type='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False - rnn = StaticRNN(main_program=self.main_program) + rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) - temp_l = fc(input=x_t, - size=self.input_dim, - param_attr={'name': 'W'}, - bias_attr=False, - **self.p_info) - temp_r = fc(input=h_pre, - size=self.input_dim, - param_attr={'name': 'U'}, - bias_attr=False, - **self.p_info) - - h = sigmoid( - x=elementwise_add( + temp_l = layers.fc(input=x_t, + size=self.input_dim, + param_attr={'name': 'W'}, + bias_attr=False, + **self.p_info) + temp_r = layers.fc(input=h_pre, + size=self.input_dim, + param_attr={'name': 'U'}, + bias_attr=False, + **self.p_info) + + h = layers.sigmoid( + x=layers.elementwise_add( x=temp_l, y=temp_r, **self.p_info), **self.p_info) @@ -293,7 +290,7 @@ class RecurrentOpTest2(RecurrentOpTest1): return rnn() -class RecurrentOpTest3(RecurrentOpTest1): +class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): ''' Test RNNOp with two memories equation: @@ -310,8 +307,8 @@ class RecurrentOpTest3(RecurrentOpTest1): class PySimpleRNN3(PyRNNBase): def __init__(self, input_shape, output_shape): - super(RecurrentOpTest3.PySimpleRNN3, self).__init__(input_shape, - output_shape) + super(RecurrentOpMultipleMemoryTest.PySimpleRNN3, self).__init__( + input_shape, output_shape) seq_len, batch_size, input_dim = input_shape self.h_boot1 = np.random.normal(size=(batch_size, @@ -345,27 +342,27 @@ class RecurrentOpTest3(RecurrentOpTest1): self.input_shape = (self.sent_len, self.batch_size, self.input_dim) self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = RecurrentOpTest3.PySimpleRNN3(self.input_shape, - self.output_shape) + self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3( + self.input_shape, self.output_shape) - self.output = mean(x=self.create_rnn_op(), **self.p_info) + self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) def create_rnn_op(self): - x = data( + x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], data_type='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False - h_boot1 = data( + h_boot1 = layers.data( shape=[self.batch_size, self.input_dim], data_type='float32', name='h_boot1', append_batch_size=False, **self.p_info) h_boot1.stop_gradient = False - h_boot2 = data( + h_boot2 = layers.data( shape=[self.batch_size, self.input_dim], data_type='float32', name='h_boot2', @@ -373,15 +370,15 @@ class RecurrentOpTest3(RecurrentOpTest1): **self.p_info) h_boot2.stop_gradient = False - rnn = StaticRNN(main_program=self.main_program) + rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre1 = rnn.memory(init=h_boot1) h_pre2 = rnn.memory(init=h_boot2) x_t = rnn.step_input(x) - mem1 = scale(x=h_pre1, scale=1.0, **self.p_info) - mem2 = scale(x=h_pre2, scale=1.0, **self.p_info) - out = sums(input=[mem1, x_t, mem2], **self.p_info) + mem1 = layers.scale(x=h_pre1, scale=1.0, **self.p_info) + mem2 = layers.scale(x=h_pre2, scale=1.0, **self.p_info) + out = layers.sums(input=[mem1, x_t, mem2], **self.p_info) rnn.update_memory(h_pre1, mem1) rnn.update_memory(h_pre2, mem2) @@ -390,5 +387,70 @@ class RecurrentOpTest3(RecurrentOpTest1): return rnn() +class RecurrentOpNoMemBootTest(RecurrentOpTest1): + ''' + Test RNNOp with two memories + equation: + mem = x + mem_pre + y = mem + vars: + - x + memories: + - mem + outputs: + - y + ''' + + class PySimpleRNN4(PyRNNBase): + def __init__(self, input_shape, output_shape): + super(RecurrentOpNoMemBootTest.PySimpleRNN4, self).__init__( + input_shape, output_shape) + men_dim = input_shape + self.mems = np.zeros(shape=men_dim).astype("float32") + + def step(self, step_id, x): + if step_id == 0: + pre_mem = np.zeros_like(x) + else: + pre_mem = self.mems[step_id - 1] + self.mems[step_id] = pre_mem + x + self.y[step_id] = self.mems[step_id] + + input_dim = 1 + batch_size = 1 + sent_len = 2 + + def setUp(self): + self.setup_program() + + self.data_field = {"x"} + + self.input_shape = (self.sent_len, self.batch_size, self.input_dim) + self.output_shape = (self.sent_len, self.batch_size, self.input_dim) + self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape, + self.output_shape) + self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) + print self.main_program + + def create_rnn_op(self): + x = layers.data( + shape=[self.sent_len, self.batch_size, self.input_dim], + data_type='float32', + name='x', + append_batch_size=False, + **self.p_info) + x.stop_gradient = False + + rnn = layers.StaticRNN(main_program=self.main_program) + with rnn.step(): + mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) + x_t = rnn.step_input(x) + mem = layers.elementwise_add(x=mem_pre, y=x_t, **self.p_info) + rnn.update_memory(mem_pre, mem) + rnn.output(mem) + + return rnn() + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_seq_pool.py b/python/paddle/v2/framework/tests/test_seq_pool.py index efc4920124afb539017a3b3f211c7320da68ffef..512d8b315f29cecf79ae274dca491c240f3447a1 100644 --- a/python/paddle/v2/framework/tests/test_seq_pool.py +++ b/python/paddle/v2/framework/tests/test_seq_pool.py @@ -29,6 +29,9 @@ class TestSeqAvgPool(OpTest): self.check_output() def test_check_grad(self): + # Remove MaxIndex after check_grad is refined. + self.outputs['MaxIndex'] = \ + np.zeros(self.outputs['Out'].shape).astype('int32') self.check_grad(["X"], "Out") @@ -85,31 +88,53 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17)) def test_check_grad(self): + # Remove MaxIndex after check_grad is refined. + self.outputs['MaxIndex'] = \ + np.zeros(self.outputs['Out'].shape).astype('int32') self.check_grad(["X"], "Out", max_relative_error=0.06) class TestSeqMaxPool(TestSeqAvgPool): + def set_data(self): + self.op_type = 'sequence_pool' + x = np.random.uniform(0.1, 1, [13, 23]).astype('float32') + lod = [[0, 4, 5, 8, 13]] + for i in range(4): + l = lod[0][i + 1] - lod[0][i] + x[lod[0][i] + np.random.randint(l), :] += 2.0 + + self.inputs = {'X': (x, lod)} + + out = np.zeros((4, 23)).astype('float32') + self.outputs = {'Out': out} + return x, lod, out + def compute(self, x, lod, out): self.attrs = {'pooltype': "MAX"} for i in range(4): sub_x = x[lod[0][i]:lod[0][i + 1], :] out[i] = np.amax(sub_x, axis=0) - def test_check_grad(self): - # Remove MaxPool2D from gradient check to confirm the success of CI. - return - class TestSeqMaxPool2D(TestSeqAvgPool2D): + def set_data(self): + self.op_type = 'sequence_pool' + x = np.random.uniform(0.1, 1, [13, 3, 11]).astype('float32') + lod = [[0, 4, 5, 8, 13]] + self.inputs = {'X': (x, lod)} + for i in range(4): + l = lod[0][i + 1] - lod[0][i] + x[lod[0][i] + np.random.randint(l), :] += 1.0 + + out = np.zeros((4, 3, 11)).astype('float32') + self.outputs = {'Out': out} + return x, lod, out + def compute(self, x, lod, out): self.attrs = {'pooltype': "MAX"} for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) - out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 17)) - - def test_check_grad(self): - # Remove MaxPool2D from gradient check to confirm the success of CI. - return + sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 11)) + out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11)) class TestSeqLastPool(TestSeqAvgPool): diff --git a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py index f93feb20696f126423bc9412eab3b4aa41b19426..c2f07f9096c69f3d4977f9444bdd5dcda8028973 100644 --- a/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_with_cross_entropy_op.py @@ -12,30 +12,30 @@ class TestSoftmaxWithCrossEntropyOp(OpTest): def setUp(self): self.op_type = "softmax_with_cross_entropy" - batch_size = 3 + batch_size = 2 class_num = 37 logits = np.random.uniform(0.1, 1.0, - [batch_size, class_num]).astype("float32") + [batch_size, class_num]).astype("float64") softmax = np.apply_along_axis(stable_softmax, 1, logits) - labels = np.random.randint(0, class_num, [batch_size, 1], dtype="int32") + labels = np.random.randint(0, class_num, [batch_size, 1], dtype="int64") cross_entropy = np.asmatrix( [[-np.log(softmax[i][labels[i][0]])] for i in range(softmax.shape[0])], - dtype="float32") + dtype="float64") self.inputs = {"Logits": logits, "Label": labels} self.outputs = { - "Softmax": softmax.astype('float32'), - "Loss": cross_entropy.astype('float32') + "Softmax": softmax.astype("float64"), + "Loss": cross_entropy.astype("float64") } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(["Logits"], "Loss", max_relative_error=0.05) + self.check_grad(["Logits"], "Loss") class TestSoftmaxWithCrossEntropyOp2(OpTest): @@ -49,19 +49,19 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest): class_num = 37 logits = np.random.uniform(0.1, 1.0, - [batch_size, class_num]).astype("float32") + [batch_size, class_num]).astype("float64") softmax = np.apply_along_axis(stable_softmax, 1, logits) labels = np.random.uniform(0.1, 1.0, - [batch_size, class_num]).astype("float32") + [batch_size, class_num]).astype("float64") labels /= np.sum(labels, axis=1, keepdims=True) cross_entropy = (-labels * np.log(softmax)).sum( - axis=1, keepdims=True).astype("float32") + axis=1, keepdims=True).astype("float64") self.inputs = {"Logits": logits, "Label": labels} self.outputs = { - "Softmax": softmax.astype('float32'), - "Loss": cross_entropy.astype('float32') + "Softmax": softmax.astype("float64"), + "Loss": cross_entropy.astype("float64") } self.attrs = {"soft_label": True} @@ -69,9 +69,8 @@ class TestSoftmaxWithCrossEntropyOp2(OpTest): self.check_output() def test_check_grad(self): - self.check_grad(["Logits"], "Loss", max_relative_error=0.05) + self.check_grad(["Logits"], "Loss") if __name__ == "__main__": - exit(0) # FIXME: xe has bug unittest.main() diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 29f0945eb4c88eab8fa9ee83f455190dfd473aa4..94d706b1d6289a7bffbdfb161c35d44c78fdf46f 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -11,11 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Optimizers(update equation) for SGD method. - -TODO(yuyang18): Complete comments. -""" import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.optimizers as v1_optimizers @@ -101,32 +96,37 @@ class Optimizer(object): class Momentum(Optimizer): """ - SGD Optimizer. - - SGD is an optimization method, trying to find a neural network that - minimize the "cost/error" of it by iteration. In paddle's implementation - SGD Optimizer is synchronized, which means all gradients will be wait to - calculate and reduced into one gradient, then do optimize operation. + Momentum Optimizer. - The neural network consider the learning problem of minimizing an objective - function, that has the form of a sum + When sparse=False, the momentum update formula is as follows: .. math:: - Q(w) = \\sum_{i}^{n} Q_i(w) + v_{t} &= k * v_{t-1} - \\gamma_t / (g_{t} + \\lambda w_{t-1}) \\\\ + w_{t} &= w_{t-1} + v_{t} \\\\ - The value of function Q sometimes is the cost of neural network (Mean - Square Error between prediction and label for example). The function Q is - parametrised by w, the weight/bias of neural network. And weights is what to - be learned. The i is the i-th observation in (trainning) data. + where, :math:`k` is momentum, :math:`\\lambda` is decay rate, + :math:`\\gamma_t` is learning rate at the t'th iteration. + :math:`w_{t}` is the weight as the t'th iteration. + And the :math:`v_{t}` is the history momentum variable. - So, the SGD method will optimize the weight by + When sparse=True, the update scheme: .. math:: - w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w) - - where :math:`\\eta` is learning rate. And :math:`n` is batch size. + \\alpha_t &= \\alpha_{t-1} / k \\\\ + \\beta_t &= \\beta_{t-1} / (1 + \\lambda \\gamma_t) \\\\ + u_t &= u_{t-1} - \\alpha_t \\gamma_t g_t \\\\ + v_t &= v_{t-1} + \\tau_{t-1} \\alpha_t \\gamma_t g_t \\\\ + \\tau_t &= \\tau_{t-1} + \\beta_t / \\alpha_t + + where :math:`k` is momentum, :math:`\\lambda` is decay rate, + :math:`\\gamma_t` is learning rate at the t'th iteration. + + :param momentum: the momentum factor. + :type momentum: float + :param sparse: with sparse support or not, False by default. + :type sparse: bool """ def __init__(self, momentum=None, sparse=False, **kwargs): @@ -146,7 +146,7 @@ class Adam(Optimizer): m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\ v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\ - w & = w - \\frac{\\eta}{\\sqrt{v(w,t) + \\epsilon}} + w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}} :param beta1: the :math:`\\beta_1` in equation. :type beta1: float diff --git a/python/setup.py.in b/python/setup.py.in index 87b3823e52604b889cdee76bc696a1ae9b9de802..5348c2d8d7e9b5adc5fe93e2943bef149ba047cc 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,4 +1,4 @@ -from setuptools import setup, Distribution +from setuptools import setup, Distribution, Extension class BinaryDistribution(Distribution): def has_ext_modules(foo): return True @@ -41,6 +41,7 @@ setup(name='paddlepaddle', description='Parallel Distributed Deep Learning', install_requires=setup_requires, packages=packages, + ext_modules=[Extension('_foo', ['stub.cc'])], package_data={ 'paddle.v2.master': ['libpaddle_master.so'], 'paddle.v2.framework': ['core.so'], @@ -54,6 +55,5 @@ setup(name='paddlepaddle', 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' }, scripts=paddle_bins, - distclass=BinaryDistribution, data_files=[(paddle_rt_lib_dir, paddle_rt_libs)] )