Commit df84e39a authored by: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_add_FillZerosLikeOp_test

...@@ -17,10 +17,14 @@ ...@@ -17,10 +17,14 @@
- id: detect-private-key - id: detect-private-key
files: (?!.*third_party)^.*$ | (?!.*book)^.*$ files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer - id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git - repo: local
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks: hooks:
- id: clang-formater - id: clang-format
name: clang-format
description: Format files with ClangFormat.
entry: clang-format -i
language: system
files: \.(c|cc|cxx|cpp|h|hpp|hxx)$
- repo: https://github.com/PaddlePaddle/pre-commit-golang - repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 8337620115c25ff8333f1b1a493bd031049bd7c0 sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
hooks: hooks:
......
...@@ -36,8 +36,8 @@ include(simd) ...@@ -36,8 +36,8 @@ include(simd)
################################ Configurations ####################################### ################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
......
...@@ -27,7 +27,7 @@ RUN apt-get update && \ ...@@ -27,7 +27,7 @@ RUN apt-get update && \
git python-pip python-dev openssh-server bison \ git python-pip python-dev openssh-server bison \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-numpy python-matplotlib gcc g++ \ python-numpy python-matplotlib gcc-4.8 g++-4.8 \
automake locales clang-format-3.8 swig doxygen cmake \ automake locales clang-format-3.8 swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \ liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \
......
...@@ -72,7 +72,7 @@ We provide [English](http://doc.paddlepaddle.org/develop/doc/) and ...@@ -72,7 +72,7 @@ We provide [English](http://doc.paddlepaddle.org/develop/doc/) and
- [Deep Learning 101](http://book.paddlepaddle.org/index.html) - [Deep Learning 101](http://book.paddlepaddle.org/index.html)
You might want to start from the this online interactive book that can run in Jupyter Notebook. You might want to start from this online interactive book that can run in Jupyter Notebook.
- [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) - [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
......
...@@ -20,34 +20,30 @@ INCLUDE(ExternalProject) ...@@ -20,34 +20,30 @@ INCLUDE(ExternalProject)
SET(MKLDNN_PROJECT "extern_mkldnn") SET(MKLDNN_PROJECT "extern_mkldnn")
SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET(MKLDNN_INSTALL_ROOT ${CMAKE_INSTALL_PREFIX}) SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
IF(NOT "$ENV{HOME}" STREQUAL "/root") SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
SET(MKLDNN_INSTALL_ROOT "$ENV{HOME}")
ENDIF()
SET(MKLDNN_INSTALL_DIR "${MKLDNN_INSTALL_ROOT}/opt/paddle/third_party/mkldnn")
SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
IF(WIN32) IF(WIN32 OR APPLE)
MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet." MESSAGE(WARNING
"Force WITH_MKLDNN=OFF") "Windows or Mac is not supported with MKLDNN in Paddle yet."
SET(WITH_MKLDNN OFF) "Force WITH_MKLDNN=OFF")
SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE)
return() return()
ELSE(WIN32) ENDIF()
SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
#SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})
IF(${CBLAS_PROVIDER} STREQUAL "MKLML") IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) SET(MKLDNN_DEPENDS ${MKLML_PROJECT})
SET(MKLDNN_MKLROOT ${MKLML_ROOT}) SET(MKLDNN_MKLROOT ${MKLML_ROOT})
SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB})
SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR})
MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}")
ENDIF() ENDIF()
ExternalProject_Add( ExternalProject_Add(
...@@ -57,16 +53,15 @@ ExternalProject_Add( ...@@ -57,16 +53,15 @@ ExternalProject_Add(
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "v0.9" GIT_TAG "v0.9"
PREFIX ${MKLDNN_SOURCES_DIR} PREFIX ${MKLDNN_SOURCES_DIR}
CONFIGURE_COMMAND mkdir -p <SOURCE_DIR>/build
BUILD_COMMAND cd <SOURCE_DIR>/build
&& cmake .. -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKLDNN_MKLROOT}
&& $(MAKE)
INSTALL_COMMAND cd <SOURCE_DIR>/build && $(MAKE) install
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
-DMKLROOT:PATH=${MKLDNN_MKLROOT}
) )
ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY}) SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIBRARY}") MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}")
LIST(APPEND external_project_dependencies mkldnn) LIST(APPEND external_project_dependencies mkldnn)
...@@ -16,19 +16,23 @@ IF(NOT ${WITH_MKLML}) ...@@ -16,19 +16,23 @@ IF(NOT ${WITH_MKLML})
return() return()
ENDIF(NOT ${WITH_MKLML}) ENDIF(NOT ${WITH_MKLML})
IF(WIN32 OR APPLE)
MESSAGE(WARNING
"Windows or Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
return()
ENDIF()
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(MKLML_PROJECT "extern_mklml") SET(MKLML_PROJECT "extern_mklml")
SET(MKLML_VER "mklml_lnx_2018.0.20170425") SET(MKLML_VER "mklml_lnx_2018.0.20170720")
SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
SET(MKLML_DST_DIR "opt/paddle/third_party/mklml") SET(MKLML_DST_DIR "mklml")
SET(MKLML_INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}") SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
IF(NOT "$ENV{HOME}" STREQUAL "/root")
SET(MKLML_INSTALL_ROOT "$ENV{HOME}")
ENDIF()
SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR})
SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER}) SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER})
SET(MKLML_INC_DIR ${MKLML_ROOT}/include) SET(MKLML_INC_DIR ${MKLML_ROOT}/include)
......
...@@ -9,6 +9,11 @@ function(CheckCompilerCXX11Flag) ...@@ -9,6 +9,11 @@ function(CheckCompilerCXX11Flag)
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
endif() endif()
# TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use Debug mode instead for now.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
# Apple Clang is a different compiler than upstream Clang which has different version numbers. # Apple Clang is a different compiler than upstream Clang which has different version numbers.
......
...@@ -104,6 +104,11 @@ cross_channel_norm ...@@ -104,6 +104,11 @@ cross_channel_norm
------------------ ------------------
.. autoclass:: paddle.v2.layer.cross_channel_norm .. autoclass:: paddle.v2.layer.cross_channel_norm
:noindex: :noindex:
row_l2_norm
-----------
.. autoclass:: paddle.v2.layer.row_l2_norm
:noindex:
Recurrent Layers Recurrent Layers
================ ================
...@@ -320,6 +325,11 @@ scaling ...@@ -320,6 +325,11 @@ scaling
.. autoclass:: paddle.v2.layer.scaling .. autoclass:: paddle.v2.layer.scaling
:noindex: :noindex:
clip
----
.. autoclass:: paddle.v2.layer.clip
:noindex:
slope_intercept slope_intercept
--------------- ---------------
.. autoclass:: paddle.v2.layer.slope_intercept .. autoclass:: paddle.v2.layer.slope_intercept
......
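The two layers documented above, `clip` and `row_l2_norm`, are driven from the v2 Python API. A minimal usage sketch follows; the keyword names (`input`, `min`, `max`) and the `paddle.v2` import path are assumptions based on the usual v2 layer conventions, not taken from this diff.

```python
# Hypothetical usage of the two newly documented layers; argument names are assumed.
import paddle.v2 as paddle

data = paddle.layer.data(name="x", type=paddle.data_type.dense_vector(128))
clipped = paddle.layer.clip(input=data, min=-10.0, max=10.0)  # element-wise clipping
normed = paddle.layer.row_l2_norm(input=clipped)              # per-row L2 normalization
```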
...@@ -15,7 +15,6 @@ if(Boost_FOUND) ...@@ -15,7 +15,6 @@ if(Boost_FOUND)
add_subdirectory(platform) add_subdirectory(platform)
add_subdirectory(framework) add_subdirectory(framework)
add_subdirectory(operators) add_subdirectory(operators)
add_subdirectory(pybind)
endif() endif()
if(WITH_C_API) if(WITH_C_API)
......
...@@ -1022,6 +1022,15 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -1022,6 +1022,15 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real alpha = 1.0f; real alpha = 1.0f;
real beta = 1.0f; real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
int batch_size = ((cudnn_tensor_descriptor)inputDesc)->batch_size;
if (batch_size > 1024 && g_cudnn_lib_version < 6000) {
LOG(INFO) << " To process current batch data with size " << batch_size
<< " (>1024), cudnnBatchNorm requires cuDNN version >= 6000."
<< " If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED,"
<< " just recompile PaddlePaddle with cuDNN >= 6000, replacing"
<< " current version " << g_cudnn_lib_version;
}
CHECK_CUDNN( CHECK_CUDNN(
dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle, dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle,
mode, mode,
......
...@@ -31,8 +31,14 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. ...@@ -31,8 +31,14 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init) add_dependencies(framework_py_proto framework_py_proto_init)
cc_library(net SRCS net.cc DEPS op_registry) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(net_op_test SRCS net_op_test.cc DEPS net)
cc_library(backward SRCS backward.cc DEPS net)
cc_test(backward_test SRCS backward_test.cc DEPS backward) cc_test(backward_test SRCS backward_test.cc DEPS backward)
cc_library(paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python backward
fc_op
sgd_op
add_op
mean_op
cross_entropy_op
recurrent_op)
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include <list> #include <list>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -32,7 +32,7 @@ static bool AllInSet(const std::vector<std::string>& names, ...@@ -32,7 +32,7 @@ static bool AllInSet(const std::vector<std::string>& names,
} }
static std::shared_ptr<OperatorBase> NOP() { static std::shared_ptr<OperatorBase> NOP() {
auto net_op = std::make_shared<NetOp>(); auto net_op = std::make_shared<operators::NetOp>();
net_op->type_ = "@NOP@"; net_op->type_ = "@NOP@";
net_op->CompleteAddOp(); net_op->CompleteAddOp();
return net_op; return net_op;
...@@ -77,11 +77,11 @@ std::shared_ptr<OperatorBase> BackwardRecursive( ...@@ -77,11 +77,11 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
} }
// Returned gradient network // Returned gradient network
auto net = std::make_shared<NetOp>(); auto net = std::make_shared<operators::NetOp>();
if (forwardOp.IsNetOp()) { if (forwardOp.IsNetOp()) {
// Because forwardOp is a net op, it can static_cast. // Because forwardOp is a net op, it can static_cast.
auto& forwardNet = static_cast<const NetOp&>(forwardOp); auto& forwardNet = static_cast<const operators::NetOp&>(forwardOp);
// Map from output gradient variable name to operator's indices in backward // Map from output gradient variable name to operator's indices in backward
// net. That operator generates that variable. // net. That operator generates that variable.
...@@ -168,6 +168,9 @@ std::shared_ptr<OperatorBase> Backward( ...@@ -168,6 +168,9 @@ std::shared_ptr<OperatorBase> Backward(
std::unordered_set<std::string> no_grad_names; std::unordered_set<std::string> no_grad_names;
no_grad_names.reserve(no_grad_vars.size()); no_grad_names.reserve(no_grad_vars.size());
no_grad_names.insert(OperatorBase::EMPTY_VAR_NAME() +
OperatorBase::GRAD_VAR_SUFFIX());
for (auto& name : no_grad_vars) { for (auto& name : no_grad_vars) {
no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX()); no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX());
} }
......
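The two inserted lines above pre-seed the skip set with the gradient name of the "empty" placeholder variable, so inputs that an operator reports as unused never get a gradient generated for them. A small Python sketch of that bookkeeping; the literal value of `EMPTY_VAR_NAME()` is an assumption here.

```python
# Sketch of the no-grad name bookkeeping; "@EMPTY@" stands in for
# OperatorBase::EMPTY_VAR_NAME() and is an assumed value.
EMPTY_VAR_NAME = "@EMPTY@"
GRAD_VAR_SUFFIX = "@GRAD"

def build_no_grad_names(no_grad_vars):
    no_grad_names = {EMPTY_VAR_NAME + GRAD_VAR_SUFFIX}  # always skip the empty var's grad
    for name in no_grad_vars:
        no_grad_names.add(name + GRAD_VAR_SUFFIX)
    return no_grad_names

print(sorted(build_no_grad_names({"X", "b"})))  # ['@EMPTY@@GRAD', 'X@GRAD', 'b@GRAD']
```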
...@@ -15,8 +15,9 @@ ...@@ -15,8 +15,9 @@
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/type_alias.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -70,7 +71,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { ...@@ -70,7 +71,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class FcOp : public NetOp { class FcOp : public ops::NetOp {
public: public:
void Init() override { void Init() override {
AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")},
...@@ -182,7 +183,8 @@ TEST(Backward, simple_op_not_need_grad) { ...@@ -182,7 +183,8 @@ TEST(Backward, simple_op_not_need_grad) {
auto no_input_gop = f::Backward(*fwd, {"X", "b"}); auto no_input_gop = f::Backward(*fwd, {"X", "b"});
ASSERT_NE(no_input_gop, nullptr); ASSERT_NE(no_input_gop, nullptr);
ASSERT_TRUE(no_input_gop->IsNetOp()); ASSERT_TRUE(no_input_gop->IsNetOp());
ASSERT_EQ(0UL, std::static_pointer_cast<f::NetOp>(no_input_gop)->ops_.size()); ASSERT_EQ(0UL,
std::static_pointer_cast<ops::NetOp>(no_input_gop)->ops_.size());
} }
TEST(Backward, net_fc_backward_normal) { TEST(Backward, net_fc_backward_normal) {
...@@ -191,7 +193,7 @@ TEST(Backward, net_fc_backward_normal) { ...@@ -191,7 +193,7 @@ TEST(Backward, net_fc_backward_normal) {
ASSERT_NE(fwd, nullptr); ASSERT_NE(fwd, nullptr);
std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {}); std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {});
ASSERT_TRUE(gop->IsNetOp()); ASSERT_TRUE(gop->IsNetOp());
auto net = static_cast<f::NetOp *>(gop.get()); auto net = static_cast<ops::NetOp *>(gop.get());
ASSERT_NO_THROW(net->DebugString()); ASSERT_NO_THROW(net->DebugString());
...@@ -214,7 +216,7 @@ TEST(Backward, net_fc_backward_not_have_b) { ...@@ -214,7 +216,7 @@ TEST(Backward, net_fc_backward_not_have_b) {
ASSERT_NE(fwd, nullptr); ASSERT_NE(fwd, nullptr);
std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {}); std::shared_ptr<f::OperatorBase> gop = f::Backward(*fwd, {});
ASSERT_TRUE(gop->IsNetOp()); ASSERT_TRUE(gop->IsNetOp());
auto net = static_cast<f::NetOp *>(gop.get()); auto net = static_cast<ops::NetOp *>(gop.get());
ASSERT_NO_THROW(net->DebugString()); ASSERT_NO_THROW(net->DebugString());
...@@ -228,7 +230,7 @@ TEST(Backward, net_fc_backward_not_have_b) { ...@@ -228,7 +230,7 @@ TEST(Backward, net_fc_backward_not_have_b) {
} }
TEST(Backward, net_input_of_network_not_need_grad) { TEST(Backward, net_input_of_network_not_need_grad) {
f::NetOp net; ops::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"}, net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"},
{"mul_tmp_0", "add_tmp_0", "hidden0"}, {})); {"mul_tmp_0", "add_tmp_0", "hidden0"}, {}));
net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"}, net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"},
...@@ -236,7 +238,7 @@ TEST(Backward, net_input_of_network_not_need_grad) { ...@@ -236,7 +238,7 @@ TEST(Backward, net_input_of_network_not_need_grad) {
net.CompleteAddOp(); net.CompleteAddOp();
auto bwd = Backward(net, {"X"}); // X@GRAD is not need. auto bwd = Backward(net, {"X"}); // X@GRAD is not need.
ASSERT_TRUE(bwd->IsNetOp()); ASSERT_TRUE(bwd->IsNetOp());
auto bwd_net = static_cast<f::NetOp *>(bwd.get()); auto bwd_net = static_cast<ops::NetOp *>(bwd.get());
std::unordered_set<std::string> all_output = std::unordered_set<std::string>( std::unordered_set<std::string> all_output = std::unordered_set<std::string>(
bwd_net->outputs_.begin(), bwd_net->outputs_.end()); bwd_net->outputs_.begin(), bwd_net->outputs_.end());
...@@ -253,7 +255,7 @@ TEST(Backward, net_input_of_network_not_need_grad) { ...@@ -253,7 +255,7 @@ TEST(Backward, net_input_of_network_not_need_grad) {
ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_EQ(2UL, bwd_net->ops_.size());
ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
auto first_fc_grad = static_cast<f::NetOp *>(bwd_net->ops_[1].get()); auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(3UL, first_fc_grad->ops_.size());
ASSERT_EQ( ASSERT_EQ(
f::OperatorBase::EMPTY_VAR_NAME(), f::OperatorBase::EMPTY_VAR_NAME(),
...@@ -261,14 +263,14 @@ TEST(Backward, net_input_of_network_not_need_grad) { ...@@ -261,14 +263,14 @@ TEST(Backward, net_input_of_network_not_need_grad) {
} }
TEST(Backward, net_shared_weight) { TEST(Backward, net_shared_weight) {
f::NetOp net; ops::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {})); net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {}));
net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {})); net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {}));
net.CompleteAddOp(); net.CompleteAddOp();
auto bwd = f::Backward(net, {}); auto bwd = f::Backward(net, {});
ASSERT_TRUE(bwd->IsNetOp()); ASSERT_TRUE(bwd->IsNetOp());
auto bwd_net = static_cast<f::NetOp *>(bwd.get()); auto bwd_net = static_cast<ops::NetOp *>(bwd.get());
ASSERT_EQ(3UL, bwd_net->ops_.size()); ASSERT_EQ(3UL, bwd_net->ops_.size());
ASSERT_EQ("add", bwd_net->ops_[2]->type_); ASSERT_EQ("add", bwd_net->ops_[2]->type_);
} }
...@@ -285,7 +287,7 @@ TEST(Backward, op_all_input_are_not_need) { ...@@ -285,7 +287,7 @@ TEST(Backward, op_all_input_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
auto backward = f::Backward(*fwd, {"X", "b"}); auto backward = f::Backward(*fwd, {"X", "b"});
ASSERT_TRUE(backward->IsNetOp()); ASSERT_TRUE(backward->IsNetOp());
auto net = static_cast<f::NetOp *>(backward.get()); auto net = static_cast<ops::NetOp *>(backward.get());
ASSERT_TRUE(net->ops_.empty()); ASSERT_TRUE(net->ops_.empty());
} }
...@@ -293,7 +295,7 @@ TEST(Backward, op_all_output_are_not_need) { ...@@ -293,7 +295,7 @@ TEST(Backward, op_all_output_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {});
auto backward = f::Backward(*fwd, {"Out"}); auto backward = f::Backward(*fwd, {"Out"});
ASSERT_TRUE(backward->IsNetOp()); ASSERT_TRUE(backward->IsNetOp());
auto net = static_cast<f::NetOp *>(backward.get()); auto net = static_cast<ops::NetOp *>(backward.get());
ASSERT_TRUE(net->ops_.empty()); ASSERT_TRUE(net->ops_.empty());
} }
...@@ -301,7 +303,7 @@ TEST(Backward, op_part_of_output_are_not_need) { ...@@ -301,7 +303,7 @@ TEST(Backward, op_part_of_output_are_not_need) {
auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {}); auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {});
auto backward = f::Backward(*fwd, {"Z"}); auto backward = f::Backward(*fwd, {"Z"});
ASSERT_TRUE(backward->IsNetOp()); ASSERT_TRUE(backward->IsNetOp());
auto net = static_cast<f::NetOp *>(backward.get()); auto net = static_cast<ops::NetOp *>(backward.get());
ASSERT_EQ(net->ops_.size(), 2UL); ASSERT_EQ(net->ops_.size(), 2UL);
auto &fill_zero = *net->ops_[0]; auto &fill_zero = *net->ops_[0];
...@@ -341,7 +343,7 @@ TEST(Backward, op_part_of_input_are_not_need) { ...@@ -341,7 +343,7 @@ TEST(Backward, op_part_of_input_are_not_need) {
} }
TEST(Backward, linear_net_intermediate_variable_has_no_grad) { TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
f::NetOp net; ops::NetOp net;
net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"}, net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"},
{"mul_out1", "add_out1", "out1"}, {})); {"mul_out1", "add_out1", "out1"}, {}));
net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"}, net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"},
...@@ -351,7 +353,7 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ...@@ -351,7 +353,7 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
net.CompleteAddOp(); net.CompleteAddOp();
auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"});
ASSERT_TRUE(backward->IsNetOp()); ASSERT_TRUE(backward->IsNetOp());
auto bwd_net = static_cast<f::NetOp *>(backward.get()); auto bwd_net = static_cast<ops::NetOp *>(backward.get());
ASSERT_EQ(bwd_net->ops_.size(), 3UL); ASSERT_EQ(bwd_net->ops_.size(), 3UL);
auto &grad_fc = *bwd_net->ops_[0]; auto &grad_fc = *bwd_net->ops_[0];
EXPECT_EQ(grad_fc.inputs_.size(), EXPECT_EQ(grad_fc.inputs_.size(),
......
...@@ -400,6 +400,14 @@ class GradOpRegisterHelper { ...@@ -400,6 +400,14 @@ class GradOpRegisterHelper {
return 0; \ return 0; \
} }
/**
* Macro to Forbid user register Gradient Operator.
*/
#define NO_GRADIENT(__op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_gradient_op__##__op_type##__op_type##_grad, \
"NO_GRADIENT must be in global namespace")
/** /**
* Macro to Register OperatorKernel. * Macro to Register OperatorKernel.
*/ */
......
...@@ -20,16 +20,16 @@ namespace paddle { ...@@ -20,16 +20,16 @@ namespace paddle {
namespace framework { namespace framework {
template <> template <>
Eigen::DefaultDevice* ExecutionContext::GetEigenDevice< Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
platform::CPUPlace, Eigen::DefaultDevice>() const { platform::CPUPlace, Eigen::DefaultDevice>() const {
return device_context_.get_eigen_device<Eigen::DefaultDevice>(); return *device_context_.get_eigen_device<Eigen::DefaultDevice>();
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
template <> template <>
Eigen::GpuDevice* Eigen::GpuDevice&
ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const { ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
return device_context_.get_eigen_device<Eigen::GpuDevice>(); return *device_context_.get_eigen_device<Eigen::GpuDevice>();
} }
#endif #endif
...@@ -52,7 +52,8 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const { ...@@ -52,7 +52,8 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr"); PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr");
auto input_format = GetAttr<std::vector<int>>("input_format"); auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name); auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(input_format.at((size_t)offset + 1) <= (int)inputs_.size(), PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <=
static_cast<int>(inputs_.size()),
"Input Out Of Range"); "Input Out Of Range");
return std::vector<std::string>{ return std::vector<std::string>{
...@@ -78,7 +79,8 @@ std::vector<std::string> OperatorBase::Outputs(const std::string& name) const { ...@@ -78,7 +79,8 @@ std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr");
auto output_format = GetAttr<std::vector<int>>("output_format"); auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name); auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(output_format.at((size_t)offset + 1) <= (int)outputs_.size(), PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <=
static_cast<int>(outputs_.size()),
"Output Out of Range"); "Output Out of Range");
return std::vector<std::string>{ return std::vector<std::string>{
outputs_.begin() + output_format.at(offset), outputs_.begin() + output_format.at(offset),
......
...@@ -55,6 +55,10 @@ class OperatorBase { ...@@ -55,6 +55,10 @@ class OperatorBase {
/// e.g. Variable "x@GRAD" is the gradient of varibale "x". /// e.g. Variable "x@GRAD" is the gradient of varibale "x".
static std::string GRAD_VAR_SUFFIX() { return "@GRAD"; } static std::string GRAD_VAR_SUFFIX() { return "@GRAD"; }
static std::string GRAD_VAR_NAME(const std::string& name) {
return name + GRAD_VAR_SUFFIX();
}
/// Variables with this suffix are supposed to be filled up with zeros. /// Variables with this suffix are supposed to be filled up with zeros.
static std::string ZERO_VAR_SUFFIX() { return "@ZERO"; } static std::string ZERO_VAR_SUFFIX() { return "@ZERO"; }
...@@ -161,22 +165,30 @@ class OperatorContext { ...@@ -161,22 +165,30 @@ class OperatorContext {
template <typename T> template <typename T>
const T* Input(const size_t index) const { const T* Input(const size_t index) const {
return &(InputVar(index)->Get<T>()); auto var = InputVar(index);
PADDLE_ENFORCE(var != nullptr, "Input(%d) should not be nullptr", index);
return &var->Get<T>();
} }
template <typename T> template <typename T>
T* Output(const size_t index) const { T* Output(const size_t index) const {
return OutputVar(index)->GetMutable<T>(); auto var = OutputVar(index);
PADDLE_ENFORCE(var != nullptr, "Output(%d) should not be nullptr", index);
return var->GetMutable<T>();
} }
template <typename T> template <typename T>
const T* Input(const std::string& name) const { const T* Input(const std::string& name) const {
return &(InputVar(name)->Get<T>()); auto var = InputVar(name);
PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name);
return &var->Get<T>();
} }
template <typename T> template <typename T>
T* Output(const std::string& name) const { T* Output(const std::string& name) const {
return OutputVar(name)->GetMutable<T>(); auto var = OutputVar(name);
PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name);
return var->GetMutable<T>();
} }
template <typename T> template <typename T>
...@@ -185,8 +197,12 @@ class OperatorContext { ...@@ -185,8 +197,12 @@ class OperatorContext {
std::vector<const T*> res; std::vector<const T*> res;
res.reserve(names.size()); res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res), std::transform(names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) { [&](const std::string& sub_name) {
return &scope_.FindVar(name)->Get<T>(); auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr,
"MultiInput(%s:%s) should not be nullptr",
name, sub_name);
return &var->Get<T>();
}); });
return res; return res;
} }
...@@ -197,8 +213,12 @@ class OperatorContext { ...@@ -197,8 +213,12 @@ class OperatorContext {
std::vector<const T*> res; std::vector<const T*> res;
res.reserve(names.size()); res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res), std::transform(names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) { [&](const std::string& sub_name) {
return scope_.FindVar(name)->GetMutable<T>(); auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr,
"MultiOutput(%s:%s) should not be nullptr",
name, sub_name);
return var->GetMutable<T>();
}); });
return res; return res;
} }
...@@ -237,7 +257,7 @@ class ExecutionContext : public OperatorContext { ...@@ -237,7 +257,7 @@ class ExecutionContext : public OperatorContext {
template <typename PlaceType, template <typename PlaceType,
typename DeviceType = typename DeviceType =
typename EigenDeviceConverter<PlaceType>::EigenDeviceType> typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
DeviceType* GetEigenDevice() const; DeviceType& GetEigenDevice() const;
platform::Place GetPlace() const { return device_context_.GetPlace(); } platform::Place GetPlace() const { return device_context_.GetPlace(); }
......
...@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); ...@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
...@@ -16,35 +16,43 @@ limitations under the License. */ ...@@ -16,35 +16,43 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include "paddle/framework/net.h" #include "paddle/framework/backward.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
#include "paddle/pybind/tensor_bind.h" #include "paddle/framework/tensor_py.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/type_alias.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#include "pybind11/numpy.h" #include "pybind11/numpy.h"
#include "pybind11/pybind11.h" #include "pybind11/pybind11.h"
#include "pybind11/stl.h" #include "pybind11/stl.h"
namespace py = pybind11; namespace py = pybind11;
namespace pd = paddle::framework;
USE_OP(add_two); USE_OP(add_two);
USE_OP(onehot_cross_entropy); USE_OP(onehot_cross_entropy);
USE_OP_WITHOUT_KERNEL(fc); USE_OP_WITHOUT_KERNEL(fc);
USE_OP(sgd); USE_OP(sgd);
USE_OP(mul); USE_OP(mul);
USE_OP(mean);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(softmax); USE_OP(softmax);
USE_OP(rowwise_add); USE_OP(rowwise_add);
USE_OP_WITHOUT_KERNEL(recurrent_op); USE_OP_WITHOUT_KERNEL(recurrent_op);
USE_OP(fill_zeros_like); namespace paddle {
namespace framework {
template <typename ClassType> template <typename ClassType>
void ExposeOperator(ClassType& m) { void ExposeOperator(ClassType &m) {
m.def("infer_shape", &ClassType::type::InferShape) m.def("infer_shape", &ClassType::type::InferShape)
.def("run", &ClassType::type::Run) .def("run", &ClassType::type::Run)
.def("type",
[](const typename ClassType::type &op) -> std::string {
return op.type_;
})
.def("outputs", .def("outputs",
[](const typename ClassType::type& op) -> std::vector<std::string> { [](const typename ClassType::type &op) -> std::vector<std::string> {
return op.outputs_; return op.outputs_;
}) })
.def("__str__", &ClassType::type::DebugString); .def("__str__", &ClassType::type::DebugString);
...@@ -55,71 +63,83 @@ static size_t UniqueIntegerGenerator() { ...@@ -55,71 +63,83 @@ static size_t UniqueIntegerGenerator() {
return generator.fetch_add(1); return generator.fetch_add(1);
} }
bool IsCompileGPU() {
#ifdef PADDLE_ONLY_CPU
return false;
#else
return true;
#endif
}
PYBIND11_PLUGIN(core) { PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of PaddlePaddle"); py::module m("core", "C++ core of PaddlePaddle");
py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol()) py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
.def_buffer([](pd::Tensor& self) -> py::buffer_info { .def_buffer(
return paddle::pybind::CastToPyBuffer(self); [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
})
.def("get_dims", .def("get_dims",
[](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) [](const Tensor &self) { return vectorize(self.dims()); })
.def("set_dims", .def("set_dims",
[](pd::Tensor& self, const std::vector<int>& dim) { [](Tensor &self, const std::vector<int> &dim) {
self.Resize(pd::make_ddim(dim)); self.Resize(make_ddim(dim));
}) })
.def("alloc_float", .def("alloc_float",
[](pd::Tensor& self) { [](Tensor &self, paddle::platform::GPUPlace &place) {
self.mutable_data<float>(paddle::platform::CPUPlace()); self.mutable_data<float>(place);
})
.def("alloc_float",
[](Tensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<float>(place);
})
.def("alloc_int",
[](Tensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<int>(place);
}) })
.def("alloc_int", .def("alloc_int",
[](pd::Tensor& self) { [](Tensor &self, paddle::platform::GPUPlace &place) {
self.mutable_data<int>(paddle::platform::CPUPlace()); self.mutable_data<int>(place);
}) })
.def("set", paddle::pybind::PyTensorSetFromArray<float>) .def("set", PyCPUTensorSetFromArray<float>)
.def("set", paddle::pybind::PyTensorSetFromArray<int>) .def("set", PyCPUTensorSetFromArray<int>)
.def("shape", #ifndef PADDLE_ONLY_CPU
[](pd::Tensor& self) { return pd::vectorize(self.dims()); }); .def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>)
#endif
.def("shape", [](Tensor &self) { return vectorize(self.dims()); });
py::class_<pd::Variable>(m, "Variable", R"DOC(Variable Class. py::class_<Variable>(m, "Variable", R"DOC(Variable Class.
All parameter, weight, gradient are variables in Paddle. All parameter, weight, gradient are variables in Paddle.
)DOC") )DOC")
.def("is_int", [](const pd::Variable& var) { return var.IsType<int>(); }) .def("is_int", [](const Variable &var) { return var.IsType<int>(); })
.def("set_int", .def("set_int",
[](pd::Variable& var, int val) -> void { [](Variable &var, int val) -> void { *var.GetMutable<int>() = val; })
*var.GetMutable<int>() = val; .def("get_int", [](const Variable &var) -> int { return var.Get<int>(); })
})
.def("get_int",
[](const pd::Variable& var) -> int { return var.Get<int>(); })
.def("get_tensor", .def("get_tensor",
[](pd::Variable& self) -> pd::Tensor* { [](Variable &self) -> Tensor * { return self.GetMutable<Tensor>(); },
return self.GetMutable<pd::Tensor>();
},
py::return_value_policy::reference) py::return_value_policy::reference)
.def("get_net", .def("get_net",
[](pd::Variable& self) -> pd::NetOp* { [](Variable &self) -> ops::NetOp * {
return self.GetMutable<pd::NetOp>(); return self.GetMutable<ops::NetOp>();
}, },
py::return_value_policy::reference); py::return_value_policy::reference);
py::class_<pd::Scope>(m, "Scope", "") py::class_<Scope>(m, "Scope", "")
.def("new_var", .def("new_var",
[](pd::Scope& self, const std::string& name) -> pd::Variable* { [](Scope &self, const std::string &name) -> Variable * {
return self.NewVar(name); return self.NewVar(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("find_var", &pd::Scope::FindVar, py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
.def(py::init<>()) .def(py::init<>())
.def("new_scope", .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
[](pd::Scope& self) -> pd::Scope* { return &self.NewScope(); },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("drop_kids", &pd::Scope::DropKids); .def("drop_kids", &Scope::DropKids);
//! @note: Be careful! PyBind will return std::string as an unicode, not //! @note: Be careful! PyBind will return std::string as an unicode, not
//! Python str. If you want a str object, you should cast them in Python. //! Python str. If you want a str object, you should cast them in Python.
m.def("get_all_op_protos", []() -> std::vector<py::bytes> { m.def("get_all_op_protos", []() -> std::vector<py::bytes> {
auto& protos = pd::OpRegistry::protos(); auto &protos = OpRegistry::protos();
std::vector<py::bytes> ret_values; std::vector<py::bytes> ret_values;
for (auto it = protos.begin(); it != protos.end(); ++it) { for (auto it = protos.begin(); it != protos.end(); ++it) {
PADDLE_ENFORCE(it->second.IsInitialized(), PADDLE_ENFORCE(it->second.IsInitialized(),
...@@ -134,47 +154,76 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -134,47 +154,76 @@ All parameter, weight, gradient are variables in Paddle.
m.def_submodule( m.def_submodule(
"var_names", "var_names",
"The module will return special predefined variable name in Paddle") "The module will return special predefined variable name in Paddle")
.def("empty", pd::OperatorBase::EMPTY_VAR_NAME) .def("empty", OperatorBase::EMPTY_VAR_NAME)
.def("temp", pd::OperatorBase::TMP_VAR_NAME); .def("temp", OperatorBase::TMP_VAR_NAME);
// clang-format off
py::class_<paddle::platform::DeviceContext>(m, "DeviceContext") py::class_<paddle::platform::DeviceContext>(m, "DeviceContext")
.def_static("cpu_context", []() -> paddle::platform::DeviceContext* { .def_static("create",
return new paddle::platform::CPUDeviceContext(); [](paddle::platform::CPUPlace& place)
}); -> paddle::platform::DeviceContext* {
return new paddle::platform::CPUDeviceContext();
py::class_<pd::OperatorBase, std::shared_ptr<pd::OperatorBase>> operator_base( })
.def_static("create",
[](paddle::platform::GPUPlace& place)
-> paddle::platform::DeviceContext* {
#ifdef PADDLE_ONLY_CPU
PADDLE_THROW("GPUPlace is not supported in CPU device.");
#else
return new paddle::platform::CUDADeviceContext(place);
#endif
});
// clang-format on
py::class_<paddle::platform::GPUPlace>(m, "GPUPlace").def(py::init<int>());
py::class_<paddle::platform::CPUPlace>(m, "CPUPlace").def(py::init<>());
py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base(
m, "Operator"); m, "Operator");
operator_base.def_static("create", [](py::bytes protobin) { operator_base.def_static("create", [](py::bytes protobin) {
pd::OpDesc desc; OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc"); "Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(), PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s", "User OpDesc is not initialized, reason %s",
desc.InitializationErrorString()); desc.InitializationErrorString());
return pd::OpRegistry::CreateOp(desc); return OpRegistry::CreateOp(desc);
}); });
operator_base.def("backward",
[](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) {
return Backward(forwardOp, no_grad_vars);
});
ExposeOperator(operator_base); ExposeOperator(operator_base);
py::class_<pd::NetOp, std::shared_ptr<pd::NetOp>> net(m, "Net"); py::class_<ops::NetOp, std::shared_ptr<ops::NetOp>> net(m, "Net");
net.def_static("create", net.def_static("create",
[]() -> std::shared_ptr<pd::NetOp> { []() -> std::shared_ptr<ops::NetOp> {
auto retv = std::make_shared<pd::NetOp>(); auto retv = std::make_shared<ops::NetOp>();
retv->type_ = "plain_net"; retv->type_ = "plain_net";
return retv; return retv;
}) })
.def("add_op", &pd::NetOp::AddOp) .def("add_op", &ops::NetOp::AddOp)
.def("add_op", .def(
[](pd::NetOp& self, const std::shared_ptr<pd::NetOp>& net) -> void { "add_op",
self.AddOp(std::static_pointer_cast<pd::OperatorBase>(net)); [](ops::NetOp &self, const std::shared_ptr<ops::NetOp> &net) -> void {
}) self.AddOp(std::static_pointer_cast<OperatorBase>(net));
.def("complete_add_op", &pd::NetOp::CompleteAddOp) })
.def("complete_add_op", &ops::NetOp::CompleteAddOp)
.def("complete_add_op", .def("complete_add_op",
[](std::shared_ptr<pd::NetOp>& self) { self->CompleteAddOp(); }); [](std::shared_ptr<ops::NetOp> &self) { self->CompleteAddOp(); });
ExposeOperator(net); ExposeOperator(net);
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);
m.def("is_compile_gpu", IsCompileGPU);
return m.ptr(); return m.ptr();
} }
} // namespace framework
} // namespace paddle
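With the bindings above, tensors are allocated and filled against an explicit `CPUPlace`/`GPUPlace`, and a `DeviceContext` is created per place. A minimal end-to-end sketch of the CPU path; the `paddle.v2.framework.core` import path is an assumption, since this file only names the module `core`.

```python
# Sketch of the reworked place-aware Python API; the import path is assumed.
import numpy as np
import paddle.v2.framework.core as core

place = core.CPUPlace()
ctx = core.DeviceContext.create(place)   # new factory; replaces cpu_context()

scope = core.Scope()
tensor = scope.new_var("x").get_tensor()
tensor.set_dims([2, 3])
tensor.alloc_float(place)                              # allocate on the given place
tensor.set(np.ones((2, 3), dtype="float32"), place)    # PyCPUTensorSetFromArray overload

print(core.is_compile_gpu())   # False on a CPU-only build
print(tensor.get_dims())       # [2, 3]
```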
...@@ -26,19 +26,17 @@ limitations under the License. */ ...@@ -26,19 +26,17 @@ limitations under the License. */
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
namespace paddle { namespace paddle {
namespace pybind {
namespace details { // forward declare
template <bool less, size_t i, typename... args>
struct CastToPyBufferImpl;
} // namespace details
} // namespace pybind
namespace framework { namespace framework {
namespace details {
template <bool less, size_t i, typename... args>
struct CastToPyBufferImpl;
}
class Tensor { class Tensor {
public: public:
template <bool less, size_t i, typename... args> template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl; friend struct details::CastToPyBufferImpl;
template <typename T, size_t D, int MajorType, typename IndexType> template <typename T, size_t D, int MajorType, typename IndexType>
friend struct EigenTensor; friend struct EigenTensor;
...@@ -167,4 +165,4 @@ class Tensor { ...@@ -167,4 +165,4 @@ class Tensor {
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
#include "paddle/framework/detail/tensor-inl.h" #include "paddle/framework/tensor_impl.h"
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/memory/memcpy.h" #include "paddle/memory/memcpy.h"
namespace paddle { namespace paddle {
...@@ -62,9 +61,11 @@ inline T* Tensor::mutable_data(platform::Place place) { ...@@ -62,9 +61,11 @@ inline T* Tensor::mutable_data(platform::Place place) {
if (platform::is_cpu_place(place)) { if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>( holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size)); boost::get<platform::CPUPlace>(place), size));
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_ONLY_CPU
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
} }
#ifndef PADDLE_ONLY_CPU #else
else if (platform::is_gpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>( holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
boost::get<platform::GPUPlace>(place), size)); boost::get<platform::GPUPlace>(place), size));
} }
......
...@@ -13,15 +13,17 @@ ...@@ -13,15 +13,17 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <paddle/framework/tensor.h> #include <string>
#include <pybind11/numpy.h> #include "paddle/framework/tensor.h"
#include <pybind11/pybind11.h> #include "paddle/memory/memcpy.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
namespace py = pybind11; namespace py = pybind11;
namespace paddle { namespace paddle {
namespace pybind { namespace framework {
namespace details { namespace details {
...@@ -40,9 +42,6 @@ template <size_t I, typename... ARGS> ...@@ -40,9 +42,6 @@ template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<true, I, ARGS...> { struct CastToPyBufferImpl<true, I, ARGS...> {
using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type; using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
py::buffer_info operator()(framework::Tensor &tensor) { py::buffer_info operator()(framework::Tensor &tensor) {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()),
"Only CPU tensor can cast to numpy array");
if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) { if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) {
auto dim_vec = framework::vectorize(tensor.dims()); auto dim_vec = framework::vectorize(tensor.dims());
std::vector<size_t> dims_outside; std::vector<size_t> dims_outside;
...@@ -56,14 +55,16 @@ struct CastToPyBufferImpl<true, I, ARGS...> { ...@@ -56,14 +55,16 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
strides[i - 1] = sizeof(CUR_TYPE) * prod; strides[i - 1] = sizeof(CUR_TYPE) * prod;
prod *= dims_outside[i - 1]; prod *= dims_outside[i - 1];
} }
framework::Tensor dst_tensor;
if (paddle::platform::is_gpu_place(tensor.holder_->place())) {
dst_tensor.CopyFrom<CUR_TYPE>(tensor, platform::CPUPlace());
} else if (paddle::platform::is_cpu_place(tensor.holder_->place())) {
dst_tensor = tensor;
}
return py::buffer_info( return py::buffer_info(
tensor.mutable_data<CUR_TYPE>(tensor.holder_->place()), dst_tensor.mutable_data<CUR_TYPE>(dst_tensor.holder_->place()),
sizeof(CUR_TYPE), sizeof(CUR_TYPE), py::format_descriptor<CUR_TYPE>::format(),
py::format_descriptor<CUR_TYPE>::format(), (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides);
(size_t)framework::arity(tensor.dims()),
dims_outside,
strides);
} else { } else {
constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value; constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor); return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
...@@ -77,9 +78,10 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { ...@@ -77,9 +78,10 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
} }
template <typename T> template <typename T>
void PyTensorSetFromArray( void PyCPUTensorSetFromArray(
framework::Tensor &self, framework::Tensor &self,
py::array_t<T, py::array::c_style | py::array::forcecast> array) { py::array_t<T, py::array::c_style | py::array::forcecast> array,
paddle::platform::CPUPlace &place) {
std::vector<int> dims; std::vector<int> dims;
dims.reserve(array.ndim()); dims.reserve(array.ndim());
for (size_t i = 0; i < array.ndim(); ++i) { for (size_t i = 0; i < array.ndim(); ++i) {
...@@ -87,9 +89,28 @@ void PyTensorSetFromArray( ...@@ -87,9 +89,28 @@ void PyTensorSetFromArray(
} }
self.Resize(framework::make_ddim(dims)); self.Resize(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace()); auto *dst = self.mutable_data<T>(place);
std::memcpy(dst, array.data(), sizeof(T) * array.size()); std::memcpy(dst, array.data(), sizeof(T) * array.size());
} }
#ifndef PADDLE_ONLY_CPU
template <typename T>
void PyCUDATensorSetFromArray(
framework::Tensor &self,
py::array_t<T, py::array::c_style | py::array::forcecast> array,
paddle::platform::GPUPlace &place) {
std::vector<int> dims;
dims.reserve(array.ndim());
for (size_t i = 0; i < array.ndim(); ++i) {
dims.push_back((int)array.shape()[i]);
}
self.Resize(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(place);
paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
cudaMemcpyHostToDevice);
}
#endif
} // namespace pybind } // namespace pybind
} // namespace paddle } // namespace paddle
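Dropping the CPU-only check in `CastToPyBufferImpl` and adding `PyCUDATensorSetFromArray` lets a GPU tensor round-trip through numpy: the cast now copies device memory into a host tensor before exposing the buffer. A short sketch, assuming a GPU build and the same `core` import as above.

```python
# Assumes core.is_compile_gpu() is True; import path as in the previous sketch.
import numpy as np
import paddle.v2.framework.core as core

place = core.GPUPlace(0)
scope = core.Scope()
tensor = scope.new_var("w").get_tensor()
tensor.set(np.arange(6, dtype="float32").reshape(2, 3), place)  # PyCUDATensorSetFromArray
print(np.array(tensor))  # now valid: the GPU tensor is copied to host before casting
```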
...@@ -109,6 +109,13 @@ protected: ...@@ -109,6 +109,13 @@ protected:
return filter[filter.ndims() - 1]; return filter[filter.ndims() - 1];
} }
// determine whether im2col needs to be performed
inline bool isNeedIm2col(const TensorShape& filter) const {
return !(getFilterHeight(filter) == 1 && getFilterWidth(filter) == 1 &&
strideH() == 1 && strideW() == 1 && paddingH() == 0 &&
paddingW() == 0);
}
std::vector<size_t> strides_; std::vector<size_t> strides_;
std::vector<size_t> paddings_; std::vector<size_t> paddings_;
......
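`isNeedIm2col` skips the im2col step exactly when the filter is 1x1 with unit stride and zero padding, because in that case the im2col matrix is just the input viewed as (channels) x (height*width) and the convolution collapses to a single GEMM on the input data. A small numpy sketch of that equivalence:

```python
# Numpy sketch: a 1x1 / stride-1 / pad-0 convolution reduces to one GEMM over the input.
import numpy as np

c_in, c_out, h, w = 3, 4, 5, 6
x = np.random.rand(c_in, h, w).astype(np.float32)
filt = np.random.rand(c_out, c_in, 1, 1).astype(np.float32)

col = x.reshape(c_in, h * w)            # what im2col would produce for this case
out = filt.reshape(c_out, c_in) @ col   # the GEMM the forward pass runs on colData
print(out.reshape(c_out, h, w).shape)   # (4, 5, 6)
```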
...@@ -66,16 +66,23 @@ public: ...@@ -66,16 +66,23 @@ public:
real* inputData = inputs[0].data<real>(); real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>(); real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>(); real* outputData = outputs[0].data<real>();
bool needIm2col = isNeedIm2col(filter);
TensorShape imShape = TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth}); TensorShape({inputChannels / groups_, inputHeight, inputWidth});
TensorShape colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements()); TensorShape colShape;
real* colData = reinterpret_cast<real*>(memory_->getBuf()); real* colData = NULL;
if (needIm2col) {
colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements());
colData = reinterpret_cast<real*>(memory_->getBuf());
}
Im2ColFunctor<kCFO, Device, real> im2col; Im2ColFunctor<kCFO, Device, real> im2col;
GemmFunctor<Device, real> gemm; GemmFunctor<Device, real> gemm;
...@@ -86,15 +93,18 @@ public: ...@@ -86,15 +93,18 @@ public:
for (size_t i = 0; i < batchSize; i++) { for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) { for (size_t g = 0; g < groups_; g++) {
im2col(inputData + g * inputOffset, if (needIm2col) {
imShape, im2col(inputData + g * inputOffset,
colData, imShape,
colShape, colData,
strideH(), colShape,
strideW(), strideH(),
paddingH(), strideW(),
paddingW()); paddingH(),
paddingW());
} else {
colData = inputData + g * inputOffset;
}
int M = outputChannels / groups_; int M = outputChannels / groups_;
int N = outputHeight * outputWidth; int N = outputHeight * outputWidth;
int K = inputChannels / groups_ * filterHeight * filterWidth; int K = inputChannels / groups_ * filterHeight * filterWidth;
...@@ -159,19 +169,27 @@ public: ...@@ -159,19 +169,27 @@ public:
real* outputGrad = inputs[0].data<real>(); real* outputGrad = inputs[0].data<real>();
real* filterData = inputs[1].data<real>(); real* filterData = inputs[1].data<real>();
real* inputGrad = outputs[0].data<real>(); real* inputGrad = outputs[0].data<real>();
bool needIm2col = isNeedIm2col(filter);
TensorShape imShape = TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth}); TensorShape({inputChannels / groups_, inputHeight, inputWidth});
TensorShape colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements()); TensorShape colShape;
real* colData = reinterpret_cast<real*>(memory_->getBuf()); real* colData = NULL;
if (needIm2col) {
colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements());
colData = reinterpret_cast<real*>(memory_->getBuf());
}
Col2ImFunctor<kCFO, Device, real> col2im; Col2ImFunctor<kCFO, Device, real> col2im;
GemmFunctor<Device, real> gemm; GemmFunctor<Device, real> gemm;
size_t inputOffset = imShape.getElements(); size_t inputOffset = imShape.getElements();
size_t outputOffset = size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth; (outputChannels / groups_) * outputHeight * outputWidth;
...@@ -182,6 +200,11 @@ public: ...@@ -182,6 +200,11 @@ public:
int K = outputChannels / groups_; int K = outputChannels / groups_;
int N = outputHeight * outputWidth; int N = outputHeight * outputWidth;
int M = inputChannels / groups_ * filterHeight * filterWidth; int M = inputChannels / groups_ * filterHeight * filterWidth;
real scale = 0.0f;
if (!needIm2col) {
colData = inputGrad + g * inputOffset;
scale = 1.0f;
}
gemm(CblasTrans, gemm(CblasTrans,
CblasNoTrans, CblasNoTrans,
M, M,
...@@ -192,17 +215,19 @@ public: ...@@ -192,17 +215,19 @@ public:
M, M,
outputGrad + g * outputOffset, outputGrad + g * outputOffset,
N, N,
0.0f, scale,
colData, colData,
N); N);
col2im(inputGrad + g * inputOffset, if (needIm2col) {
imShape, col2im(inputGrad + g * inputOffset,
colData, imShape,
colShape, colData,
strideH(), colShape,
strideW(), strideH(),
paddingH(), strideW(),
paddingW()); paddingH(),
paddingW());
}
} }
inputGrad += inputChannels * inputHeight * inputWidth; inputGrad += inputChannels * inputHeight * inputWidth;
outputGrad += outputChannels * outputHeight * outputWidth; outputGrad += outputChannels * outputHeight * outputWidth;
...@@ -255,16 +280,23 @@ public: ...@@ -255,16 +280,23 @@ public:
real* outputGrad = inputs[0].data<real>(); real* outputGrad = inputs[0].data<real>();
real* inputData = inputs[1].data<real>(); real* inputData = inputs[1].data<real>();
real* filterGrad = outputs[0].data<real>(); real* filterGrad = outputs[0].data<real>();
bool needIm2col = isNeedIm2col(filter);
TensorShape imShape = TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth}); TensorShape({inputChannels / groups_, inputHeight, inputWidth});
TensorShape colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements()); TensorShape colShape;
real* colData = reinterpret_cast<real*>(memory_->getBuf()); real* colData = NULL;
if (needIm2col) {
colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements());
colData = reinterpret_cast<real*>(memory_->getBuf());
}
Im2ColFunctor<kCFO, Device, real> im2col; Im2ColFunctor<kCFO, Device, real> im2col;
GemmFunctor<Device, real> gemm; GemmFunctor<Device, real> gemm;
...@@ -274,15 +306,18 @@ public: ...@@ -274,15 +306,18 @@ public:
size_t filterOffset = filter.getElements() / groups_; size_t filterOffset = filter.getElements() / groups_;
for (size_t i = 0; i < batchSize; i++) { for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) { for (size_t g = 0; g < groups_; g++) {
im2col(inputData + g * inputOffset, if (needIm2col) {
imShape, im2col(inputData + g * inputOffset,
colData, imShape,
colShape, colData,
strideH(), colShape,
strideW(), strideH(),
paddingH(), strideW(),
paddingW()); paddingH(),
paddingW());
} else {
colData = inputData + g * inputOffset;
}
int M = outputChannels / groups_; int M = outputChannels / groups_;
int K = outputHeight * outputWidth; int K = outputHeight * outputWidth;
int N = inputChannels / groups_ * filterHeight * filterWidth; int N = inputChannels / groups_ * filterHeight * filterWidth;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
/**
* A layer for clipping the input value by the threshold.
* \f[
* out[i] = \min\left(\max\left(in[i],p_{1}\right),p_{2}\right)
* \f]
*/
class ClipLayer : public Layer {
protected:
double min_;
double max_;
public:
explicit ClipLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
};
REGISTER_LAYER(clip, ClipLayer);
bool ClipLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
CHECK_EQ(inputLayers_.size(), 1U);
auto layerConf = config_.inputs(0).clip_conf();
min_ = layerConf.min();
max_ = layerConf.max();
CHECK_LT(min_, max_);
return true;
}
void ClipLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr inV = getInputValue(0);
resetOutput(inV->getHeight(), inV->getWidth());
MatrixPtr outV = getOutputValue();
outV->copyFrom(*inV);
outV->clip(min_, max_);
}
void ClipLayer::backward(const UpdateCallback& callback) {
MatrixPtr inV = getInputValue(0);
MatrixPtr inG = getInputGrad(0);
if (inG) {
MatrixPtr outV = getOutputValue();
MatrixPtr outG = getOutputGrad();
MatrixPtr tmpMtx;
Matrix::resizeOrCreate(
tmpMtx, outG->getHeight(), outG->getWidth(), false, useGpu_);
tmpMtx->clipDerivative(*inV, min_, max_);
inG->addDotMul(*outG, *tmpMtx, 1, 1);
}
}
} // namespace paddle
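Editor's note: to make the forward/backward pair above concrete, the layer clamps element-wise and passes gradients through only inside [min_, max_]. A small self-contained sketch of the same rule on plain values (illustration only, not Paddle code):

#include <algorithm>
#include <cstdio>

int main() {
  const double lo = 0.2, hi = 0.8;  // stand-ins for min_ and max_
  for (double x : {-0.5, 0.3, 0.7, 1.4}) {
    double out = std::min(std::max(x, lo), hi);    // forward: clip
    double grad = (x < lo || x > hi) ? 0.0 : 1.0;  // backward: clipDerivative
    std::printf("x=%5.2f  clip=%4.2f  d(clip)/dx=%.0f\n", x, out, grad);
  }
  return 0;
}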
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
/**
* A layer for L2 normalization in each row,
* \f[
* out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}}
* \f]
* where the size of \f$in\f$ is (batchSize x dataDim),
* and the size of \f$out\f$ is (batchSize x dataDim).
*/
class RowL2NormLayer : public Layer {
protected:
MatrixPtr inSquare_;
MatrixPtr l2NormReciprocal_;
MatrixPtr dotSum_;
public:
explicit RowL2NormLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
};
REGISTER_LAYER(row_l2_norm, RowL2NormLayer);
bool RowL2NormLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
CHECK_EQ(inputLayers_.size(), 1U);
return true;
}
void RowL2NormLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr inV = getInputValue(0);
/* malloc memory for the output_ if necessary */
size_t batchSize = inV->getHeight();
size_t dataDim = getSize();
CHECK_EQ(dataDim, inV->getWidth());
resetOutput(batchSize, dataDim);
MatrixPtr outV = getOutputValue();
Matrix::resizeOrCreate(inSquare_, batchSize, dataDim, false, useGpu_);
inV->square2(*inSquare_);
Matrix::resizeOrCreate(l2NormReciprocal_, batchSize, 1, false, useGpu_);
inSquare_->rowSum(*l2NormReciprocal_);
l2NormReciprocal_->sqrt2(*l2NormReciprocal_);
l2NormReciprocal_->scalarDiv(*l2NormReciprocal_, 1.0);
outV->rowScale(0, *inV, *l2NormReciprocal_);
}
void RowL2NormLayer::backward(const UpdateCallback& callback) {
MatrixPtr inV = getInputValue(0);
MatrixPtr inG = getInputGrad(0);
MatrixPtr outV = getOutputValue();
MatrixPtr outG = getOutputGrad();
size_t batchSize = inV->getHeight();
  // At this point l2NormReciprocal_ holds 1 / ||inV[i]||_2, so the backward
  // pass computes:
  // inG[ij] += outG[ij] * l2NormReciprocal
  // inG[ij] += -inV[ij] * l2NormReciprocal^3 * DotMul(outG[i], inV[i])
if (inG) {
Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_);
dotSum_->zeroMem();
dotSum_->rowDotMul(0, *outG, *outV);
dotSum_->dotMul(*dotSum_, *l2NormReciprocal_);
dotSum_->dotMul(*dotSum_, *l2NormReciprocal_);
inSquare_->rowScale(0, *inV, *dotSum_);
inG->sub(*inSquare_);
inG->addRowScale(0, *outG, *l2NormReciprocal_);
}
}
} // namespace paddle
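Editor's note: for reference, the backward pass above is the derivative of the per-row normalization. Writing y = x / ||x||_2 for one input row x:

\[
\frac{\partial y_i}{\partial x_j} \;=\; \frac{\delta_{ij}}{\lVert x \rVert_2} \;-\; \frac{x_i\,x_j}{\lVert x \rVert_2^{3}},
\qquad
\frac{\partial L}{\partial x_j} \;=\; \frac{1}{\lVert x \rVert_2}\,\frac{\partial L}{\partial y_j}
\;-\; \frac{x_j}{\lVert x \rVert_2^{3}} \sum_i \frac{\partial L}{\partial y_i}\, x_i .
\]

This is what the code computes: dotSum_ starts as \(\sum_i outG_i \cdot outV_i\) and is scaled by l2NormReciprocal_ twice (an overall factor of \(1/\lVert x\rVert_2^{3}\) on \(\sum_i outG_i \cdot inV_i\), since outV already carries one reciprocal factor); it then scales inV before being subtracted from the outG term scaled by \(1/\lVert x\rVert_2\).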
...@@ -1899,6 +1899,36 @@ TEST(Layer, CropLayer) { ...@@ -1899,6 +1899,36 @@ TEST(Layer, CropLayer) {
} }
} }
TEST(Layer, ClipLayer) {
const size_t batchSize = 128;
const size_t size = 512;
TestConfig config;
config.layerConfig.set_type("clip");
config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
ClipConfig* layerConf = input->mutable_clip_conf();
double p1 = std::rand() / (double)RAND_MAX;
double p2 = std::rand() / (double)RAND_MAX;
layerConf->set_min(std::min(p1, p2));
layerConf->set_max(std::max(p1, p2));
for (auto useGpu : {false, true}) {
testLayerGrad(config, "clip", batchSize, false, useGpu, false);
}
}
TEST(Layer, RowL2NormLayer) {
const size_t batchSize = 128;
const size_t size = 512;
TestConfig config;
config.layerConfig.set_type("row_l2_norm");
config.layerConfig.set_size(size);
config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false);
}
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
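Editor's note: both new tests go through testLayerGrad, which compares the layer's analytic backward pass against a numerical gradient. Independent of the harness, the underlying check is a central finite difference; a minimal hypothetical sketch of the idea (names and tolerances are illustrative, not the actual LayerGradUtil code):

#include <cmath>
#include <cstdio>
#include <functional>

// Compare an analytic derivative df with a central finite difference of f.
bool gradCheck(const std::function<double(double)>& f,
               const std::function<double(double)>& df,
               double x, double eps = 1e-4, double tol = 1e-3) {
  double numeric = (f(x + eps) - f(x - eps)) / (2 * eps);
  return std::fabs(numeric - df(x)) < tol;
}

int main() {
  // e.g. check d/dx (x^2) = 2x at a few points
  auto f = [](double x) { return x * x; };
  auto df = [](double x) { return 2 * x; };
  for (double x : {-1.0, 0.0, 0.5, 3.0}) {
    std::printf("x=%4.1f  ok=%d\n", x, static_cast<int>(gradCheck(f, df, x)));
  }
  return 0;
}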
...@@ -442,6 +442,13 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER, ...@@ -442,6 +442,13 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER,
template<class T> template<class T>
void BaseMatrixT<T>::clip(T p1, T p2) { applyUnary(unary::Clip<T>(p1, p2)); } void BaseMatrixT<T>::clip(T p1, T p2) { applyUnary(unary::Clip<T>(p1, p2)); }
DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER,
a = b < p1 ? 0 : (b > p2 ? 0 : 1));
template<class T>
void BaseMatrixT<T>::clipDerivative(BaseMatrixT& b, T p1, T p2) {
applyBinary(binary::ClipDerivative<T>(p1, p2), b);
}
DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, ONE_PARAMETER, DEFINE_MATRIX_UNARY_PARAMETER_OP(BiggerThanScalar, ONE_PARAMETER,
a = a > p ? 1.0f : 0.0f); a = a > p ? 1.0f : 0.0f);
template<class T> template<class T>
......
...@@ -488,6 +488,13 @@ public: ...@@ -488,6 +488,13 @@ public:
*/ */
void clip(T p1, T p2); void clip(T p1, T p2);
/**
 * @code
 * this = (b < p1 || b > p2) ? 0 : 1
 * @endcode
 */
void clipDerivative(BaseMatrixT& b, T p1, T p2);
/** /**
* @code * @code
* a = a > p ? 1.0f : 0.0f * a = a > p ? 1.0f : 0.0f
......
...@@ -41,22 +41,27 @@ function(op_library TARGET) ...@@ -41,22 +41,27 @@ function(op_library TARGET)
endif() endif()
endfunction() endfunction()
cc_library(net_op SRCS net_op.cc DEPS op_registry)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
op_library(add_op SRCS add_op.cc add_op.cu) op_library(add_op SRCS add_op.cc add_op.cu)
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
op_library(mean_op SRCS mean_op.cc mean_op.cu)
cc_test(mean_op_test SRCS mean_op_test.cc DEPS mean_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu) op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc) op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op
softmax_op net)
op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
op_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc op_library(fc_op
tensor op_registry operator net) SRCS fc_op.cc
cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc DEPS DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op)
recurrent_network_op gtest mul_op add_op) op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net_op)
cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op)
...@@ -50,10 +50,6 @@ The equation is: Out = X + Y ...@@ -50,10 +50,6 @@ The equation is: Out = X + Y
class AddOpGrad : public OperatorWithKernel { class AddOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override {} void InferShape(const InferShapeContext &ctx) const override {}
std::string DebugString() const override {
LOG(INFO) << "AddOpGrad";
return "";
}
}; };
} // namespace operators } // namespace operators
......
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/add_op.h" #include "paddle/operators/add_op.h"
......
...@@ -28,10 +28,13 @@ public: ...@@ -28,10 +28,13 @@ public:
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
EigenVector<T>::Flatten(*output).device( auto X = EigenVector<T>::Flatten(*input0);
*(context.GetEigenDevice<Place>())) = auto Y = EigenVector<T>::Flatten(*input1);
framework::EigenVector<T>::Flatten(*input0) + auto Z = EigenVector<T>::Flatten(*output);
framework::EigenVector<T>::Flatten(*input1);
auto place = context.GetEigenDevice<Place>();
Z.device(place) = X + Y;
} }
}; };
......
#define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h" #include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/mean_op.h"
namespace paddle {
namespace operators {
class MeanOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Input size of MeanOp must be one");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of MeanOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.OutputVar(0) != nullptr,
"Input/Output of MeanOp must be initialized.");
ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1}));
}
};
class MeanOpMaker : public OpProtoAndCheckerMaker {
public:
MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op").IgnoreGradient();
AddComment("Mean Operator");
}
};
class MeanGradOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
ctx.Output<Tensor>("X" + GRAD_VAR_SUFFIX())
->Resize(ctx.Input<Tensor>("X")->dims());
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker);
REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel<ops::CPUPlace, float>);
REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp);
REGISTER_OP_CPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::CPUPlace, float>);
#define EIGEN_USE_GPU
#include "paddle/operators/mean_op.h"
REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel<ops::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::GPUPlace, float>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/type_alias.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class MeanKernel : public OpKernel {
public:
void Compute(const ExecutionContext& context) const override {
auto input = context.Input<Tensor>(0);
auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace());
auto X = EigenVector<T>::Flatten(*input);
auto y = EigenScalar<T>::From(*output);
auto place = context.GetEigenDevice<Place>();
y.device(place) = X.mean();
}
};
template <typename Place, typename T>
class MeanGradKernel : public OpKernel {
public:
void Compute(const ExecutionContext& context) const override {
auto OG = context.Input<Tensor>("Out" + OperatorBase::GRAD_VAR_SUFFIX());
PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
"Mean Gradient should be scalar");
auto IG = context.Output<Tensor>("X" + OperatorBase::GRAD_VAR_SUFFIX());
IG->mutable_data<T>(context.GetPlace());
T ig_size = (T)framework::product(IG->dims());
EigenVector<T>::Flatten(*IG).device(context.GetEigenDevice<Place>()) =
EigenScalar<T>::From(*OG) / ig_size;
}
};
} // namespace operators
} // namespace paddle
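Editor's note: the two kernels above follow directly from the definition of the mean; for an input with N = product(dims) elements:

\[
y \;=\; \frac{1}{N}\sum_{i=1}^{N} x_i
\quad\Longrightarrow\quad
\frac{\partial y}{\partial x_i} \;=\; \frac{1}{N},
\qquad
\frac{\partial L}{\partial x_i} \;=\; \frac{1}{N}\,\frac{\partial L}{\partial y},
\]

so MeanGradKernel simply broadcasts the scalar output gradient divided by ig_size over the input gradient.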
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/framework/op_registry.h>
USE_OP(mean);
TEST(MeanOp, GetOpProto) {
auto& protos = paddle::framework::OpRegistry::protos();
auto it = protos.find("mean");
ASSERT_NE(it, protos.end());
}
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/mul_op.h" #include "paddle/operators/mul_op.h"
REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>);
\ No newline at end of file
...@@ -26,13 +26,18 @@ public: ...@@ -26,13 +26,18 @@ public:
Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = { Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = {
{Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}}; {Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}};
auto input0 = context.Input<Tensor>("X");
auto input1 = context.Input<Tensor>("Y");
auto output = context.Output<Tensor>(0); auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
EigenMatrix<T>::From(*output).device(*(context.GetEigenDevice<Place>())) = auto X = EigenMatrix<T>::From(*input0);
EigenMatrix<T>::From(*context.Input<Tensor>("X")) auto Y = EigenMatrix<T>::From(*input1);
.contract(EigenMatrix<T>::From(*context.Input<Tensor>("Y")), auto Z = EigenMatrix<T>::From(*output);
dim_pair); auto place = context.GetEigenDevice<Place>();
Z.device(place) = X.contract(Y, dim_pair);
} }
}; };
} // namespace operators } // namespace operators
......
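Editor's note: the dim_pair {(1, 0)} contraction in MulKernel contracts the second dimension of X against the first dimension of Y, i.e. an ordinary matrix product Z[i][j] = sum_k X[i][k] * Y[k][j]. A standalone Eigen sketch of the same call, outside the Paddle wrappers (only Eigen's unsupported Tensor module is assumed):

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 2> X(2, 3), Y(3, 2);
  X.setValues({{1, 2, 3}, {4, 5, 6}});
  Y.setValues({{1, 0}, {0, 1}, {1, 1}});
  // Contract dimension 1 of X with dimension 0 of Y: Z[i][j] = sum_k X[i][k] * Y[k][j]
  Eigen::array<Eigen::IndexPair<int>, 1> dim_pair = {Eigen::IndexPair<int>(1, 0)};
  Eigen::Tensor<float, 2> Z = X.contract(Y, dim_pair);
  std::cout << Z << std::endl;  // the 2x2 matrix product of X and Y
  return 0;
}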
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
limitations under the License. limitations under the License.
*/ */
#include "paddle/framework/net.h" #include "paddle/operators/net_op.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace framework { namespace operators {
void NetOp::CompleteAddOp(bool calc) { void NetOp::CompleteAddOp(bool calc) {
add_op_done_ = true; add_op_done_ = true;
...@@ -74,5 +74,5 @@ std::string NetOp::DebugString() const { ...@@ -74,5 +74,5 @@ std::string NetOp::DebugString() const {
bool NetOp::IsNetOp() const { return true; } bool NetOp::IsNetOp() const { return true; }
} // namespace framework } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -14,15 +14,17 @@ limitations under the License. */ ...@@ -14,15 +14,17 @@ limitations under the License. */
#pragma once #pragma once
#include <paddle/framework/op_desc.pb.h> #include "paddle/framework/op_desc.pb.h"
#include <paddle/framework/operator.h>
#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
#include "paddle/operators/type_alias.h"
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
namespace paddle { namespace paddle {
namespace framework { namespace operators {
/** /**
* @brief Network is also a type of Operator * @brief Network is also a type of Operator
* *
...@@ -37,13 +39,13 @@ namespace framework { ...@@ -37,13 +39,13 @@ namespace framework {
* This is the base class of network, all the networks should implement the APIs * This is the base class of network, all the networks should implement the APIs
* it defines. * it defines.
*/ */
class NetOp : public OperatorBase { class NetOp : public framework::OperatorBase {
public: public:
/** /**
* Infer all the operators' input and output variables' shapes, will be called * Infer all the operators' input and output variables' shapes, will be called
* before every mini-batch * before every mini-batch
*/ */
void InferShape(const Scope& scope) const override { void InferShape(const framework::Scope& scope) const override {
for (auto& op : ops_) { for (auto& op : ops_) {
op->InferShape(scope); op->InferShape(scope);
} }
...@@ -56,7 +58,7 @@ class NetOp : public OperatorBase { ...@@ -56,7 +58,7 @@ class NetOp : public OperatorBase {
* scope will be used instead. If no OpContext is provided, default context * will be used.
* will be used. * will be used.
*/ */
void Run(const Scope& scope, void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override { const platform::DeviceContext& dev_ctx) const override {
for (auto& op : ops_) { for (auto& op : ops_) {
op->Run(scope, dev_ctx); op->Run(scope, dev_ctx);
...@@ -88,7 +90,7 @@ class NetOp : public OperatorBase { ...@@ -88,7 +90,7 @@ class NetOp : public OperatorBase {
std::vector<std::shared_ptr<OperatorBase>> ops_; std::vector<std::shared_ptr<OperatorBase>> ops_;
private: private:
bool add_op_done_{false}; bool add_op_done_{false};
template <typename T, typename KeyType> template <typename T, typename KeyType>
...@@ -97,5 +99,5 @@ class NetOp : public OperatorBase { ...@@ -97,5 +99,5 @@ class NetOp : public OperatorBase {
} }
}; };
} // namespace framework } // namespace operators
} // namespace paddle } // namespace paddle
#include "paddle/operators/net_op.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <paddle/framework/net.h>
#include <paddle/framework/op_registry.h> #include "paddle/framework/op_registry.h"
#include <paddle/framework/operator.h> #include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace framework { namespace operators {
static int infer_shape_cnt = 0; static int infer_shape_cnt = 0;
static int run_cnt = 0; static int run_cnt = 0;
class TestOp : public OperatorBase { class TestOp : public OperatorBase {
public: public:
void InferShape(const framework::Scope& scope) const override { void InferShape(const framework::Scope& scope) const override {
++infer_shape_cnt; ++infer_shape_cnt;
} }
...@@ -21,7 +23,7 @@ class TestOp : public OperatorBase { ...@@ -21,7 +23,7 @@ class TestOp : public OperatorBase {
}; };
class EmptyOp : public OperatorBase { class EmptyOp : public OperatorBase {
public: public:
void InferShape(const Scope& scope) const override {} void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {} const platform::DeviceContext& dev_ctx) const override {}
...@@ -73,7 +75,7 @@ TEST(OpKernel, all) { ...@@ -73,7 +75,7 @@ TEST(OpKernel, all) {
ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet);
} }
TEST(Net, insert_op) { TEST(NetOp, insert_op) {
NetOp net; NetOp net;
auto op1 = std::make_shared<EmptyOp>(); auto op1 = std::make_shared<EmptyOp>();
op1->inputs_ = {"x", "w1", "b1"}; op1->inputs_ = {"x", "w1", "b1"};
...@@ -85,5 +87,5 @@ TEST(Net, insert_op) { ...@@ -85,5 +87,5 @@ TEST(Net, insert_op) {
ASSERT_EQ(3UL, net.ops_.size()); ASSERT_EQ(3UL, net.ops_.size());
} }
} // namespace framework } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/recurrent_network_op.h" #include "paddle/operators/recurrent_op.h"
#include <glog/logging.h> #include <glog/logging.h>
#include <cstring> #include <cstring>
#include <sstream> #include <sstream>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
...@@ -29,11 +29,15 @@ namespace rnn { ...@@ -29,11 +29,15 @@ namespace rnn {
void SegmentInputs(const std::vector<Scope*>& step_scopes, void SegmentInputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& inlinks, const std::vector<Link>& inlinks,
const size_t seq_len) { const size_t seq_len,
bool infer_shape_mode) {
PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided.");
for (size_t i = 0; i < inlinks.size(); ++i) { for (size_t i = 0; i < inlinks.size(); ++i) {
Tensor* input = auto input_var = step_scopes[0]->FindVar(inlinks[i].external);
step_scopes[0]->FindVar(inlinks[i].external)->GetMutable<Tensor>(); PADDLE_ENFORCE(input_var != nullptr,
"input link [%s] is not in scope.",
inlinks[i].external);
Tensor* input = input_var->GetMutable<Tensor>();
DDim dims = input->dims(); DDim dims = input->dims();
PADDLE_ENFORCE(static_cast<size_t>(dims[0]) == seq_len, PADDLE_ENFORCE(static_cast<size_t>(dims[0]) == seq_len,
"all the inlinks must have same length"); "all the inlinks must have same length");
...@@ -41,7 +45,9 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -41,7 +45,9 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
for (size_t j = 0; j < seq_len; j++) { for (size_t j = 0; j < seq_len; j++) {
Tensor* step_input = Tensor* step_input =
step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable<Tensor>(); step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable<Tensor>();
*step_input = input->Slice<float>(j, j + 1); if (!infer_shape_mode) {
*step_input = input->Slice<float>(j, j + 1);
}
step_input->Resize(step_dims); step_input->Resize(step_dims);
} }
} }
...@@ -49,36 +55,41 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -49,36 +55,41 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
void ConcatOutputs(const std::vector<Scope*>& step_scopes, void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& outlinks, const std::vector<Link>& outlinks,
const size_t seq_len) { const size_t seq_len,
bool infer_shape_mode) {
for (size_t i = 0; i < outlinks.size(); i++) { for (size_t i = 0; i < outlinks.size(); i++) {
Tensor* output = auto output_var = step_scopes[0]->FindVar(outlinks[i].external);
step_scopes[0]->FindVar(outlinks[i].external)->GetMutable<Tensor>(); PADDLE_ENFORCE(output_var != nullptr,
"output link [%s] is not in scope.",
// TODO(qingiqng) remove following code after adding outlinks[i].external);
// InferShape in RecurrentGradientOp Tensor* output = output_var->GetMutable<Tensor>();
DDim step_dims = step_scopes[0] if (infer_shape_mode) {
->FindVar(outlinks[i].internal) DDim step_dims = step_scopes[0]
->GetMutable<Tensor>() ->FindVar(outlinks[i].internal)
->dims(); ->GetMutable<Tensor>()
std::vector<int> dims_vec = vectorize(step_dims); ->dims();
dims_vec.insert(dims_vec.begin(), seq_len); std::vector<int> dims_vec = vectorize(step_dims);
output->mutable_data<float>(make_ddim(dims_vec), platform::CPUPlace()); dims_vec.insert(dims_vec.begin(), seq_len);
output->Resize(make_ddim(dims_vec));
for (size_t j = 0; j < seq_len; j++) { } else {
Tensor* step_output = output->mutable_data<float>(platform::CPUPlace());
step_scopes[j]->FindVar(outlinks[i].internal)->GetMutable<Tensor>(); for (size_t j = 0; j < seq_len; j++) {
// TODO(luotao02) data type and platform::DeviceContext() should set Tensor* step_output =
// correctly step_scopes[j]->FindVar(outlinks[i].internal)->GetMutable<Tensor>();
(output->Slice<float>(j, j + 1)) // TODO(luotao02) data type and platform::DeviceContext() should set
.CopyFrom<float>(*step_output, platform::CPUPlace()); // correctly
(output->Slice<float>(j, j + 1))
.CopyFrom<float>(*step_output, platform::CPUPlace());
}
} }
} }
} }
void LinkMemories(const std::vector<Scope*>& scopes, void LinkMemories(const std::vector<Scope*>& scopes,
const std::vector<rnn::MemoryAttr>& memories, const std::vector<rnn::MemoryAttr>& memories,
size_t step_id, const size_t step_id,
int offset) { const int offset,
bool infer_shape_mode) {
PADDLE_ENFORCE(step_id < scopes.size(), PADDLE_ENFORCE(step_id < scopes.size(),
"step [%d] is out of range of step scopes' size [%d]", "step [%d] is out of range of step scopes' size [%d]",
step_id, step_id,
...@@ -95,18 +106,13 @@ void LinkMemories(const std::vector<Scope*>& scopes, ...@@ -95,18 +106,13 @@ void LinkMemories(const std::vector<Scope*>& scopes,
auto scope = scopes[step_id]; auto scope = scopes[step_id];
auto linked_scope = scopes[step_id + offset]; auto linked_scope = scopes[step_id + offset];
for (auto& attr : memories) { for (auto& attr : memories) {
auto mem = scope->NewVar(attr.pre_var)->GetMutable<Tensor>(); auto mem = scope->FindVar(attr.pre_var)->GetMutable<Tensor>();
// maybe share variable is better?
auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable<Tensor>(); auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable<Tensor>();
mem->ShareDataWith<float>(*linked_mem); if (infer_shape_mode) {
mem->Resize(linked_mem->dims());
// TODO(qingqing) remove following code } else {
// the memory of current step should be allocated in step net mem->ShareDataWith<float>(*linked_mem);
auto m = scope->NewVar(attr.var)->GetMutable<Tensor>(); }
// for unit test, as addOp and mulOp are null currently, if not
// mutable_data, mem.data() in output will be error. We will
// remove this line after merge the correct addOp and mulOp.
m->mutable_data<float>(mem->dims(), platform::CPUPlace());
} }
} }
...@@ -175,60 +181,39 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const { ...@@ -175,60 +181,39 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const {
->dims()[0]; ->dims()[0];
CreateScopes(scope); CreateScopes(scope);
auto step_scopes = GetStepScopes(scope); auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(
// SegmentInputs is called in InferShape. The input must hold memory in step_scopes, arg_->inlinks, seq_len_, true /*infer_shape_mode*/);
// SegmentInputs. But the other op only set dimension for the output in InitMemories(step_scopes[0], true /*infer_shape_mode*/);
// InferShape. That's a problem. Wether the RNN op needs InferShape or not?
// Wether the following functions (SegmentInputs, InitMemories, ...) need
// to rewrite for RNN op?
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_);
InitMemories(step_scopes[0]);
PADDLE_ENFORCE(scope.FindVar(arg_->step_net) != nullptr,
"stepnet [%s] is not in scope.",
arg_->step_net);
Variable* net = scope.FindVar(arg_->step_net); Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net"); PADDLE_ENFORCE(net != nullptr, "failed to get step net");
// If the InferShape is called in OperatorBase's run function,
// the rnn op only needs to do InferShape for the first time step
for (size_t i = 0; i < seq_len_; i++) { for (size_t i = 0; i < seq_len_; i++) {
if (i > 0) { if (i > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, i, -1); rnn::LinkMemories(
step_scopes, arg_->memories, i, -1, true /*infer_shape_mode*/);
} }
net->GetMutable<NetOp>()->InferShape(*step_scopes[i]); net->GetMutable<NetOp>()->InferShape(*step_scopes[i]);
} }
rnn::ConcatOutputs(
auto outlinks = arg_->outlinks; step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/);
for (size_t i = 0; i < outlinks.size(); i++) {
DDim step_dims = step_scopes[0]
->FindVar(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
// now only support fixed length
dims_vec.insert(dims_vec.begin(), seq_len_);
Tensor* output =
step_scopes[0]->FindVar(outlinks[i].external)->GetMutable<Tensor>();
output->Resize(make_ddim(dims_vec));
}
} }
void RecurrentAlgorithm::Run(const Scope& scope, void RecurrentAlgorithm::Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const { const platform::DeviceContext& dev_ctx) const {
auto step_scopes = GetStepScopes(scope); auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(
step_scopes, arg_->inlinks, seq_len_, false /*infer_shape_mode*/);
InitMemories(step_scopes[0], false /*infer_shape_mode*/);
Variable* net = scope.FindVar(arg_->step_net); Variable* net = scope.FindVar(arg_->step_net);
for (size_t step_id = 0; step_id < seq_len_; step_id++) { for (size_t step_id = 0; step_id < seq_len_; step_id++) {
// the link memory is done in InferShape
// maybe remove following code after testing
if (step_id > 0) { if (step_id > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1); rnn::LinkMemories(
step_scopes, arg_->memories, step_id, -1, false /*infer_shape_mode*/);
} }
net->GetMutable<NetOp>()->Run(*step_scopes[step_id], dev_ctx); net->GetMutable<NetOp>()->Run(*step_scopes[step_id], dev_ctx);
} }
rnn::ConcatOutputs(
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/);
} }
void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
...@@ -244,18 +229,19 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { ...@@ -244,18 +229,19 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
// Now all variables in scope must be created outside of op. // Now all variables in scope must be created outside of op.
auto net_op = scope.FindVar(arg_->step_net)->GetMutable<NetOp>(); auto net_op = scope.FindVar(arg_->step_net)->GetMutable<NetOp>();
for (auto& input : net_op->inputs_) { for (auto& input : net_op->inputs_) {
// the weight are located in parent scope
if (!step_scope.FindVar(input)) step_scope.NewVar(input); if (!step_scope.FindVar(input)) step_scope.NewVar(input);
} }
for (auto& output : net_op->outputs_) { for (auto& output : net_op->outputs_) {
step_scope.NewVar(output); step_scope.NewVar(output);
} }
step_scopes->emplace_back(&step_scope); step_scopes->emplace_back(&step_scope);
} }
} }
} }
void RecurrentAlgorithm::InitMemories(Scope* step_scope) const { void RecurrentAlgorithm::InitMemories(Scope* step_scope,
bool infer_shape_mode) const {
for (auto& attr : arg_->memories) { for (auto& attr : arg_->memories) {
Tensor* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<Tensor>(); Tensor* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<Tensor>();
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
...@@ -263,13 +249,11 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope) const { ...@@ -263,13 +249,11 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
attr.var, attr.var,
attr.boot_var); attr.boot_var);
Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable<Tensor>(); Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable<Tensor>();
pre_mem->ShareDataWith<float>(*boot_mem); if (infer_shape_mode) {
pre_mem->Resize(boot_mem->dims());
// TODO(qingqing) remove following code } else {
// the memory of current step should be allocated in step net pre_mem->ShareDataWith<float>(*boot_mem);
// here for unit test }
auto cur_step_mem = step_scope->NewVar(attr.var)->GetMutable<Tensor>();
cur_step_mem->mutable_data<float>(boot_mem->dims(), platform::CPUPlace());
} }
} }
...@@ -307,13 +291,14 @@ public: ...@@ -307,13 +291,14 @@ public:
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
const auto& name = RecurrentOp::kArgName; const auto& name = RecurrentOp::kArgName;
// inputs and outputs stored in proto // inputs and outputs stored in proto
AddInput(name.inlinks, "the input that need to be segmented for each step.") AddInput(name.inlinks,
"the inputs that need to be segmented for each step.")
.SetMultiple(); .SetMultiple();
AddInput(name.boot_memories, "variables to initialize memories.") AddInput(name.boot_memories, "variables to initialize memories.")
.SetMultiple(); .SetMultiple();
AddInput(name.step_net, "network shared by all steps."); AddInput(name.step_net, "network shared by all steps.");
AddOutput(name.outlinks, "the output that need to concated for all steps.") AddOutput(name.outlinks, "the outputs that need to be concatenated for all steps.")
.SetMultiple(); .SetMultiple();
AddOutput(name.step_scopes, "step scopes"); AddOutput(name.step_scopes, "step scopes");
...@@ -331,34 +316,39 @@ public: ...@@ -331,34 +316,39 @@ public:
void RecurrentGradientAlgorithm::Run( void RecurrentGradientAlgorithm::Run(
const Scope& scope, const platform::DeviceContext& dev_ctx) const { const Scope& scope, const platform::DeviceContext& dev_ctx) const {
auto step_scopes = GetStepScopes(scope); auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); rnn::SegmentInputs(
PADDLE_ENFORCE(scope.FindVar(arg_->step_net) != nullptr, step_scopes, arg_->inlinks, seq_len_, false /*infer_shape_mode*/);
"step net is not in scope.");
Variable* net = scope.FindVar(arg_->step_net); Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net"); PADDLE_ENFORCE(net != nullptr, "failed to get step net");
for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) {
if (static_cast<size_t>(step_id) != seq_len_ - 1) { if (static_cast<size_t>(step_id) != seq_len_ - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); rnn::LinkMemories(
step_scopes, arg_->memories, step_id, 1, false /*infer_shape_mode*/);
} }
net->GetMutable<NetOp>()->Run(*step_scopes[step_id], dev_ctx); net->GetMutable<NetOp>()->Run(*step_scopes[step_id], dev_ctx);
} }
LinkBootMemoryGradients(step_scopes[0]); LinkBootMemoryGradients(step_scopes[0], false);
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); rnn::ConcatOutputs(
step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/);
} }
void RecurrentGradientAlgorithm::LinkBootMemoryGradients( void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
Scope* step_scope) const { Scope* step_scope, bool infer_shape_mode) const {
for (auto& attr : arg_->memories) { for (auto& attr : arg_->memories) {
Tensor* mem_grad = step_scope->NewVar(attr.var)->GetMutable<Tensor>(); PADDLE_ENFORCE(step_scope->FindVar(attr.var) != nullptr,
PADDLE_ENFORCE(mem_grad != nullptr, "memory variable [%s] does not exist",
"boot_tensor should be retrieved before"); attr.var);
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"memory [%s]'s boot variable [%s] not exists", "boot variable [%s] does not exist",
attr.var,
attr.boot_var); attr.boot_var);
Tensor* mem_grad = step_scope->NewVar(attr.var)->GetMutable<Tensor>();
Tensor* boot_mem_grad = Tensor* boot_mem_grad =
step_scope->NewVar(attr.boot_var)->GetMutable<Tensor>(); step_scope->NewVar(attr.boot_var)->GetMutable<Tensor>();
boot_mem_grad->ShareDataWith<float>(*mem_grad); if (infer_shape_mode) {
boot_mem_grad->Resize(mem_grad->dims());
} else {
boot_mem_grad->ShareDataWith<float>(*mem_grad);
}
} }
} }
...@@ -367,34 +357,20 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { ...@@ -367,34 +357,20 @@ void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const {
->GetMutable<Tensor>() ->GetMutable<Tensor>()
->dims()[0]; ->dims()[0];
auto step_scopes = GetStepScopes(scope); auto step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); rnn::SegmentInputs(
step_scopes, arg_->inlinks, seq_len_, true /*infer_shape_mode*/);
PADDLE_ENFORCE(scope.FindVar(arg_->step_net) != nullptr,
"step net is not in scope.");
Variable* net = scope.FindVar(arg_->step_net); Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net"); PADDLE_ENFORCE(net != nullptr, "failed to get step net");
for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) {
if (static_cast<size_t>(step_id) != seq_len_ - 1) { if (static_cast<size_t>(step_id) != seq_len_ - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); rnn::LinkMemories(
step_scopes, arg_->memories, step_id, 1, true /*infer_shape_mode*/);
} }
net->GetMutable<NetOp>()->InferShape(*step_scopes[step_id]); net->GetMutable<NetOp>()->InferShape(*step_scopes[step_id]);
} }
rnn::ConcatOutputs(
auto outlinks = arg_->outlinks; step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/);
for (size_t i = 0; i < outlinks.size(); i++) { LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/);
DDim step_dims = step_scopes[0]
->FindVar(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
std::vector<int> dims_vec = vectorize(step_dims);
// now only support fixed length
dims_vec.insert(dims_vec.begin(), seq_len_);
Tensor* output =
step_scopes[0]->FindVar(outlinks[i].external)->GetMutable<Tensor>();
output->Resize(make_ddim(dims_vec));
}
LinkBootMemoryGradients(step_scopes[0]);
} }
void RecurrentGradientOp::Init() { void RecurrentGradientOp::Init() {
......
...@@ -72,19 +72,22 @@ struct ArgumentName { ...@@ -72,19 +72,22 @@ struct ArgumentName {
*/ */
void SegmentInputs(const std::vector<Scope*>& step_scopes, void SegmentInputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& inlinks, const std::vector<Link>& inlinks,
const size_t seq_len); const size_t seq_len,
bool infer_shape_mode);
/** /**
* Process outputs of step nets and merge to variables. * Process outputs of step nets and merge to variables.
*/ */
void ConcatOutputs(const std::vector<Scope*>& step_scopes, void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& outlinks, const std::vector<Link>& outlinks,
const size_t seq_len); const size_t seq_len,
bool infer_shape_mode);
void LinkMemories(const std::vector<Scope*>& step_scopes, void LinkMemories(const std::vector<Scope*>& step_scopes,
const std::vector<MemoryAttr>& memories, const std::vector<MemoryAttr>& memories,
size_t step_id, const size_t step_id,
int offset); const int offset,
bool infer_shape_mode);
void InitArgument(const ArgumentName& name, Argument* arg); void InitArgument(const ArgumentName& name, Argument* arg);
...@@ -122,7 +125,7 @@ protected: ...@@ -122,7 +125,7 @@ protected:
return *scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>(); return *scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
} }
void InitMemories(Scope* step_scopes) const; void InitMemories(Scope* step_scopes, bool infer_shape_mode) const;
private: private:
std::unique_ptr<rnn::Argument> arg_; std::unique_ptr<rnn::Argument> arg_;
...@@ -145,7 +148,7 @@ public: ...@@ -145,7 +148,7 @@ public:
void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const; void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const;
void LinkBootMemoryGradients(Scope* step_scopes) const; void LinkBootMemoryGradients(Scope* step_scopes, bool infer_shape_mode) const;
/** /**
* InferShape must be called before Run. * InferShape must be called before Run.
......
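Editor's note: the infer_shape_mode flag threaded through rnn::SegmentInputs, rnn::ConcatOutputs, rnn::LinkMemories and InitMemories lets the same helpers serve both phases: during InferShape only tensor dimensions are propagated, while during Run the data is actually sliced, copied or shared. A condensed sketch of the pattern (a hypothetical helper using the Tensor calls that appear in this diff, not code from the tree):

// Shape-only versus data-carrying linkage, as used by the helpers above.
void LinkOne(Tensor* dst, Tensor* src, bool infer_shape_mode) {
  if (infer_shape_mode) {
    dst->Resize(src->dims());         // InferShape pass: propagate shapes only
  } else {
    dst->ShareDataWith<float>(*src);  // Run pass: alias the underlying memory
  }
}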
...@@ -11,14 +11,15 @@ ...@@ -11,14 +11,15 @@
limitations under the License. limitations under the License.
*/ */
#include "paddle/operators/recurrent_op.h"
#include <glog/logging.h> #include <glog/logging.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/tensor.h" #include "paddle/framework/tensor.h"
#include "paddle/operators/recurrent_network_op.h" #include "paddle/operators/net_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -55,7 +56,7 @@ protected: ...@@ -55,7 +56,7 @@ protected:
w->GetMutable<Tensor>()->mutable_data<float>( w->GetMutable<Tensor>()->mutable_data<float>(
make_ddim(std::vector<int>{30, 30}), platform::CPUPlace()); make_ddim(std::vector<int>{30, 30}), platform::CPUPlace());
for (auto boot : std::vector<std::string>{"x_boot", "h_boot"}) { for (auto boot : std::vector<std::string>{"h_boot"}) {
LOG(INFO) << "create global variable " << boot; LOG(INFO) << "create global variable " << boot;
Variable* h_boot = scope_.NewVar(boot); Variable* h_boot = scope_.NewVar(boot);
h_boot->GetMutable<Tensor>()->mutable_data<float>( h_boot->GetMutable<Tensor>()->mutable_data<float>(
...@@ -79,7 +80,6 @@ protected: ...@@ -79,7 +80,6 @@ protected:
op_desc.add_inputs("x0"); op_desc.add_inputs("x0");
op_desc.add_inputs("x1"); op_desc.add_inputs("x1");
// boot_memories 3 // boot_memories 3
op_desc.add_inputs("x_boot");
op_desc.add_inputs("h_boot"); op_desc.add_inputs("h_boot");
// step net 5 // step net 5
op_desc.add_inputs("step_net"); op_desc.add_inputs("step_net");
...@@ -91,7 +91,7 @@ protected: ...@@ -91,7 +91,7 @@ protected:
auto _input_format = std::vector<int>{ auto _input_format = std::vector<int>{
0, // in_link 0, // in_link
3, // memories 3, // memories
5 // step_net 4 // step_net
}; };
auto input_format = op_desc.add_attrs(); auto input_format = op_desc.add_attrs();
input_format->set_name("input_format"); input_format->set_name("input_format");
...@@ -129,12 +129,11 @@ protected: ...@@ -129,12 +129,11 @@ protected:
inlink_alias->add_strings(item); inlink_alias->add_strings(item);
} }
// pre memories // pre memories
for (const auto& item : for (const auto& item : std::vector<std::string>{"rnn/h@pre"}) {
std::vector<std::string>{"rnn/x@pre", "rnn/h@pre"}) {
pre_memories->add_strings(item); pre_memories->add_strings(item);
} }
// memories // memories
for (const auto& item : std::vector<std::string>{"rnn/x", "rnn/h"}) { for (const auto& item : std::vector<std::string>{"rnn/h"}) {
memories->add_strings(item); memories->add_strings(item);
} }
// output alias // output alias
...@@ -151,14 +150,11 @@ protected: ...@@ -151,14 +150,11 @@ protected:
LOG(INFO) << "create variable step_net"; LOG(INFO) << "create variable step_net";
Variable* var = scope_.NewVar("step_net"); Variable* var = scope_.NewVar("step_net");
auto net = var->GetMutable<NetOp>(); auto net = var->GetMutable<NetOp>();
// rnn/s is net's input or output?
net->inputs_ = {"rnn/h@pre", "rnn/w", "rnn/x"};
net->inputs_ = {"rnn/s", "rnn/h"};
net->AddOp( net->AddOp(
OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {})); OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {}));
net->AddOp( net->AddOp(
OpRegistry::CreateOp("add_two", {"rnn/x", "rnn/s"}, {"rnn/h"}, {})); OpRegistry::CreateOp("add_two", {"x@alias", "rnn/s"}, {"rnn/h"}, {}));
net->CompleteAddOp(); net->CompleteAddOp();
} }
...@@ -297,7 +293,10 @@ protected: ...@@ -297,7 +293,10 @@ protected:
inlink.internal = "rnn/x"; inlink.internal = "rnn/x";
auto step_scopes = auto step_scopes =
scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>(); scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>();
rnn::SegmentInputs(*step_scopes, std::vector<rnn::Link>{inlink}, 10); rnn::SegmentInputs(*step_scopes,
std::vector<rnn::Link>{inlink},
10,
true /*infer_shape_mode*/);
} }
void LinkeMemories() { void LinkeMemories() {
...@@ -311,7 +310,8 @@ protected: ...@@ -311,7 +310,8 @@ protected:
auto step_scopes = auto step_scopes =
scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>(); scope_.FindVar("step_scopes")->GetMutable<std::vector<Scope*>>();
for (int i = 1; i < 10; ++i) { for (int i = 1; i < 10; ++i) {
rnn::LinkMemories(*step_scopes, memories, i, -1); rnn::LinkMemories(
*step_scopes, memories, i, -1, true /*infer_shape_mode*/);
} }
} }
...@@ -333,14 +333,14 @@ TEST(RecurrentOp, LinkMemories) { ...@@ -333,14 +333,14 @@ TEST(RecurrentOp, LinkMemories) {
using namespace paddle::operators; using namespace paddle::operators;
// create and init step scopes // create and init step scopes
int len = 10; size_t len = 10;
std::vector<Scope*> step_scopes; std::vector<Scope*> step_scopes;
for (int i = 0; i < len; ++i) { for (size_t i = 0; i < len; ++i) {
auto scope = new Scope(); auto scope = new Scope();
scope->NewVar("pre_h"); scope->NewVar("pre_h");
auto tensor = scope->NewVar("h")->GetMutable<Tensor>(); auto tensor = scope->NewVar("h")->GetMutable<Tensor>();
float* data = tensor->mutable_data<float>({15, 20}, CPUPlace()); float* data = tensor->mutable_data<float>({15, 20}, CPUPlace());
for (int j = 0; j < 15 * 20; ++j) { for (size_t j = 0; j < 15 * 20; ++j) {
data[j] = rand() * (1. / (double)RAND_MAX); data[j] = rand() * (1. / (double)RAND_MAX);
} }
step_scopes.push_back(scope); step_scopes.push_back(scope);
...@@ -354,24 +354,24 @@ TEST(RecurrentOp, LinkMemories) { ...@@ -354,24 +354,24 @@ TEST(RecurrentOp, LinkMemories) {
std::vector<rnn::MemoryAttr> memories; std::vector<rnn::MemoryAttr> memories;
memories.push_back(mem_attr); memories.push_back(mem_attr);
for (int i = 1; i < len; ++i) { for (size_t i = 1; i < len; ++i) {
rnn::LinkMemories(step_scopes, memories, i, -1); rnn::LinkMemories(step_scopes, memories, i, -1, false /*infer_shape_mode*/);
} }
// check // check
for (int i = 0; i < len - 1; ++i) { for (size_t i = 0; i < len - 1; ++i) {
const float* a = const float* a =
step_scopes[i]->FindVar("h")->GetMutable<Tensor>()->data<float>(); step_scopes[i]->FindVar("h")->GetMutable<Tensor>()->data<float>();
const float* b = step_scopes[i + 1] const float* b = step_scopes[i + 1]
->FindVar("pre_h") ->FindVar("pre_h")
->GetMutable<Tensor>() ->GetMutable<Tensor>()
->data<float>(); ->data<float>();
for (size_t i = 0; i < 15 * 20; ++i) { for (size_t j = 0; j < 15 * 20; ++j) {
ASSERT_FLOAT_EQ(a[i], b[i]); ASSERT_FLOAT_EQ(a[j], b[j]);
} }
} }
for (int i = len - 2; i >= 0; --i) { for (int i = len - 2; i >= 0; --i) {
rnn::LinkMemories(step_scopes, memories, i, 1); rnn::LinkMemories(step_scopes, memories, i, 1, false /*infer_shape_mode*/);
} }
// check // check
for (int i = len - 2; i >= 0; --i) { for (int i = len - 2; i >= 0; --i) {
...@@ -379,8 +379,8 @@ TEST(RecurrentOp, LinkMemories) { ...@@ -379,8 +379,8 @@ TEST(RecurrentOp, LinkMemories) {
step_scopes[i]->FindVar("pre_h")->GetMutable<Tensor>()->data<float>(); step_scopes[i]->FindVar("pre_h")->GetMutable<Tensor>()->data<float>();
const float* b = const float* b =
step_scopes[i + 1]->FindVar("h")->GetMutable<Tensor>()->data<float>(); step_scopes[i + 1]->FindVar("h")->GetMutable<Tensor>()->data<float>();
for (size_t i = 0; i < 15 * 20; ++i) { for (size_t j = 0; j < 15 * 20; ++j) {
ASSERT_FLOAT_EQ(a[i], b[i]); ASSERT_FLOAT_EQ(a[j], b[j]);
} }
} }
...@@ -391,9 +391,3 @@ TEST(RecurrentOp, LinkMemories) { ...@@ -391,9 +391,3 @@ TEST(RecurrentOp, LinkMemories) {
USE_OP(add_two); USE_OP(add_two);
USE_OP(mul); USE_OP(mul);
// int main() {
// //! TODO(yuyang18): Temporary disable this unit-test because implementation
// //! error.
// return 0;
//}
\ No newline at end of file
#define EIGEN_USE_GPU
#include "paddle/operators/rowwise_add_op.h" #include "paddle/operators/rowwise_add_op.h"
REGISTER_OP_GPU_KERNEL(rowwise_add, REGISTER_OP_GPU_KERNEL(rowwise_add,
......
...@@ -33,7 +33,7 @@ public: ...@@ -33,7 +33,7 @@ public:
const int rest_size = input.size() / bias_size; const int rest_size = input.size() / bias_size;
Eigen::DSizes<int, 1> one_d(input.size()); Eigen::DSizes<int, 1> one_d(input.size());
Eigen::DSizes<int, 1> bcast(rest_size); Eigen::DSizes<int, 1> bcast(rest_size);
output.reshape(one_d).device(*(context.GetEigenDevice<Place>())) = output.reshape(one_d).device(context.GetEigenDevice<Place>()) =
input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d); input.reshape(one_d) + bias.broadcast(bcast).reshape(one_d);
} }
}; };
......
#define EIGEN_USE_GPU
#include "paddle/operators/sgd_op.h" #include "paddle/operators/sgd_op.h"
REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>);
\ No newline at end of file
...@@ -29,8 +29,12 @@ public: ...@@ -29,8 +29,12 @@ public:
param_out->mutable_data<T>(ctx.GetPlace()); param_out->mutable_data<T>(ctx.GetPlace());
EigenVector<T>::Flatten(*param_out).device(*(ctx.GetEigenDevice<Place>())) = auto p = EigenVector<T>::Flatten(*param);
EigenVector<T>::Flatten(*param) - lr * EigenVector<T>::Flatten(*grad); auto g = EigenVector<T>::Flatten(*grad);
auto o = EigenVector<T>::Flatten(*param_out);
auto place = ctx.GetEigenDevice<Place>();
o.device(place) = p - lr * g;
} }
}; };
......
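Editor's note: the rewritten kernel is the plain stochastic gradient descent update

\[
\theta \;\leftarrow\; \theta \;-\; \eta\,\nabla_{\theta} L ,
\]

with param, grad and lr standing in for \(\theta\), \(\nabla_{\theta} L\) and \(\eta\); the Eigen expression o.device(place) = p - lr * g evaluates it element-wise on whichever device place selects.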
#define EIGEN_USE_GPU
#include "paddle/operators/sigmoid_op.h" #include "paddle/operators/sigmoid_op.h"
REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::GPUPlace, float>);
...@@ -27,9 +27,11 @@ public: ...@@ -27,9 +27,11 @@ public:
auto output = context.Output<Tensor>(0); auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
EigenVector<T>::Flatten(*output).device( auto X = EigenVector<T>::Flatten(*input);
*(context.GetEigenDevice<Place>())) = auto Y = EigenVector<T>::Flatten(*output);
1.0 / (1.0 + (-1.0 * EigenVector<T>::Flatten(*input)).exp()); auto place = context.GetEigenDevice<Place>();
Y.device(place) = 1.0 / (1.0 + (-1.0 * X).exp());
} }
}; };
} // namespace operators } // namespace operators
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/softmax_op.h" #include "paddle/operators/softmax_op.h"
namespace paddle { namespace paddle {
...@@ -19,12 +20,13 @@ namespace operators { ...@@ -19,12 +20,13 @@ namespace operators {
class SoftmaxOp : public OperatorWithKernel { class SoftmaxOp : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Only one input is need for softmax"); PADDLE_ENFORCE(ctx.InputSize() == 1UL,
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims().size() == 2, "Only one input is needed for softmax");
PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
"The input of softmax op must be matrix"); "The input of softmax op must be matrix");
PADDLE_ENFORCE(ctx.OutputSize() == 1, PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
"Only one output is need for softmax"); "Only one output is needed for softmax");
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims()); ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
} }
}; };
...@@ -40,10 +42,19 @@ public: ...@@ -40,10 +42,19 @@ public:
class SoftmaxOpGrad : public OperatorWithKernel { class SoftmaxOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override {} void InferShape(const InferShapeContext &ctx) const override {
std::string DebugString() const override { PADDLE_ENFORCE(ctx.InputSize() == 3UL,
LOG(INFO) << "SoftmaxOpGrad"; "Input of SoftmaxOpGrad should be 3, X, Y, YG");
return ""; PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
"Output of SoftmaxOpGrad should be 1");
PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
PADDLE_ENFORCE(ctx.InputVar(GRAD_VAR_NAME("Y")) != nullptr,
"Input(Y@GRAD) should not be null");
PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
ctx.Input<Tensor>(GRAD_VAR_NAME("Y"))->dims(),
"the shape of Input(0) and Input(1) should be the same");
ctx.Output<Tensor>(GRAD_VAR_NAME("X"))
->Resize(ctx.Input<Tensor>("Y")->dims());
} }
}; };
...@@ -51,5 +62,7 @@ protected: ...@@ -51,5 +62,7 @@ protected:
} // namespace paddle } // namespace paddle
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<ops::CPUPlace, float>);
REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad);
REGISTER_OP_CPU_KERNEL(softmax_grad,
ops::SoftmaxGradKernel<ops::CPUPlace, float>);
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/softmax_op.h" #include "paddle/operators/softmax_op.h"
REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(softmax_grad, ops::SoftmaxGradKernel<ops::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/ddim.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor.h"
#include "paddle/operators/type_alias.h" #include "paddle/operators/type_alias.h"
namespace paddle { namespace paddle {
...@@ -23,8 +26,8 @@ template <typename Place, typename T> ...@@ -23,8 +26,8 @@ template <typename Place, typename T>
class SoftmaxKernel : public OpKernel { class SoftmaxKernel : public OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const ExecutionContext& context) const override {
auto input = context.Input<Tensor>(0); auto input = context.Input<Tensor>("X");
auto output = context.Output<Tensor>(0); auto output = context.Output<Tensor>("Y");
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
auto logits = EigenMatrix<T>::From(*input); auto logits = EigenMatrix<T>::From(*input);
...@@ -46,9 +49,9 @@ public: ...@@ -46,9 +49,9 @@ public:
.reshape(batch_by_one) .reshape(batch_by_one)
.broadcast(one_by_class)); .broadcast(one_by_class));
softmax.device(*(context.GetEigenDevice<Place>())) = shifted_logits.exp(); softmax.device(context.GetEigenDevice<Place>()) = shifted_logits.exp();
softmax.device(*(context.GetEigenDevice<Place>())) = softmax.device(context.GetEigenDevice<Place>()) =
(softmax * (softmax *
softmax.sum(along_class) softmax.sum(along_class)
.inverse() .inverse()
...@@ -57,5 +60,38 @@ public: ...@@ -57,5 +60,38 @@ public:
.broadcast(one_by_class)); .broadcast(one_by_class));
} }
}; };
template <typename Place, typename T>
class SoftmaxGradKernel : public OpKernel {
public:
void Compute(const ExecutionContext& context) const override {
std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>();
auto Y = context.Input<Tensor>("Y");
auto dY = context.Input<Tensor>(OperatorBase::GRAD_VAR_NAME("Y"));
auto dX = context.Output<Tensor>(OperatorBase::GRAD_VAR_NAME("X"));
dX->mutable_data<T>(context.GetPlace());
const int batch_size = Y->dims()[0];
const int class_num = Y->dims()[1];
Eigen::DSizes<int, 1> along_class(1);
Eigen::DSizes<int, 2> batch_by_one(batch_size, 1);
Eigen::DSizes<int, 2> one_by_class(1, class_num);
auto Y_eigen = EigenMatrix<T>::From(*Y);
auto dY_eigen = EigenMatrix<T>::From(*dY);
auto dX_eigen = EigenMatrix<T>::From(*dX);
auto place = context.GetEigenDevice<Place>();
auto dot = (Y_eigen * dY_eigen)
.sum(along_class)
.eval()
.reshape(batch_by_one)
.broadcast(one_by_class);
dX_eigen.device(place) = (dY_eigen - dot) * Y_eigen;
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
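For reference, a short numpy sketch (not part of the patch) of what the two kernels above compute; the function names are illustrative.
import numpy as np

def softmax_forward(x):
    # SoftmaxKernel: shift by the row max for numerical stability, exponentiate, normalize
    shifted = x - x.max(axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)

def softmax_backward(y, dy):
    # SoftmaxGradKernel: dX = (dY - sum(Y * dY, along class)) * Y
    dot = (y * dy).sum(axis=1, keepdims=True)
    return (dy - dot) * y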
...@@ -15,13 +15,14 @@ ...@@ -15,13 +15,14 @@
#pragma once #pragma once
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using OpKernel = framework::OpKernel; using OpKernel = framework::OpKernel;
using OperatorBase = framework::OperatorBase;
using InferShapeContext = framework::InferShapeContext; using InferShapeContext = framework::InferShapeContext;
using ExecutionContext = framework::ExecutionContext; using ExecutionContext = framework::ExecutionContext;
using Variable = framework::Variable; using Variable = framework::Variable;
...@@ -43,14 +44,16 @@ template <typename T, ...@@ -43,14 +44,16 @@ template <typename T,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>; using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
using Scope = framework::Scope;
using OperatorWithKernel = framework::OperatorWithKernel; using OperatorWithKernel = framework::OperatorWithKernel;
using OperatorBase = framework::OperatorBase;
using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker;
using OpProto = framework::OpProto; using OpProto = framework::OpProto;
using OpAttrChecker = framework::OpAttrChecker; using OpAttrChecker = framework::OpAttrChecker;
using CPUPlace = platform::CPUPlace; using CPUPlace = platform::CPUPlace;
using GPUPlace = platform::GPUPlace; using GPUPlace = platform::GPUPlace;
using NetOp = framework::NetOp;
using OpRegistry = framework::OpRegistry; using OpRegistry = framework::OpRegistry;
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -144,12 +144,12 @@ inline void throw_on_error(T e) { ...@@ -144,12 +144,12 @@ inline void throw_on_error(T e) {
throw_on_error(e, ""); throw_on_error(e, "");
} }
#define PADDLE_THROW(...) \ #define PADDLE_THROW(...) \
do { \ do { \
throw ::paddle::platform::EnforceNotMet( \ throw ::paddle::platform::EnforceNotMet( \
std::make_exception_ptr( \ std::make_exception_ptr( \
std::runtime_error(string::Sprintf(__VA_ARGS__))), \ std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \
__FILE__, __LINE__); \ __FILE__, __LINE__); \
} while (0) } while (0)
#define PADDLE_ENFORCE(...) \ #define PADDLE_ENFORCE(...) \
......
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python cc_library(paddle_pybind SHARED
add_op fc_op sgd_op cross_entropy_op recurrent_network_op fill_zeros_like_op) SRCS pybind.cc
DEPS pybind python backward
fc_op
sgd_op
add_op
mean_op
cross_entropy_op
recurrent_op
fill_zeros_like_op)
...@@ -69,7 +69,7 @@ cat <<EOF ...@@ -69,7 +69,7 @@ cat <<EOF
Installing ... Installing ...
======================================== ========================================
EOF EOF
make install make install -j `nproc`
pip install /usr/local/opt/paddle/share/wheels/*.whl pip install /usr/local/opt/paddle/share/wheels/*.whl
paddle version paddle version
...@@ -122,7 +122,7 @@ cat <<EOF ...@@ -122,7 +122,7 @@ cat <<EOF
Generating .deb package ... Generating .deb package ...
======================================== ========================================
EOF EOF
cpack -D CPACK_GENERATOR='DEB' .. cpack -D CPACK_GENERATOR='DEB' -j `nproc` ..
cat <<EOF cat <<EOF
...@@ -148,7 +148,7 @@ cat >> /paddle/build/Dockerfile <<EOF ...@@ -148,7 +148,7 @@ cat >> /paddle/build/Dockerfile <<EOF
ADD *.deb / ADD *.deb /
# run paddle version to install python packages first # run paddle version to install python packages first
RUN apt-get update &&\ RUN apt-get update &&\
apt-get install -y python-pip && pip install -U pip && \ apt-get install -y wget python-pip && pip install -U pip && \
dpkg -i /*.deb ; apt-get install -f -y && \ dpkg -i /*.deb ; apt-get install -f -y && \
apt-get clean -y && \ apt-get clean -y && \
rm -f /*.deb && \ rm -f /*.deb && \
......
...@@ -6,14 +6,14 @@ mkdir -p $TRAVIS_BUILD_DIR/build ...@@ -6,14 +6,14 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build
# Compile paddle binaries first # Compile paddle binaries first
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF
mkdir output mkdir output
make -j `nproc` make -j `nproc`
find .. -name '*whl' | xargs pip install # install all wheels. find .. -name '*whl' | xargs pip install # install all wheels.
rm -rf * rm -rf *
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON
make -j `nproc` paddle_docs paddle_docs_cn make -j `nproc` paddle_docs paddle_docs_cn
# check websites for broken links # check websites for broken links
......
...@@ -298,6 +298,11 @@ message DetectionOutputConfig { ...@@ -298,6 +298,11 @@ message DetectionOutputConfig {
optional uint32 width = 9 [default = 1]; optional uint32 width = 9 [default = 1];
} }
message ClipConfig {
required double min = 1;
required double max = 2;
}
message LayerInputConfig { message LayerInputConfig {
required string input_layer_name = 1; required string input_layer_name = 1;
optional string input_parameter_name = 2; optional string input_parameter_name = 2;
...@@ -318,6 +323,7 @@ message LayerInputConfig { ...@@ -318,6 +323,7 @@ message LayerInputConfig {
optional RowConvConfig row_conv_conf = 15; optional RowConvConfig row_conv_conf = 15;
optional MultiBoxLossConfig multibox_loss_conf = 16; optional MultiBoxLossConfig multibox_loss_conf = 16;
optional DetectionOutputConfig detection_output_conf = 17; optional DetectionOutputConfig detection_output_conf = 17;
optional ClipConfig clip_conf = 18;
} }
message LayerConfig { message LayerConfig {
......
...@@ -2198,6 +2198,20 @@ class RowConvLayer(LayerBase): ...@@ -2198,6 +2198,20 @@ class RowConvLayer(LayerBase):
self.create_input_parameter(0, psize, dims) self.create_input_parameter(0, psize, dims)
@config_layer('clip')
class ClipLayer(LayerBase):
def __init__(self, name, inputs, min, max, **xargs):
super(ClipLayer, self).__init__(name, 'clip', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'ClipLayer must have one and only one input.')
config_assert(min < max, 'min must be less than max.')
input_layer = self.get_input_layer(0)
self.set_layer_size(input_layer.size)
self.config.inputs[0].clip_conf.min = min
self.config.inputs[0].clip_conf.max = max
# key: cost type # key: cost type
# value: cost class # value: cost class
g_cost_map = {} g_cost_map = {}
...@@ -2754,6 +2768,16 @@ class SumToOneNormLayer(LayerBase): ...@@ -2754,6 +2768,16 @@ class SumToOneNormLayer(LayerBase):
self.set_layer_size(input_layer0.size) self.set_layer_size(input_layer0.size)
@config_layer('row_l2_norm')
class RowL2NormLayer(LayerBase):
def __init__(self, name, inputs, **xargs):
super(RowL2NormLayer, self).__init__(
name, 'row_l2_norm', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'RowL2NormLayer must have 1 input')
input_layer = self.get_input_layer(0)
self.set_layer_size(input_layer.size)
@config_layer('cos_vm') @config_layer('cos_vm')
class CosSimVecMatLayer(LayerBase): class CosSimVecMatLayer(LayerBase):
def __init__(self, name, size, inputs, cos_scale=1.0, device=None): def __init__(self, name, size, inputs, cos_scale=1.0, device=None):
......
...@@ -76,6 +76,7 @@ __all__ = [ ...@@ -76,6 +76,7 @@ __all__ = [
'trans_layer', 'trans_layer',
'rotate_layer', 'rotate_layer',
'sum_to_one_norm_layer', 'sum_to_one_norm_layer',
'row_l2_norm_layer',
'get_output_layer', 'get_output_layer',
'LayerType', 'LayerType',
'context_projection', 'context_projection',
...@@ -128,6 +129,7 @@ __all__ = [ ...@@ -128,6 +129,7 @@ __all__ = [
'prelu_layer', 'prelu_layer',
'gated_unit_layer', 'gated_unit_layer',
'crop_layer', 'crop_layer',
'clip_layer',
'slice_projection', 'slice_projection',
] ]
...@@ -160,6 +162,7 @@ class LayerType(object): ...@@ -160,6 +162,7 @@ class LayerType(object):
BATCH_NORM_LAYER = 'batch_norm' BATCH_NORM_LAYER = 'batch_norm'
NORM_LAYER = 'norm' NORM_LAYER = 'norm'
SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
ROW_L2_NORM_LAYER = 'row_l2_norm'
ADDTO_LAYER = 'addto' ADDTO_LAYER = 'addto'
CONCAT_LAYER = 'concat' CONCAT_LAYER = 'concat'
...@@ -221,6 +224,7 @@ class LayerType(object): ...@@ -221,6 +224,7 @@ class LayerType(object):
PRELU = 'prelu' PRELU = 'prelu'
CROP_LAYER = 'crop' CROP_LAYER = 'crop'
CLIP_LAYER = 'clip'
@staticmethod @staticmethod
def is_layer_type(type_name): def is_layer_type(type_name):
...@@ -2889,6 +2893,42 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): ...@@ -2889,6 +2893,42 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size) name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size)
@wrap_name_default()
@layer_support()
def row_l2_norm_layer(input, name=None, layer_attr=None):
"""
A layer for L2-normalization in each row.
.. math::
out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}}
where :math:`in` is a (batchSize x dataDim) matrix,
and :math:`out` has the same shape (batchSize x dataDim).
The example usage is:
.. code-block:: python
norm = row_l2_norm_layer(input=layer)
:param input: Input layer.
:type input: LayerOutput
:param name: Layer name.
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer(
name=name,
type=LayerType.ROW_L2_NORM_LAYER,
inputs=[input.name],
**ExtraAttr.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size)
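A numpy equivalent (not part of the patch) of the row-wise L2 normalization this layer performs:
import numpy as np

def row_l2_norm(x):
    # normalize every row to unit L2 norm: out[i] = in[i] / sqrt(sum_k in[k]^2)
    return x / np.sqrt((x ** 2).sum(axis=1, keepdims=True))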
@wrap_name_default("addto") @wrap_name_default("addto")
@wrap_act_default(act=LinearActivation()) @wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
...@@ -6046,3 +6086,36 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): ...@@ -6046,3 +6086,36 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
layer_type=LayerType.CROP_LAYER, layer_type=LayerType.CROP_LAYER,
parents=input, parents=input,
size=l.config.size) size=l.config.size)
@wrap_name_default("clip")
def clip_layer(input, min, max, name=None):
"""
A layer for clipping the input value by the threshold.
.. math::
out[i] = \min\left(\max\left(in[i], p_{1}\right), p_{2}\right)
where :math:`p_{1}` and :math:`p_{2}` are the lower (min) and upper (max) clipping thresholds.
.. code-block:: python
clip = clip_layer(input=input_layer, min=-10, max=10)
:param name: The name of this layer.
:type name: basestring
:param input: The input layer.
:type input: LayerOutput
:param min: The lower threshold for clipping.
:type min: double
:param max: The upper threshold for clipping.
:type max: double
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer(
name=name,
type=LayerType.CLIP_LAYER,
inputs=[input.name],
min=min,
max=max)
return LayerOutput(
name, LayerType.CLIP_LAYER, parents=[input], size=input.size)
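A numpy equivalent (not part of the patch) of the element-wise clipping this layer performs:
import numpy as np

def clip(x, min_val, max_val):
    # out[i] = min(max(in[i], min_val), max_val)
    return np.clip(x, min_val, max_val)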
...@@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight ...@@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer) test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__clip_0__"
type: "clip"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
clip_conf {
min: -10
max: 10
}
}
}
input_layer_names: "input"
output_layer_names: "__clip_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__clip_0__"
input_layer_names: "input"
output_layer_names: "__clip_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__row_l2_norm_layer_0__"
type: "row_l2_norm"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
}
}
input_layer_names: "input"
output_layer_names: "__row_l2_norm_layer_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__row_l2_norm_layer_0__"
input_layer_names: "input"
output_layer_names: "__row_l2_norm_layer_0__"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
clip = clip_layer(input=data, min=-10, max=10)
outputs(clip)
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
row_l2_norm = row_l2_norm_layer(input=data)
outputs(row_l2_norm)
...@@ -8,8 +8,8 @@ add_python_test(test_framework ...@@ -8,8 +8,8 @@ add_python_test(test_framework
test_fc_op.py test_fc_op.py
test_add_two_op.py test_add_two_op.py
test_sgd_op.py test_sgd_op.py
test_cross_entropy_op.py
test_mul_op.py test_mul_op.py
test_mean_op.py
test_sigmoid_op.py test_sigmoid_op.py
test_softmax_op.py test_softmax_op.py
test_rowwise_add_op.py test_rowwise_add_op.py
......
...@@ -26,40 +26,45 @@ class OpTestMeta(type): ...@@ -26,40 +26,45 @@ class OpTestMeta(type):
scope = core.Scope() scope = core.Scope()
kwargs = dict() kwargs = dict()
places = []
places.append(core.CPUPlace())
if core.is_compile_gpu():
places.append(core.GPUPlace(0))
for in_name in func.all_input_args: for place in places:
if hasattr(self, in_name): for in_name in func.all_input_args:
kwargs[in_name] = in_name if hasattr(self, in_name):
var = scope.new_var(in_name).get_tensor() kwargs[in_name] = in_name
arr = getattr(self, in_name) var = scope.new_var(in_name).get_tensor()
var.set_dims(arr.shape) arr = getattr(self, in_name)
var.set(arr) var.set_dims(arr.shape)
else: var.set(arr, place)
kwargs[in_name] = "@EMPTY@" else:
kwargs[in_name] = "@EMPTY@"
for out_name in func.all_output_args: for out_name in func.all_output_args:
if hasattr(self, out_name): if hasattr(self, out_name):
kwargs[out_name] = out_name kwargs[out_name] = out_name
scope.new_var(out_name).get_tensor() scope.new_var(out_name).get_tensor()
for attr_name in func.all_attr_args: for attr_name in func.all_attr_args:
if hasattr(self, attr_name): if hasattr(self, attr_name):
kwargs[attr_name] = getattr(self, attr_name) kwargs[attr_name] = getattr(self, attr_name)
op = func(**kwargs) op = func(**kwargs)
op.infer_shape(scope) op.infer_shape(scope)
ctx = core.DeviceContext.cpu_context() ctx = core.DeviceContext.create(place)
op.run(scope, ctx) op.run(scope, ctx)
for out_name in func.all_output_args: for out_name in func.all_output_args:
actual = numpy.array(scope.find_var(out_name).get_tensor()) actual = numpy.array(scope.find_var(out_name).get_tensor())
expect = getattr(self, out_name) expect = getattr(self, out_name)
# TODO(qijun) The default decimal is 7, but numpy.dot and eigen.mul # TODO(qijun) The default decimal is 7, but numpy.dot and eigen.mul
# has some diff, and could not pass unittest. So I set decimal 3 here. # has some diff, and could not pass unittest. So I set decimal 3 here.
# And I will check this in future. # And I will check this in future.
numpy.testing.assert_almost_equal(actual, expect, decimal=3) numpy.testing.assert_almost_equal(actual, expect, decimal=3)
obj.test_all = test_all obj.test_all = test_all
return obj return obj
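A minimal usage sketch (not part of the patch) of the updated OpTestMeta, which now runs each op on CPUPlace and, when PaddlePaddle is compiled with GPU support, on GPUPlace as well; the test class below is illustrative and follows the same pattern as the tests that come next.
import unittest
import numpy
from op_test_util import OpTestMeta

class TestAddTwoSmall(unittest.TestCase):  # illustrative test class
    __metaclass__ = OpTestMeta

    def setUp(self):
        self.type = "add_two"  # a registered op type
        self.X = numpy.random.random((3, 4)).astype("float32")
        self.Y = numpy.random.random((3, 4)).astype("float32")
        self.Out = self.X + self.Y  # expected output checked against the op result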
import unittest import unittest
from op_test_util import OpTestMeta
import numpy import numpy
import paddle.v2.framework.core as core
import paddle.v2.framework.create_op_creation_methods as creation
from op_test_util import OpTestMeta
class TestAddOp(unittest.TestCase): class TestAddOp(unittest.TestCase):
...@@ -8,10 +12,19 @@ class TestAddOp(unittest.TestCase): ...@@ -8,10 +12,19 @@ class TestAddOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "add_two" self.type = "add_two"
self.X = numpy.random.random((342, 345)).astype("float32") self.X = numpy.random.random((102, 105)).astype("float32")
self.Y = numpy.random.random((342, 345)).astype("float32") self.Y = numpy.random.random((102, 105)).astype("float32")
self.Out = self.X + self.Y self.Out = self.X + self.Y
class TestAddGradOp(unittest.TestCase):
def test_add_grad(self):
op = creation.op_creations.add_two(X="X", Y="Y", Out="Out")
backward_op = core.Operator.backward(op, set())
self.assertEqual(backward_op.type(), "add_two_grad")
expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).'''
self.assertEqual(expected, str(backward_op))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -7,17 +7,19 @@ import paddle.v2.framework.create_op_creation_methods as creation ...@@ -7,17 +7,19 @@ import paddle.v2.framework.create_op_creation_methods as creation
class TestFc(unittest.TestCase): class TestFc(unittest.TestCase):
def test_fc(self): def test_fc(self):
scope = core.Scope() scope = core.Scope()
place = core.CPUPlace()
x = scope.new_var("X") x = scope.new_var("X")
x_tensor = x.get_tensor() x_tensor = x.get_tensor()
x_tensor.set_dims([1000, 784]) x_tensor.set_dims([1000, 784])
x_tensor.alloc_float() x_tensor.alloc_float(place)
w = scope.new_var("W") w = scope.new_var("W")
w_tensor = w.get_tensor() w_tensor = w.get_tensor()
w_tensor.set_dims([784, 100]) w_tensor.set_dims([784, 100])
w_tensor.alloc_float() w_tensor.alloc_float(place)
w_tensor.set(numpy.random.random((784, 100)).astype("float32")) w_tensor.set(numpy.random.random((784, 100)).astype("float32"), place)
# Set a real numpy array here. # Set a real numpy array here.
# x_tensor.set(numpy.array([])) # x_tensor.set(numpy.array([]))
...@@ -32,7 +34,7 @@ class TestFc(unittest.TestCase): ...@@ -32,7 +34,7 @@ class TestFc(unittest.TestCase):
op.infer_shape(scope) op.infer_shape(scope)
self.assertEqual([1000, 100], tensor.shape()) self.assertEqual([1000, 100], tensor.shape())
ctx = core.DeviceContext.cpu_context() ctx = core.DeviceContext.create(place)
op.run(scope, ctx) op.run(scope, ctx)
......
import unittest
from op_test_util import OpTestMeta
import numpy as np
class TestMeanOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "mean"
self.X = np.random.random((32, 784)).astype("float32")
self.Out = np.mean(self.X)
if __name__ == '__main__':
unittest.main()
...@@ -8,8 +8,8 @@ class TestMulOp(unittest.TestCase): ...@@ -8,8 +8,8 @@ class TestMulOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "mul" self.type = "mul"
self.X = np.random.random((32, 784)).astype("float32") self.X = np.random.random((32, 84)).astype("float32")
self.Y = np.random.random((784, 100)).astype("float32") self.Y = np.random.random((84, 100)).astype("float32")
self.Out = np.dot(self.X, self.Y) self.Out = np.dot(self.X, self.Y)
......
...@@ -8,8 +8,8 @@ class TestRowwiseAddOp(unittest.TestCase): ...@@ -8,8 +8,8 @@ class TestRowwiseAddOp(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "rowwise_add" self.type = "rowwise_add"
self.X = np.random.random((32, 784)).astype("float32") self.X = np.random.random((32, 84)).astype("float32")
self.b = np.random.random(784).astype("float32") self.b = np.random.random(84).astype("float32")
self.Out = np.add(self.X, self.b) self.Out = np.add(self.X, self.b)
......
...@@ -8,8 +8,8 @@ class TestSGD(unittest.TestCase): ...@@ -8,8 +8,8 @@ class TestSGD(unittest.TestCase):
def setUp(self): def setUp(self):
self.type = "sgd" self.type = "sgd"
self.param = numpy.random.random((342, 345)).astype("float32") self.param = numpy.random.random((102, 105)).astype("float32")
self.grad = numpy.random.random((342, 345)).astype("float32") self.grad = numpy.random.random((102, 105)).astype("float32")
self.learning_rate = 0.1 self.learning_rate = 0.1
self.param_out = self.param - self.learning_rate * self.grad self.param_out = self.param - self.learning_rate * self.grad
......
import unittest import unittest
from op_test_util import OpTestMeta
import numpy as np import numpy as np
import paddle.v2.framework.core as core
import paddle.v2.framework.create_op_creation_methods as creation
from op_test_util import OpTestMeta
def stable_softmax(x): def stable_softmax(x):
...@@ -19,5 +23,63 @@ class TestSoftmaxOp(unittest.TestCase): ...@@ -19,5 +23,63 @@ class TestSoftmaxOp(unittest.TestCase):
self.Y = np.apply_along_axis(stable_softmax, 1, self.X) self.Y = np.apply_along_axis(stable_softmax, 1, self.X)
class TestSoftmaxGradOp(unittest.TestCase):
def test_softmax_grad(self):
op = creation.op_creations.softmax(X="X", Y="Y")
backward_op = core.Operator.backward(op, set())
self.assertEqual(backward_op.type(), "softmax_grad")
expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).'''
self.assertEqual(expected, str(backward_op))
batch_size = 3
class_num = 5
# Initialize X and add 1e-2 for numerical stability
Y = np.random.rand(batch_size, class_num).astype(np.float32)
Y = Y + 1e-2
dY = np.random.rand(batch_size, class_num).astype(np.float32)
# Reference implementation of cross entropy with soft labels
def label_softmax_grad(Y, dY):
dX = Y * 0.0
for i in range(batch_size):
d = np.dot(Y[i, :], dY[i, :])
dX[i, :] = Y[i, :] * (dY[i, :] - d)
return dX
expected = label_softmax_grad(Y, dY)
scope = core.Scope()
places = []
places.append(core.CPUPlace())
if core.is_compile_gpu():
places.append(core.GPUPlace(0))
for place in places:
y = scope.new_var("Y")
y_tensor = y.get_tensor()
y_tensor.set_dims([batch_size, class_num])
y_tensor.alloc_float(place)
y_tensor.set(Y, place)
dy = scope.new_var("Y@GRAD")
dy_tensor = dy.get_tensor()
dy_tensor.set_dims([batch_size, class_num])
dy_tensor.alloc_float(place)
dy_tensor.set(dY, place)
x = scope.new_var("X")
dx = scope.new_var("X@GRAD")
tensor = scope.find_var("X@GRAD").get_tensor()
backward_op.infer_shape(scope)
self.assertEqual([batch_size, class_num], tensor.shape())
ctx = core.DeviceContext.create(place)
backward_op.run(scope, ctx)
actual = np.array(tensor)
np.testing.assert_almost_equal(actual, expected, decimal=3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -7,16 +7,17 @@ class TestScope(unittest.TestCase): ...@@ -7,16 +7,17 @@ class TestScope(unittest.TestCase):
def test_int_tensor(self): def test_int_tensor(self):
scope = core.Scope() scope = core.Scope()
var = scope.new_var("test_tensor") var = scope.new_var("test_tensor")
place = core.CPUPlace()
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set_dims([1000, 784]) tensor.set_dims([1000, 784])
tensor.alloc_int() tensor.alloc_int(place)
tensor_array = numpy.array(tensor) tensor_array = numpy.array(tensor)
self.assertEqual((1000, 784), tensor_array.shape) self.assertEqual((1000, 784), tensor_array.shape)
tensor_array[3, 9] = 1 tensor_array[3, 9] = 1
tensor_array[19, 11] = 2 tensor_array[19, 11] = 2
tensor.set(tensor_array) tensor.set(tensor_array, place)
tensor_array_2 = numpy.array(tensor) tensor_array_2 = numpy.array(tensor)
self.assertEqual(1.0, tensor_array_2[3, 9]) self.assertEqual(1.0, tensor_array_2[3, 9])
...@@ -25,16 +26,18 @@ class TestScope(unittest.TestCase): ...@@ -25,16 +26,18 @@ class TestScope(unittest.TestCase):
def test_float_tensor(self): def test_float_tensor(self):
scope = core.Scope() scope = core.Scope()
var = scope.new_var("test_tensor") var = scope.new_var("test_tensor")
place = core.CPUPlace()
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set_dims([1000, 784]) tensor.set_dims([1000, 784])
tensor.alloc_float() tensor.alloc_float(place)
tensor_array = numpy.array(tensor) tensor_array = numpy.array(tensor)
self.assertEqual((1000, 784), tensor_array.shape) self.assertEqual((1000, 784), tensor_array.shape)
tensor_array[3, 9] = 1.0 tensor_array[3, 9] = 1.0
tensor_array[19, 11] = 2.0 tensor_array[19, 11] = 2.0
tensor.set(tensor_array) tensor.set(tensor_array, place)
tensor_array_2 = numpy.array(tensor) tensor_array_2 = numpy.array(tensor)
self.assertAlmostEqual(1.0, tensor_array_2[3, 9]) self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
......