提交 2762959f 编写于 作者: L Liu Yiqun

Merge branch 'develop' into core_inference_prepare

...@@ -62,29 +62,33 @@ endif() ...@@ -62,29 +62,33 @@ endif()
## Then find the reference-cblas. www.netlib.org/blas/ ## Then find the reference-cblas. www.netlib.org/blas/
set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
"Folder contains reference-cblas") "Folder contains reference-cblas")
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS if(NOT CMAKE_CROSSCOMPILING)
${REFERENCE_CBLAS_ROOT}/include set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
/usr/include ${REFERENCE_CBLAS_ROOT}/include
/usr/include/cblas /usr/include
) /usr/include/cblas
)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
${REFERENCE_CBLAS_ROOT}/lib set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
/usr/lib ${REFERENCE_CBLAS_ROOT}/lib
/usr/lib/blas/reference/ /usr/lib
/usr/lib/reference/ /usr/lib/blas/reference/
) /usr/lib/reference/
)
else()
# Diable the finding of reference cblas under host's system path
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
endif()
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS}) ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) ${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER REFERENCE) set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
......
...@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) ...@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE) SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
IF(APPLE) IF(APPLE)
SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh) SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
ELSE() ELSE()
SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin) SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
ENDIF() ENDIF()
ExternalProject_Add( ExternalProject_Add(
extern_grpc extern_grpc
DEPENDS protobuf zlib DEPENDS protobuf zlib
GIT_REPOSITORY "https://github.com/grpc/grpc.git" GIT_REPOSITORY "https://github.com/grpc/grpc.git"
GIT_TAG "v1.8.x" GIT_TAG "v1.11.x"
PREFIX ${GRPC_SOURCES_DIR} PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -11,19 +11,20 @@ ...@@ -11,19 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
IF(MOBILE_INFERENCE) if(MOBILE_INFERENCE OR RPI)
return() return()
ENDIF() endif()
include (ExternalProject) include (ExternalProject)
# NOTE: snappy is needed when linking with recordio # NOTE: snappy is needed when linking with recordio
SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy) set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy) set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE) set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
ExternalProject_Add( ExternalProject_Add(
extern_snappy extern_snappy
...@@ -51,8 +52,7 @@ ExternalProject_Add( ...@@ -51,8 +52,7 @@ ExternalProject_Add(
) )
add_library(snappy STATIC IMPORTED GLOBAL) add_library(snappy STATIC IMPORTED GLOBAL)
set_property(TARGET snappy PROPERTY IMPORTED_LOCATION set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})
"${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
include_directories(${SNAPPY_INCLUDE_DIR}) include_directories(${SNAPPY_INCLUDE_DIR})
add_dependencies(snappy extern_snappy) add_dependencies(snappy extern_snappy)
...@@ -11,9 +11,8 @@ ...@@ -11,9 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#
IF(MOBILE_INFERENCE) IF(MOBILE_INFERENCE OR RPI)
return() return()
ENDIF() ENDIF()
...@@ -21,9 +20,11 @@ include (ExternalProject) ...@@ -21,9 +20,11 @@ include (ExternalProject)
# NOTE: snappy is needed when linking with recordio # NOTE: snappy is needed when linking with recordio
SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream) set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE) set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
ExternalProject_Add( ExternalProject_Add(
extern_snappystream extern_snappystream
...@@ -51,8 +52,7 @@ ExternalProject_Add( ...@@ -51,8 +52,7 @@ ExternalProject_Add(
) )
add_library(snappystream STATIC IMPORTED GLOBAL) add_library(snappystream STATIC IMPORTED GLOBAL)
set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})
"${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappysteam to include its own headers. include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappysteam to include its own headers.
include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers. include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers.
......
...@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME) ...@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc) list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc) add_dependencies(${TARGET_NAME} warpctc)
endif() endif()
if("${cc_library_DEPS}" MATCHES "ARCHIVE_START") target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
# WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
else()
target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
endif() endif()
...@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME) ...@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS) set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS}) add_executable(${TARGET_NAME} ${cc_test_SRCS})
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS) target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
endif()
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS} COMMAND ${TARGET_NAME} ${cc_test_ARGS}
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set_property(GLOBAL PROPERTY FLUID_MODULES "") set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library # find all fluid modules is used for paddle fluid static library
function(find_fluid_modules TARGET_NAME) function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos) string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1) if(pos GREATER 1)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
...@@ -77,6 +92,23 @@ elseif (WITH_MKLML) ...@@ -77,6 +92,23 @@ elseif (WITH_MKLML)
) )
endif() endif()
if(NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
copy(snappy_lib
SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream")
copy(snappystream_lib
SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib")
copy(zlib_lib
SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
endif()
# paddle fluid module # paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid") set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")
......
...@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY) ...@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
endif() endif()
add_subdirectory(testing) add_subdirectory(testing)
if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS) if(NOT MOBILE_INFERENCE AND NOT RPI)
add_subdirectory(fluid) add_subdirectory(fluid)
endif() endif()
...@@ -3,6 +3,7 @@ add_subdirectory(platform) ...@@ -3,6 +3,7 @@ add_subdirectory(platform)
add_subdirectory(framework) add_subdirectory(framework)
add_subdirectory(operators) add_subdirectory(operators)
add_subdirectory(pybind) add_subdirectory(pybind)
add_subdirectory(inference)
add_subdirectory(string) add_subdirectory(string)
add_subdirectory(recordio) add_subdirectory(recordio)
# NOTE: please add subdirectory inference at last.
add_subdirectory(inference)
...@@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD ...@@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
COMMENT "Copy generated python proto into directory paddle/fluid/proto." COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
framework_proto backward glog lod_rank_table feed_fetch_method) framework_proto glog lod_rank_table feed_fetch_method)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor) cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor)
......
此差异已折叠。
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
namespace paddle {
namespace framework {
// Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment.
extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars);
struct GradVarInfo {
GradVarInfo() {}
GradVarInfo(const std::string& name, int block_idx, int op_idx)
: name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
bool operator==(const GradVarInfo& b) const {
return name_ == b.name_ && block_idx_ == b.block_idx_ &&
op_idx_ == b.op_idx_;
}
std::string name_;
int block_idx_;
int op_idx_;
};
using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework
} // namespace paddle
此差异已折叠。
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/computation_op_handle.h"
#include <string>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
...@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() { ...@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() {
} }
} }
op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get<Scope *>(), place_); op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
} }
std::string ComputationOpHandle::Name() const { return op_->Type(); } std::string ComputationOpHandle::Name() const { return op_->Type(); }
......
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
#include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/fetch_op_handle.h"
#include <string>
#include <vector>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
...@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() { ...@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() {
for (size_t i = 0; i < scopes.size(); ++i) { for (size_t i = 0; i < scopes.size(); ++i) {
auto &scope = scopes[i]; auto &scope = scopes[i];
auto &t = scope->FindVar(var_name)->Get<framework::LoDTensor>(); auto &t = scope->FindVar(kLocalExecScopeName)
->Get<Scope *>()
->FindVar(var_name)
->Get<framework::LoDTensor>();
if (platform::is_gpu_place(var->place_)) { if (platform::is_gpu_place(var->place_)) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]); TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);
......
...@@ -24,6 +24,8 @@ namespace paddle { ...@@ -24,6 +24,8 @@ namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@";
class OpHandleBase { class OpHandleBase {
private: private:
DISABLE_COPY_AND_ASSIGN(OpHandleBase); DISABLE_COPY_AND_ASSIGN(OpHandleBase);
......
...@@ -15,13 +15,15 @@ ...@@ -15,13 +15,15 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/ssa_graph.h" #include "paddle/fluid/framework/details/ssa_graph.h"
#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/feed_fetch_type.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
class SSAGraphExecutor { class SSAGraphExecutor {
DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor); DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor);
......
...@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( ...@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
ready_ops.clear(); ready_ops.clear();
}; };
// Create local scopes.
for (auto &scope : local_scopes_) {
auto &local_scope = scope->NewScope();
*scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>() = &local_scope;
}
// Step 3. Execution // Step 3. Execution
while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) { while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) {
// 1. Run All Ready ops // 1. Run All Ready ops
...@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( ...@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
PADDLE_ENFORCE(ready_ops.empty()); PADDLE_ENFORCE(ready_ops.empty());
PADDLE_ENFORCE(delayed_ops.empty()); PADDLE_ENFORCE(delayed_ops.empty());
PADDLE_ENFORCE(blocked_by_delayed_ops.empty()); PADDLE_ENFORCE(blocked_by_delayed_ops.empty());
++computation_count_;
auto sync_computation = [&] {
computation_count_ = 0;
// Wait All computational streams
for (auto p : this->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
for (auto &scope : local_scopes_) {
scope->DropKids();
}
};
// Wait FetchOps. // Wait FetchOps.
if (!fetch_ops.empty()) { if (!fetch_ops.empty()) {
fetch_ops.clear(); fetch_ops.clear();
sync_computation();
}
if (computation_count_ == max_async_computation) {
sync_computation();
}
// NOTE: the temp scope can be dropped lazily if needed.
// Drop tmp scopes;
for (auto &scope : local_scopes_) {
auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>();
kid = nullptr;
} }
return fetch_data; return fetch_data;
......
...@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { ...@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
std::unique_ptr<platform::EnforceNotMet> exception_; std::unique_ptr<platform::EnforceNotMet> exception_;
std::atomic<int> running_ops_; std::atomic<int> running_ops_;
bool allow_op_delay_; bool allow_op_delay_;
size_t computation_count_{0};
size_t max_async_computation{100};
}; };
} // namespace details } // namespace details
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/parallel_executor.h"
#include <string> #include <string>
#include <tuple>
#include <vector> #include <vector>
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -41,6 +42,8 @@ class ParallelExecutorPrivate { ...@@ -41,6 +42,8 @@ class ParallelExecutorPrivate {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_; std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif #endif
std::vector<std::tuple<std::string, proto::VarType::Type, bool>> var_types_;
}; };
std::vector<Scope *> &ParallelExecutor::GetLocalScopes() { std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
...@@ -97,14 +100,9 @@ ParallelExecutor::ParallelExecutor( ...@@ -97,14 +100,9 @@ ParallelExecutor::ParallelExecutor(
allow_op_delay)); allow_op_delay));
// Step 3. Create vars in each scope; // Step 3. Create vars in each scope;
for (auto *scope : member_->local_scopes_) { for (auto *var : main_program.Block(0).AllVars()) {
for (auto *var : main_program.Block(0).AllVars()) { member_->var_types_.emplace_back(var->Name(), var->GetType(),
if (scope->FindVar(var->Name()) != nullptr) { var->Persistable());
continue;
}
InitializeVariable(scope->Var(var->Name()), var->GetType());
}
} }
} }
...@@ -163,9 +161,42 @@ void ParallelExecutor::Run( ...@@ -163,9 +161,42 @@ void ParallelExecutor::Run(
const std::unordered_map<std::string, LoDTensor> &feed_tensors) { const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
platform::RecordBlock b(0); platform::RecordBlock b(0);
SplitTensorToPlaces(feed_tensors); SplitTensorToPlaces(feed_tensors);
// Create local scopes.
for (auto &scope : member_->local_scopes_) {
Scope &local_scope = scope->NewScope();
*scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
&local_scope;
for (auto &name_type_pair : member_->var_types_) {
if (scope->FindVar(std::get<0>(name_type_pair)) != nullptr) {
continue;
}
if (std::get<2>(name_type_pair)) { // Persistable
InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
std::get<1>(name_type_pair));
} else {
InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
std::get<1>(name_type_pair));
}
}
}
auto fetch_data = member_->executor_->Run(fetch_tensors); auto fetch_data = member_->executor_->Run(fetch_tensors);
*member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() = *member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() =
fetch_data; fetch_data;
// Wait All computational streams
for (auto p : member_->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
for (auto &scope : member_->local_scopes_) {
auto &local_scope =
*scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
scope->DeleteScope(local_scope);
local_scope = nullptr;
}
} }
void ParallelExecutor::SplitTensorToPlaces( void ParallelExecutor::SplitTensorToPlaces(
......
...@@ -14,18 +14,17 @@ limitations under the License. */ ...@@ -14,18 +14,17 @@ limitations under the License. */
#include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/prune.h"
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include <gtest/gtest.h>
namespace f = paddle::framework; namespace f = paddle::framework;
namespace ops = paddle::operators;
void AddOp(const std::string &type, const f::VariableNameMap &inputs, void AddOp(const std::string &type, const f::VariableNameMap &inputs,
const f::VariableNameMap &outputs, f::AttributeMap attrs, const f::VariableNameMap &outputs, f::AttributeMap attrs,
......
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init) set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
cc_library(paddle_fluid_api cc_library(paddle_fluid_api
SRCS io.cc SRCS io.cc
...@@ -11,7 +11,7 @@ cc_library(paddle_fluid DEPS ${fluid_modules}) ...@@ -11,7 +11,7 @@ cc_library(paddle_fluid DEPS ${fluid_modules})
# Create shared library # Create shared library
cc_library(paddle_fluid_shared SHARED cc_library(paddle_fluid_shared SHARED
SRCS io.cc SRCS io.cc
DEPS ARCHIVE_START ${GLOB_OP_LIB} ${FLUID_CORE_MODULES} ARCHIVE_END) DEPS ${fluid_modules})
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid) set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
if(NOT APPLE) if(NOT APPLE)
# TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac. # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
......
...@@ -17,10 +17,16 @@ limitations under the License. */ ...@@ -17,10 +17,16 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/pybind/pybind.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
// Temporarilly add this function for exposing framework::InitDevices() when
// linking the inference shared library.
void Init(bool init_p2p) { framework::InitDevices(init_p2p); }
void ReadBinaryFile(const std::string& filename, std::string& contents) { void ReadBinaryFile(const std::string& filename, std::string& contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary); std::ifstream fin(filename, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", filename); PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", filename);
......
...@@ -18,12 +18,15 @@ limitations under the License. */ ...@@ -18,12 +18,15 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
void Init(bool init_p2p);
void LoadPersistables(framework::Executor& executor, framework::Scope& scope, void LoadPersistables(framework::Executor& executor, framework::Scope& scope,
const framework::ProgramDesc& main_program, const framework::ProgramDesc& main_program,
const std::string& dirname, const std::string& dirname,
......
...@@ -17,7 +17,7 @@ function(inference_test TARGET_NAME) ...@@ -17,7 +17,7 @@ function(inference_test TARGET_NAME)
string(REGEX REPLACE "^_$" "" arg "${arg}") string(REGEX REPLACE "^_$" "" arg "${arg}")
cc_test(test_inference_${TARGET_NAME}${arg} cc_test(test_inference_${TARGET_NAME}${arg}
SRCS test_inference_${TARGET_NAME}.cc SRCS test_inference_${TARGET_NAME}.cc
DEPS ARCHIVE_START paddle_fluid ARCHIVE_END DEPS paddle_fluid
ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.inference.model) ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.inference.model)
set_tests_properties(test_inference_${TARGET_NAME}${arg} set_tests_properties(test_inference_${TARGET_NAME}${arg}
PROPERTIES DEPENDS test_${TARGET_NAME}) PROPERTIES DEPENDS test_${TARGET_NAME})
......
...@@ -100,7 +100,7 @@ function(op_library TARGET) ...@@ -100,7 +100,7 @@ function(op_library TARGET)
endif() endif()
# Define operators that don't need pybind here. # Define operators that don't need pybind here.
foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op") foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}") if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1) set(pybind_flag 1)
endif() endif()
...@@ -199,7 +199,6 @@ else() ...@@ -199,7 +199,6 @@ else()
set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op) set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op)
endif() endif()
op_library(cond_op DEPS framework_proto tensor net_op)
op_library(cross_entropy_op DEPS cross_entropy) op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(softmax_op DEPS softmax) op_library(softmax_op DEPS softmax)
...@@ -259,7 +258,6 @@ endforeach() ...@@ -259,7 +258,6 @@ endforeach()
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op) cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/scatter.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
using Scope = framework::Scope;
using Variable = framework::Variable;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DDim = framework::DDim;
framework::Scope& CondOp::AddSubScope(const Scope& scope) const {
auto sub_scopes_var = scope.FindVar("SubScopes");
PADDLE_ENFORCE_NOT_NULL(sub_scopes_var,
"Output(SubScopes) of CondOp should not be null.");
auto sub_scopes = sub_scopes_var->GetMutable<std::vector<Scope*>>();
auto& sub_scope = scope.NewScope();
sub_scopes->push_back(&sub_scope);
return sub_scope;
}
std::vector<framework::Scope*>& CondOp::GetSubScopes(
const framework::Scope& scope) const {
auto sub_scopes_var = scope.FindVar("SubScopes");
PADDLE_ENFORCE_NOT_NULL(sub_scopes_var,
"Output(SubScopes) of CondOp should not be null.");
return *sub_scopes_var->GetMutable<std::vector<framework::Scope*>>();
}
LoDTensor& CondOp::AddIndexTensor(const Scope& scope) const {
auto index_tensors_var = scope.FindVar("IndexTensors");
PADDLE_ENFORCE_NOT_NULL(index_tensors_var,
"Output(IndexTensors) of CondOp should not be null.");
auto& index_tensors =
*index_tensors_var->GetMutable<std::vector<LoDTensor>>();
index_tensors.push_back(LoDTensor());
return index_tensors.back();
}
std::vector<framework::LoDTensor>& CondOp::GetIndexTensors(
const framework::Scope& scope) const {
auto* index_tensors_var = scope.FindVar("IndexTensors");
PADDLE_ENFORCE_NOT_NULL(index_tensors_var,
"Output(IndexTensors) of CondOp should not be null.");
return *index_tensors_var->GetMutable<std::vector<framework::LoDTensor>>();
}
void CondOp::PrepareDataForSubnet(
const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const {
PADDLE_ENFORCE(!Inputs("Xs").empty(), "Inputs(Xs) of CondOp can't be empty.");
for (int i = 0; i < BRANCH_NUM; ++i) {
// Create two sub scopes for true and false branches
// sub_scopes[0] for the true branch
// sub_scopes[1] for the false branch
AddSubScope(scope);
// Create two tensors for true and false indices:
// index_tensors[0] for the true branch
// index_tensors[1] for the false branch
AddIndexTensor(scope);
}
Variable* cond_var = scope.FindVar(Input("Cond"));
PADDLE_ENFORCE_NOT_NULL(cond_var,
"Input(Cond) of CondOp should not be null.");
const LoDTensor* cond = cond_var->GetMutable<LoDTensor>();
// get the true/false index at runtime according to cond tensor
// index_vectors[0]: vector<int>, contains all index for cond[i] == true
// index_vectors[1]: vector<int>, contains all index for cond[i] == false
std::vector<std::vector<int>> index_vectors;
index_vectors.resize(BRANCH_NUM);
const int* cond_data = cond->data<int>();
for (int i = 0; i < cond->dims()[0]; ++i) {
if (cond_data[i])
index_vectors[TRUE_BRANCH].push_back(i);
else
index_vectors[FALSE_BRANCH].push_back(i);
}
// put index_vectors[0] and index_vectors[1] into two tensors:
// index_tensors[0] and index_tensors[1]
std::vector<framework::LoDTensor>& index_tensors = GetIndexTensors(scope);
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
for (int i = 0; i < BRANCH_NUM; ++i) {
DDim dim = {static_cast<int64_t>(index_vectors[i].size())};
int* index_tensor_data_ptr =
index_tensors[i].mutable_data<int>(dim, platform::CPUPlace());
memcpy(index_tensor_data_ptr, index_vectors[i].data(),
dim[0] * sizeof(int));
}
// create input in subscopes according to index_vectors
for (auto& input : Inputs("Xs")) {
Variable* var_parent = scope.FindVar(input);
PADDLE_ENFORCE_NOT_NULL(var_parent);
const auto* tensor_parent = &var_parent->Get<LoDTensor>();
for (int i = 0; i < BRANCH_NUM; ++i) {
Variable* var_child = sub_scopes[i]->FindVar(input);
PADDLE_ENFORCE_NOT_NULL(var_child);
auto* tensor_child = var_child->GetMutable<LoDTensor>();
// Resize child
DDim dim = tensor_parent->dims();
dim[0] = index_tensors[i].dims()[0];
tensor_child->mutable_data<float>(dim, platform::CPUPlace());
CPUGather<float>(dev_ctx, *tensor_parent, index_tensors[i], tensor_child);
}
}
// create output_tensors in subscope for sub_net
for (int i = 0; i < BRANCH_NUM; ++i) {
for (auto& output : (*sub_net_op_[i]).Outputs()) {
for (auto& var_name : output.second) {
sub_scopes[i]->Var(var_name);
}
}
}
}
void CondOp::MergeDataFromSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const {
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
const std::vector<framework::LoDTensor>& index_tensors =
GetIndexTensors(scope);
// Infer the output dim, out_dim[0] = true_dim[0] + false_dim[0]
PADDLE_ENFORCE(!Outputs("Outs").empty(),
"Outputs(Outs) of CondOp can't be empty.");
for (auto& output : Outputs("Outs")) {
const LoDTensor* tensor_t_out =
&sub_scopes[TRUE_BRANCH]->FindVar(output)->Get<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL");
const LoDTensor* tensor_f_out =
&sub_scopes[FALSE_BRANCH]->FindVar(output)->Get<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL");
auto* var_out = scope.FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_out, "Output not found");
LoDTensor* tensor_out = var_out->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_t_out,
"True output tensor should not be NULL");
DDim true_dim = tensor_t_out->dims();
DDim false_dim = tensor_f_out->dims();
true_dim[0] = 0;
false_dim[0] = 0;
PADDLE_ENFORCE_EQ(true_dim, false_dim,
"Outputs not of the same shape except the first dim");
DDim out_dim = tensor_t_out->dims();
out_dim[0] = tensor_t_out->dims()[0] + tensor_f_out->dims()[0];
tensor_out->Resize(out_dim);
tensor_out->mutable_data<float>(platform::CPUPlace());
}
// merge output results:
// output_tensor = true_output_tensor + false_output_tensor
for (auto& output : Outputs("Outs")) {
Variable* var_parent = scope.FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_parent);
auto* tensor_parent = var_parent->GetMutable<LoDTensor>();
for (int i = 0; i < BRANCH_NUM; ++i) {
Variable* var_child = sub_scopes[i]->FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_child);
auto* tensor_child = &var_child->Get<LoDTensor>();
ScatterAssign<float>(dev_ctx, *tensor_child, index_tensors[i],
tensor_parent);
}
}
}
void CondOp::RunImpl(const Scope& scope, const platform::Place& place) const {
// get device context from pool
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& dev_ctx = *pool.Get(place);
PrepareDataForSubnet(scope, dev_ctx);
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
for (int i = 0; i < BRANCH_NUM; ++i) {
sub_net_op_[i]->Run(*sub_scopes[i], place);
}
MergeDataFromSubnet(scope, dev_ctx);
}
class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
public:
CondOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Cond", "The condition, which is a bool vector");
AddInput("Xs", "Inputs of Subnets").AsDuplicable();
AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable();
AddOutput("SubScopes", "sub scopes for true and false branches");
AddOutput("IndexTensors", "Index Tensors contains indices for true/false");
AddComment(R"DOC(
Sample Dependent Conditional Operator.
Given Cond[i] as a 1/0 vector to indicate true/false:
Out[i] = subnet_true[i], if Cond[i] == true
Out[i] = subnet_false[i], if Cond[i] == false
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp,
paddle::operators::CondOpProtoAndCheckerMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/net_op.h"
namespace paddle {
namespace operators {
/*
* @brief CondOp is a dynamic if-else Operator
*
* It has a input tensor named cond indicating which netop each instance will
* run.
*
* if cond == 1, it will run true_net, which is a NetOp.
*
* if cond == 0, it will run false_net, which is another NetOp.
*/
class CondOp : public framework::OperatorBase {
public:
CondOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {
sub_net_op_.resize(BRANCH_NUM);
}
CondOp(const CondOp& o)
: framework::OperatorBase(
static_cast<const framework::OperatorBase&>(o)) {
// TODO(yuyang18): Implement copy ctor well.
PADDLE_THROW("Not implemented");
}
framework::Scope& AddSubScope(const framework::Scope& scope) const;
std::vector<framework::Scope*>& GetSubScopes(
const framework::Scope& scope) const;
framework::LoDTensor& AddIndexTensor(const framework::Scope& scope) const;
std::vector<framework::LoDTensor>& GetIndexTensors(
const framework::Scope& scope) const;
void PrepareDataForSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const;
void MergeDataFromSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const;
/*
* Set True Block
*/
void set_truenet(std::unique_ptr<OperatorBase>&& net) {
sub_net_op_[TRUE_BRANCH] = std::move(net);
}
/*
* Set False Block
*/
void set_falsenet(std::unique_ptr<OperatorBase>&& net) {
sub_net_op_[FALSE_BRANCH] = std::move(net);
}
private:
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override;
private:
const int TRUE_BRANCH = 0;
const int FALSE_BRANCH = 1;
const int BRANCH_NUM = 2;
// sub_net_op_[0]: subnet_t
// sub_net_op_[1]: subnet_f
std::vector<std::unique_ptr<framework::OperatorBase>> sub_net_op_;
};
} // namespace operators
} // namespace paddle
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include <stdio.h> #include <stdio.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/host_vector.h> #include <thrust/host_vector.h>
#include <vector>
#include "paddle/fluid/operators/ctc_align_op.h" #include "paddle/fluid/operators/ctc_align_op.h"
namespace paddle { namespace paddle {
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string.h> #include <string.h>
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
...@@ -106,18 +107,18 @@ information. However, the output only shares the LoD information with input $X$. ...@@ -106,18 +107,18 @@ information. However, the output only shares the LoD information with input $X$.
protected: protected:
std::string comment_; std::string comment_;
void Replace(std::string& src, std::string from, std::string to) { void Replace(std::string* src, std::string from, std::string to) {
std::size_t len_from = std::strlen(from.c_str()); std::size_t len_from = std::strlen(from.c_str());
std::size_t len_to = std::strlen(to.c_str()); std::size_t len_to = std::strlen(to.c_str());
for (std::size_t pos = src.find(from); pos != std::string::npos; for (std::size_t pos = src->find(from); pos != std::string::npos;
pos = src.find(from, pos + len_to)) { pos = src->find(from, pos + len_to)) {
src.replace(pos, len_from, to); src->replace(pos, len_from, to);
} }
} }
void SetComment(std::string name, std::string equation) { void SetComment(std::string name, std::string equation) {
Replace(comment_, "{name}", name); Replace(&comment_, "{name}", name);
Replace(comment_, "{equation}", equation); Replace(&comment_, "{equation}", equation);
} }
}; };
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/gru_op.h" #include "paddle/fluid/operators/gru_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,15 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/gru_compute.h" #include "paddle/fluid/operators/math/gru_compute.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence2batch.h" #include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h" #include "paddle/fluid/operators/im2sequence_op.h"
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <vector>
#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/label_smooth_op.h" #include "paddle/fluid/operators/label_smooth_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -100,7 +100,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> { ...@@ -100,7 +100,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto x_row_max = EigenMatrix<T>::From(emission_row_max); auto x_row_max = EigenMatrix<T>::From(emission_row_max);
x_row_max.device(place) = x_row_max.device(place) =
x.maximum(Eigen::DSizes<int, 1>(1)) x.maximum(Eigen::DSizes<int, 1>(1))
.reshape(Eigen::DSizes<int, 2>(int(batch_size), 1)); .reshape(Eigen::DSizes<int, 2>(static_cast<int>(batch_size), 1));
auto x_exps = EigenMatrix<T>::From(*emission_exps); auto x_exps = EigenMatrix<T>::From(*emission_exps);
x_exps.device(place) = x_exps.device(place) =
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/logical_op.h" #include "paddle/fluid/operators/logical_op.h"
#include <string>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
namespace paddle { namespace paddle {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/lrn_op.h" #include "paddle/fluid/operators/lrn_op.h"
#include <string>
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#endif #endif
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/lstm_op.h" #include "paddle/fluid/operators/lstm_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h" #include "paddle/fluid/operators/math/lstm_compute.h"
......
...@@ -18,6 +18,7 @@ https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.c ...@@ -18,6 +18,7 @@ https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.c
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cross_entropy_op.h" #include "paddle/fluid/operators/cross_entropy_op.h"
#include "paddle/fluid/operators/lstm_unit_op.h"
#include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/hostdevice.h" #include "paddle/fluid/platform/hostdevice.h"
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/lstmp_op.h" #include "paddle/fluid/operators/lstmp_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h" #include "paddle/fluid/operators/math/lstm_compute.h"
......
...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/matmul_op.h" #include "paddle/fluid/operators/matmul_op.h"
#include <algorithm>
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <algorithm>
#include <functional>
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/matmul.h" #include "paddle/fluid/operators/math/matmul.h"
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
* limitations under the License. */ * limitations under the License. */
#include "paddle/fluid/operators/maxout_op.h" #include "paddle/fluid/operators/maxout_op.h"
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/minus_op.h" #include "paddle/fluid/operators/minus_op.h"
#include "paddle/fluid/operators/net_op.h"
#include <string>
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/momentum_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/mul_op.h" #include "paddle/fluid/operators/mul_op.h"
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/net_op.h"
#include <set>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
const char NetOp::kAll[] = "all";
void NetOp::CompleteAddOp(bool calc) {
add_op_done_ = true;
if (!calc) return;
std::set<std::string> input_set;
std::set<std::string> output_set;
for (auto& op : ops_) {
for (auto& ipt : op->Inputs()) {
for (auto& var_name : ipt.second) {
// If input variable has been in output set, then it will be
// added into intermediate_outputs_. Otherwise, it will be
// added into input set.
if (Contains(output_set, var_name)) {
intermediate_outputs_.insert(var_name);
} else {
input_set.insert(var_name);
}
}
}
for (auto& opt : op->Outputs()) {
for (auto& var_name : opt.second) {
output_set.insert(var_name);
}
}
}
auto& inputs = inputs_[kAll];
inputs.reserve(input_set.size());
std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs));
auto& outputs = outputs_[kAll];
outputs.reserve(output_set.size());
std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs));
}
std::string NetOp::DebugStringEx(const framework::Scope* scope) const {
std::ostringstream os;
os << OperatorBase::DebugStringEx(scope) << std::endl;
for (auto& op : ops_) {
std::istringstream is(op->DebugStringEx(scope));
for (std::string line; std::getline(is, line);) {
os << " " << line << std::endl;
}
}
return os.str();
}
bool NetOp::IsNetOp() const { return true; }
std::vector<std::string> NetOp::OutputVars(bool has_intermediate) const {
std::vector<std::string> all;
for (auto& pair : this->outputs_) {
for (auto& var_name : pair.second) {
all.push_back(var_name);
}
}
if (has_intermediate) {
return all;
}
std::vector<std::string> ret_val;
for (auto& each : all) {
if (!Contains(intermediate_outputs_, each)) {
ret_val.push_back(each);
}
}
return ret_val;
}
NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
std::unique_ptr<framework::OperatorBase> NetOp::Clone() const {
PADDLE_ENFORCE(
add_op_done_,
"Must clone a sealed NetOp, invoke Net::CompleteAddOp before clone");
return std::unique_ptr<OperatorBase>(new NetOp(*this));
}
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <set>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
/**
* @brief Network is also a type of Operator
*
* It will manage the operators it has.
*
* Network is the container and controller of a set of operators.
* A network object knows all Operators belonging to this network. Variables,
* which are inputs and outputs of these operators, are created and managed by a
* hierarchy of Scope objects.
*
* This is the base class of network, all the networks should implement the APIs
* it defines.
*/
class NetOp : public framework::OperatorBase {
public:
static const char kAll[];
NetOp()
: framework::OperatorBase("plain_net", framework::VariableNameMap{},
framework::VariableNameMap{},
framework::AttributeMap{}) {}
NetOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs);
NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) {
this->ops_.reserve(o.ops_.size());
std::transform(
o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_),
[](const std::unique_ptr<framework::OperatorBase>& op) {
return std::unique_ptr<framework::OperatorBase>(op->Clone());
});
this->CompleteAddOp();
}
bool SupportGPU() const override {
for (auto& op : ops_) {
if (!op->SupportGPU()) {
return false;
}
}
return true;
}
void AppendOp(const framework::OperatorBase& op) { AppendOp(op.Clone()); }
/**
* @brief Add an operator by ptr
*/
void AppendOp(std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot AppendOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(std::move(op));
}
void InsertOp(size_t pos, std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, std::move(op));
}
void InsertOp(size_t pos, const framework::OperatorBase& op) {
InsertOp(pos, op.Clone());
}
void CompleteAddOp(bool calculate = true);
std::string DebugStringEx(
const framework::Scope* scope = nullptr) const override;
bool IsNetOp() const override;
std::vector<std::string> OutputVars(bool has_intermediate) const override;
std::unique_ptr<framework::OperatorBase> Clone() const override;
std::vector<std::unique_ptr<framework::OperatorBase>> ops_;
private:
/**
* @brief Run the network.
*
* Run all the operators with the `scope`, if no scope is provided, default
* scope will be used instead. If no OpContext is provicded, default context
* will be used.
*/
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override {
for (auto& op : ops_) {
op->Run(scope, place);
}
}
bool add_op_done_{false};
std::set<std::string> intermediate_outputs_;
template <typename T, typename KeyType>
static bool Contains(T container, KeyType key) {
return container.find(key) != container.end();
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/net_op.h"
#include <gtest/gtest.h>
namespace paddle {
namespace operators {
using Scope = framework::Scope;
using DeviceContext = platform::DeviceContext;
static int run_cnt = 0;
class TestOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
DEFINE_OP_CLONE_METHOD(TestOp);
private:
void RunImpl(const Scope& scope,
const platform::Place& place) const override {
++run_cnt;
}
};
template <typename T>
void AssertSameVectorWithoutOrder(const std::vector<T>& expected,
const std::vector<T>& actual) {
ASSERT_EQ(expected.size(), actual.size());
std::unordered_set<T> expected_set;
for (auto& tmp : expected) {
expected_set.insert(tmp);
}
for (auto& act : actual) {
ASSERT_NE(expected_set.end(), expected_set.find(act));
}
}
TEST(OpKernel, all) {
auto net = std::make_shared<NetOp>();
ASSERT_NE(net, nullptr);
net->AppendOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, framework::AttributeMap{})));
net->AppendOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}},
{{"Out", {"z"}}}, framework::AttributeMap{})));
net->CompleteAddOp();
AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"},
net->Inputs(NetOp::kAll));
AssertSameVectorWithoutOrder({"y", "z"}, net->Outputs(NetOp::kAll));
auto final_outs = net->OutputVars(false);
ASSERT_EQ(final_outs.size(), 1UL);
ASSERT_EQ(final_outs[0], "z");
}
TEST(NetOp, insert_op) {
NetOp net;
auto op1 = std::unique_ptr<framework::NOP>(
new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, framework::AttributeMap{}));
net.AppendOp(*op1);
net.InsertOp(0, *op1);
ASSERT_EQ(2UL, net.ops_.size());
net.InsertOp(2, std::move(op1));
ASSERT_EQ(3UL, net.ops_.size());
}
TEST(NetOp, Clone) {
NetOp net;
net.AppendOp(std::unique_ptr<framework::NOP>(new framework::NOP{
"empty", framework::VariableNameMap{}, framework::VariableNameMap{},
framework::AttributeMap{}}));
net.AppendOp(std::unique_ptr<framework::NOP>(new framework::NOP{
"empty2", framework::VariableNameMap{}, framework::VariableNameMap{},
framework::AttributeMap{}}));
net.CompleteAddOp(true);
auto new_net_op = net.Clone();
ASSERT_NE(new_net_op, nullptr);
ASSERT_TRUE(new_net_op->IsNetOp());
auto* new_net = static_cast<NetOp*>(new_net_op.get());
ASSERT_EQ(2UL, new_net->ops_.size());
ASSERT_EQ(new_net->ops_[0]->Type(), "empty");
ASSERT_EQ(new_net->ops_[1]->Type(), "empty2");
}
} // namespace operators
} // namespace paddle
...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/prelu_op.h" #include "paddle/fluid/operators/prelu_op.h"
#include "paddle/fluid/operators/net_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/scale_op.h" #include "paddle/fluid/operators/scale_op.h"
#include "paddle/fluid/operators/net_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/split_op.h" #include "paddle/fluid/operators/split_op.h"
#include "paddle/fluid/operators/net_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -2,13 +2,13 @@ if(WITH_PYTHON) ...@@ -2,13 +2,13 @@ if(WITH_PYTHON)
if(WITH_AMD_GPU) if(WITH_AMD_GPU)
hip_library(paddle_pybind SHARED hip_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method
parallel_executor parallel_executor
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
else() else()
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method
parallel_executor parallel_executor
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
if(NOT APPLE AND NOT ANDROID) if(NOT APPLE AND NOT ANDROID)
......
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include <string> #include <string>
#include <tuple> #include <tuple>
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
...@@ -125,23 +124,6 @@ void BindProgramDesc(pybind11::module *m) { ...@@ -125,23 +124,6 @@ void BindProgramDesc(pybind11::module *m) {
}) })
.def("append_block", &pd::ProgramDesc::AppendBlock, .def("append_block", &pd::ProgramDesc::AppendBlock,
pybind11::return_value_policy::reference) pybind11::return_value_policy::reference)
.def("append_backward",
[](pd::ProgramDesc &program_desc, const pd::VarDesc &target,
const std::unordered_set<std::string> &no_grad_vars) {
pd::ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars);
std::unordered_map<
std::string, std::tuple<std::string /* grad_var_name */,
int /* block_idx */, int /* op_idx */>>
retv;
for (auto it = param_grad_map.begin(); it != param_grad_map.end();
++it) {
const auto &grad_info = it->second;
retv[it->first] = std::make_tuple(
grad_info.name_, grad_info.block_idx_, grad_info.op_idx_);
}
return retv;
})
.def("block", &pd::ProgramDesc::MutableBlock, .def("block", &pd::ProgramDesc::MutableBlock,
pybind11::return_value_policy::reference) pybind11::return_value_policy::reference)
.def("num_blocks", &pd::ProgramDesc::Size) .def("num_blocks", &pd::ProgramDesc::Size)
......
...@@ -20,9 +20,6 @@ limitations under the License. */ ...@@ -20,9 +20,6 @@ limitations under the License. */
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/channel.h" #include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_method.h"
...@@ -31,18 +28,18 @@ limitations under the License. */ ...@@ -31,18 +28,18 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/pybind.h" #include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/pybind/pybind.h" // NOLINT
#include "paddle/fluid/pybind/recordio.h" #include "paddle/fluid/pybind/recordio.h"
#include "paddle/fluid/pybind/tensor_py.h" #include "paddle/fluid/pybind/tensor_py.h"
...@@ -239,11 +236,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -239,11 +236,6 @@ All parameter, weight, gradient are variables in Paddle.
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
#endif #endif
.def("get_net",
[](Variable &self) -> operators::NetOp * {
return self.GetMutable<operators::NetOp>();
},
py::return_value_policy::reference)
.def("get_reader", .def("get_reader",
[](Variable &self) -> framework::ReaderHolder * { [](Variable &self) -> framework::ReaderHolder * {
PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>()); PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>());
...@@ -388,11 +380,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -388,11 +380,6 @@ All parameter, weight, gradient are variables in Paddle.
desc.InitializationErrorString()); desc.InitializationErrorString());
return OpRegistry::CreateOp(desc); return OpRegistry::CreateOp(desc);
}) })
.def("backward",
[](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) {
return Backward(forwardOp, no_grad_vars).release();
})
.def("run", .def("run",
[](OperatorBase &self, const Scope &scope, [](OperatorBase &self, const Scope &scope,
const platform::CPUPlace &place) { self.Run(scope, place); }) const platform::CPUPlace &place) { self.Run(scope, place); })
...@@ -420,42 +407,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -420,42 +407,6 @@ All parameter, weight, gradient are variables in Paddle.
[](const OperatorBase &op) { return op.OutputVars(false); }) [](const OperatorBase &op) { return op.OutputVars(false); })
.def("support_gpu", &OperatorBase::SupportGPU); .def("support_gpu", &OperatorBase::SupportGPU);
py::class_<operators::NetOp, OperatorBase>(m, "Net")
.def_static("create",
[]() -> operators::NetOp * {
auto *retv = new operators::NetOp;
retv->SetType("plain_net");
return retv;
})
.def("append_op", [](operators::NetOp &self,
const OperatorBase &op) { self.AppendOp(op); })
.def("complete_add_op", &operators::NetOp::CompleteAddOp)
.def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
self->CompleteAddOp();
});
// cond_op
py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
.def_static("create",
[](py::bytes protobin) -> operators::CondOp * {
proto::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
auto cond_op = OpRegistry::CreateOp(desc);
return static_cast<operators::CondOp *>(cond_op.release());
})
.def("set_truenet",
[](operators::CondOp &self, const operators::NetOp &net) -> void {
self.set_truenet(net.Clone());
})
.def("set_falsenet",
[](operators::CondOp &self, const operators::NetOp &net) -> void {
self.set_falsenet(net.Clone());
});
py::class_<framework::Executor>(m, "Executor") py::class_<framework::Executor>(m, "Executor")
.def(py::init<const platform::Place &>()) .def(py::init<const platform::Place &>())
.def("run", .def("run",
......
...@@ -14,13 +14,13 @@ ...@@ -14,13 +14,13 @@
#include "paddle/fluid/recordio/chunk.h" #include "paddle/fluid/recordio/chunk.h"
#include <zlib.h>
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <sstream> #include <sstream>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "snappy_stream/include/snappystream.hpp" #include "snappystream.hpp"
#include "zlib/include/zlib.h"
namespace paddle { namespace paddle {
namespace recordio { namespace recordio {
......
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/recordio/header.h" #include "paddle/fluid/recordio/header.h"
#include <string>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
......
...@@ -231,7 +231,7 @@ function gen_fluid_inference_lib() { ...@@ -231,7 +231,7 @@ function gen_fluid_inference_lib() {
Deploying fluid inference library ... Deploying fluid inference library ...
======================================== ========================================
EOF EOF
make inference_lib_dist make -j `nproc` inference_lib_dist
fi fi
} }
......
...@@ -1119,24 +1119,6 @@ class Program(object): ...@@ -1119,24 +1119,6 @@ class Program(object):
def current_block(self): def current_block(self):
return self.blocks[self.current_block_idx] return self.blocks[self.current_block_idx]
def append_backward(self, target, no_grad_set=None):
"""
return map(param_name -> (grad_name, block_index, op_index))
"""
assert isinstance(target, Variable)
if no_grad_set is None:
no_grad_set = set()
try:
param_to_grad_info = self.desc.append_backward(target.desc,
no_grad_set)
except Exception as e:
raise core.EnforceNotMet(
str(e) + "\nCurrent protobuf is\n{0}".format(
self.to_string(False)))
self.sync_with_cpp()
return param_to_grad_info
def create_block(self, parent_idx=None): def create_block(self, parent_idx=None):
new_block_idx = len(self.blocks) new_block_idx = len(self.blocks)
parent = self.current_block() if parent_idx is None else self.block( parent = self.current_block() if parent_idx is None else self.block(
...@@ -1201,6 +1183,8 @@ class Parameter(Variable): ...@@ -1201,6 +1183,8 @@ class Parameter(Variable):
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
self.do_model_average = kwargs.get('do_model_average', None)
def __str__(self): def __str__(self):
return self.to_string(True) return self.to_string(True)
...@@ -1221,7 +1205,7 @@ class Parameter(Variable): ...@@ -1221,7 +1205,7 @@ class Parameter(Variable):
if with_details: if with_details:
res_str = Variable.to_string(self, throw_on_error, True) res_str = Variable.to_string(self, throw_on_error, True)
additional_attr = ("trainable", "optimize_attr", "regularizer", additional_attr = ("trainable", "optimize_attr", "regularizer",
"gradient_clip_attr") "gradient_clip_attr", "do_model_average")
for attr_name in additional_attr: for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name, res_str += "%s: %s\n" % (attr_name,
str(getattr(self, attr_name))) str(getattr(self, attr_name)))
......
...@@ -1516,7 +1516,8 @@ def batch_norm(input, ...@@ -1516,7 +1516,8 @@ def batch_norm(input,
in_place=False, in_place=False,
name=None, name=None,
moving_mean_name=None, moving_mean_name=None,
moving_variance_name=None): moving_variance_name=None,
do_model_average_for_mean_and_var=False):
""" """
This function helps create an operator to implement This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters. the BatchNorm layer using the configurations from the input parameters.
...@@ -1547,7 +1548,10 @@ def batch_norm(input, ...@@ -1547,7 +1548,10 @@ def batch_norm(input,
mean = helper.create_parameter( mean = helper.create_parameter(
attr=ParamAttr( attr=ParamAttr(
name=moving_mean_name, initializer=Constant(0.0), trainable=False), name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=input.dtype) dtype=input.dtype)
mean.stop_gradient = True mean.stop_gradient = True
...@@ -1556,7 +1560,8 @@ def batch_norm(input, ...@@ -1556,7 +1560,8 @@ def batch_norm(input,
attr=ParamAttr( attr=ParamAttr(
name=moving_variance_name, name=moving_variance_name,
initializer=Constant(1.0), initializer=Constant(1.0),
trainable=False), trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=input.dtype) dtype=input.dtype)
variance.stop_gradient = True variance.stop_gradient = True
...@@ -3374,14 +3379,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -3374,14 +3379,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
Here are some examples to explain it. Here are some examples to explain it.
1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape 1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
is [6, 8], the reshape operator will transform x into a 2-D tensor with is [6, 8], the reshape operator will transform x into a 2-D tensor with
shape [6, 8] and leaving x's data unchanged. shape [6, 8] and leaving x's data unchanged.
2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape 2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
specified is [2, 3, -1, 2], the reshape operator will transform x into a specified is [2, 3, -1, 2], the reshape operator will transform x into a
4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this 4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this
case, one dimension of the target shape is set to -1, the value of this case, one dimension of the target shape is set to -1, the value of this
dimension is inferred from the total element number of x and remaining dimension is inferred from the total element number of x and remaining
dimensions. dimensions.
3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape 3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
...@@ -3615,7 +3620,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None): ...@@ -3615,7 +3620,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
def pad(x, paddings, pad_value=0., name=None): def pad(x, paddings, pad_value=0., name=None):
""" """
Pads a tensor with a constant value given by :attr:`pad_value`, and the Pads a tensor with a constant value given by :attr:`pad_value`, and the
padded width is specified by :attr:`paddings`. padded width is specified by :attr:`paddings`.
Specifically, the number of values padded before the contents of :attr:`x` Specifically, the number of values padded before the contents of :attr:`x`
in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number
...@@ -3643,7 +3648,7 @@ def pad(x, paddings, pad_value=0., name=None): ...@@ -3643,7 +3648,7 @@ def pad(x, paddings, pad_value=0., name=None):
x (Variable): The input tensor variable. x (Variable): The input tensor variable.
paddings (list): A list of integers. Its elements specify the padded paddings (list): A list of integers. Its elements specify the padded
width before and after for each dimension in turn. width before and after for each dimension in turn.
The length of :attr:paddings must be The length of :attr:paddings must be
:math:`rank(x) \\times 2`. :math:`rank(x) \\times 2`.
pad_value (float): The constant value used to pad. pad_value (float): The constant value used to pad.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re
from collections import defaultdict from collections import defaultdict
from paddle.fluid.framework import Program from paddle.fluid.framework import Program
import framework import framework
...@@ -818,8 +818,8 @@ class ModelAverage(Optimizer): ...@@ -818,8 +818,8 @@ class ModelAverage(Optimizer):
min_average_window, max_average_window and current update times. min_average_window, max_average_window and current update times.
Args: Args:
params_grads: A list of parameter-grad variable pairs.
average_window_rate: The rate of average window. average_window_rate: The rate of average window.
params_grads: A list of parameter-grad variable pairs.
min_average_window: The minimum size of average window. min_average_window: The minimum size of average window.
max_average_window: The maximum size of average window. max_average_window: The maximum size of average window.
...@@ -840,8 +840,8 @@ class ModelAverage(Optimizer): ...@@ -840,8 +840,8 @@ class ModelAverage(Optimizer):
""" """
def __init__(self, def __init__(self,
params_grads,
average_window_rate, average_window_rate,
params_grads=None,
min_average_window=10000, min_average_window=10000,
max_average_window=10000, max_average_window=10000,
**kwargs): **kwargs):
...@@ -849,24 +849,37 @@ class ModelAverage(Optimizer): ...@@ -849,24 +849,37 @@ class ModelAverage(Optimizer):
self.average_window = average_window_rate self.average_window = average_window_rate
self.min_average_window = min_average_window self.min_average_window = min_average_window
self.max_average_window = max_average_window self.max_average_window = max_average_window
self.params_grads = params_grads
self.params_grads = [] if params_grads is None else params_grads
params = {}
for param, grad in self.params_grads:
if param.do_model_average != False:
params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
if param.name not in params and param.do_model_average != False:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
stop_gradient=True)
params[param.name] = (param, grad)
self.params_grads = params.values()
for param, grad in self.params_grads: for param, grad in self.params_grads:
if grad is not None: self._append_average_accumulate_op(param)
self._append_average_accumulate_op(param)
self.apply_program = Program() self.apply_program = Program()
block = self.apply_program.global_block() block = self.apply_program.global_block()
with program_guard(main_program=self.apply_program): with program_guard(main_program=self.apply_program):
for param_grad in self.params_grads: for param_grad in self.params_grads:
if param_grad[1] is not None: self._add_average_apply_op(block, param_grad)
self._add_average_apply_op(block, param_grad)
self.restore_program = Program() self.restore_program = Program()
block = self.restore_program.global_block() block = self.restore_program.global_block()
with program_guard(main_program=self.restore_program): with program_guard(main_program=self.restore_program):
for param_grad in self.params_grads: for param_grad in self.params_grads:
if param_grad[1] is not None: self._add_average_restore_op(block, param_grad)
self._add_average_restore_op(block, param_grad)
def _add_average_apply_op(self, block, param_grad): def _add_average_apply_op(self, block, param_grad):
param = block.clone_variable(param_grad[0]) param = block.clone_variable(param_grad[0])
......
...@@ -28,13 +28,15 @@ class ParamAttr(object): ...@@ -28,13 +28,15 @@ class ParamAttr(object):
learning_rate=1.0, learning_rate=1.0,
regularizer=None, regularizer=None,
trainable=True, trainable=True,
gradient_clip=None): gradient_clip=None,
do_model_average=None):
self.name = name self.name = name
self.initializer = initializer self.initializer = initializer
self.learning_rate = learning_rate self.learning_rate = learning_rate
self.regularizer = regularizer self.regularizer = regularizer
self.trainable = trainable self.trainable = trainable
self.gradient_clip = gradient_clip self.gradient_clip = gradient_clip
self.model_average = do_model_average
def set_default_initializer(self, initializer): def set_default_initializer(self, initializer):
if initializer is None: if initializer is None:
...@@ -80,7 +82,8 @@ class ParamAttr(object): ...@@ -80,7 +82,8 @@ class ParamAttr(object):
}, },
'regularizer': self.regularizer, 'regularizer': self.regularizer,
'trainable': self.trainable, 'trainable': self.trainable,
'gradient_clip_attr': self.gradient_clip 'gradient_clip_attr': self.gradient_clip,
'model_average': self.model_average
} }
if with_initializer: if with_initializer:
kwargs['initializer'] = self.initializer kwargs['initializer'] = self.initializer
...@@ -90,7 +93,7 @@ class ParamAttr(object): ...@@ -90,7 +93,7 @@ class ParamAttr(object):
class WeightNormParamAttr(ParamAttr): class WeightNormParamAttr(ParamAttr):
""" """
Used for weight normalization. Any field in ParamAttr can also be set here. Used for weight normalization. Any field in ParamAttr can also be set here.
Besides, an extra field dim can be set to indicate the dimension except Besides, an extra field dim can be set to indicate the dimension except
which to normalize. which to normalize.
""" """
# List to record the parameters reparameterized by weight normalization. # List to record the parameters reparameterized by weight normalization.
......
...@@ -14,23 +14,13 @@ ...@@ -14,23 +14,13 @@
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.op import Operator from paddle.fluid.op import Operator
import paddle.fluid as fluid
from op_test import OpTest
from paddle.fluid.framework import grad_var_name from paddle.fluid.framework import grad_var_name
def get_backward_op(scope, op, no_grad_set):
backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars():
var = scope.var(input)
var.get_tensor()
for output in backward_op.output_vars():
var = scope.var(output)
var.get_tensor()
return backward_op
def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): def _reference_testing(x, scale, offset, mean, var, epsilon, data_format):
x_shape = x.shape x_shape = x.shape
if len(x_shape) == 2: if len(x_shape) == 2:
...@@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): ...@@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format):
def _reference_training(x, scale, offset, epsilon, data_format): def _reference_training(x, scale, offset, epsilon, data_format):
x_shape = x.shape x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
if data_format == "NCHW": if data_format == "NCHW":
n, c, h, w = x.shape n, c, h, w = x.shape
...@@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format): ...@@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format):
offset_tile = np.reshape(offset, (1, c, 1, 1)) offset_tile = np.reshape(offset, (1, c, 1, 1))
offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) offset_tile = np.reshape(offset_tile, (1, c, 1, 1))
y = normalized * scale_tile + offset_tile y = normalized * scale_tile + offset_tile
if len(x_shape) == 2:
y = np.reshape(y, (y.shape[0], y.shape[1]))
return y, mean, var return y, mean, var
elif data_format == "NHWC": elif data_format == "NHWC":
x_square = x * x x_square = x * x
...@@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format): ...@@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format):
var = x_square_sum / element_count - mean * mean var = x_square_sum / element_count - mean * mean
normalized = (x - mean) / np.sqrt(var + epsilon) normalized = (x - mean) / np.sqrt(var + epsilon)
y = normalized * scale + offset y = normalized * scale + offset
if len(x_shape) == 2:
y = np.reshape(y, x_shape)
return y, mean, var return y, mean, var
else: else:
raise ValueError("Unknown data order.") raise ValueError("Unknown data order.")
def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format):
# Use the following formulas to calculate gradients: # Use the following formulas to calculate gradients:
# grad_scale = # grad_scale =
# sum(grad_y * (x - mean)) * rsqrt(var + epsilon) # sum(grad_y * (x - mean)) * rsqrt(var + epsilon)
# #
# grad_offset = sum(output_y) # grad_offset = sum(output_y)
# #
# grad_x = # x_grad =
# 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) - # 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], grad_y.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], 1, 1, grad_y.shape[1]))
if data_format == "NCHW": if data_format == "NCHW":
x = np.transpose(x, (0, 2, 3, 1)) x = np.transpose(x, (0, 2, 3, 1))
grad_y = np.transpose(grad_y, (0, 2, 3, 1)) y_grad = np.transpose(y_grad, (0, 2, 3, 1))
# raise ValueError("data_format must be NHWC, got %s." % data_format) x_grad = scale * (y_grad - np.mean(
grad_x = scale * (grad_y - np.mean( y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean(
grad_y, axis=(0, 1, 2)) - (x - mean) * np.mean( y_grad * (x - mean), axis=(0, 1, 2)) /
grad_y * (x - mean), axis=(0, 1, 2)) /
(var + epsilon)) / np.sqrt(var + epsilon) (var + epsilon)) / np.sqrt(var + epsilon)
grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon), grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon),
axis=(0, 1, 2)) axis=(0, 1, 2))
grad_offset = np.sum(grad_y, axis=(0, 1, 2)) grad_offset = np.sum(y_grad, axis=(0, 1, 2))
# transfer back to N, C, H, W # transfer back to N, C, H, W
if data_format == "NCHW": if data_format == "NCHW":
grad_x = np.transpose(grad_x, (0, 3, 1, 2)) x_grad = np.transpose(x_grad, (0, 3, 1, 2))
x = np.transpose(x, (0, 3, 1, 2)) x = np.transpose(x, (0, 3, 1, 2))
grad_y = np.transpose(grad_y, (0, 3, 1, 2)) y_grad = np.transpose(y_grad, (0, 3, 1, 2))
if len(x_shape) == 2: return x_grad, grad_scale, grad_offset
grad_x = np.reshape(grad_x, x_shape)
return grad_x, grad_scale, grad_offset
def create_or_get_tensor(scope, var_name, var, place): def create_or_get_tensor(scope, var_name, var, place):
...@@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None): ...@@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None):
__set_tensor__(output, data) __set_tensor__(output, data)
class TestBatchNormOpInference(OpTest): class TestBatchNormOpInference(unittest.TestCase):
def setUp(self): def setUp(self):
self.dtype = np.float32 self.dtype = np.float32
...@@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): ...@@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference):
self.check_with_place(place, data_format, self.dtype, [2, 3]) self.check_with_place(place, data_format, self.dtype, [2, 3])
class TestBatchNormOpTraining(OpTest): class TestBatchNormOpTraining(unittest.TestCase):
def __assert_close(self, tensor, np_array, msg, atol=1e-4): def __assert_close(self, tensor, np_array, msg, atol=1e-4):
if not np.allclose(np.array(tensor), np_array, atol=atol):
import pdb
pdb.set_trace()
self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
def test_python_testing(self):
data_format = "NHWC"
epsilon = 0.00001
n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c]
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance,
epsilon, "NHWC")
# running N, C, H, W case
# should produce the same results
x_shape2 = [n, c, h, w]
x_val2 = np.transpose(x_val, (0, 3, 1, 2))
y_out2 = _reference_testing(x_val2, scale_val, bias_val, mean, variance,
epsilon, "NCHW")
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
self.__assert_close(y_out, y_out2_trans, "inference output")
print 'python: NHWC, NCHW, inference checking passed'
def test_python_training(self):
data_format = "NHWC"
epsilon = 0.00001
momentum = 0.9
# N, H, W, C: 2, 3, 4, 2
n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c]
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
# run forward
y_out, saved_mean, var_ref = _reference_training(
x_val, scale_val, bias_val, epsilon, "NHWC")
#
mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance
saved_variance = 1. / np.sqrt(var_ref + epsilon)
# running N, C, H, W case
# should produce the same results
x_shape2 = [n, c, h, w]
x_val2 = np.transpose(x_val, (0, 3, 1, 2))
y_out2, saved_mean2, var_ref2 = _reference_training(
x_val2, scale_val, bias_val, epsilon, "NCHW")
self.__assert_close(saved_mean, saved_mean2, "batch mean")
self.__assert_close(var_ref, var_ref2, "batch variance")
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
self.__assert_close(y_out, y_out2_trans, "batch output")
print 'python: NHWC, NCHW, forward checking passed'
# test backward now
# NHWC
self.y_grad = np.random.random_sample(x_shape).astype(np.float32)
y_grad = self.y_grad
# y_grad = np.ones(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC")
# NCHW
y_grad2 = np.transpose(y_grad, (0, 3, 1, 2))
# y_grad2 = np.ones(x_shape2).astype(np.float32)
x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad(
x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW")
self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient")
self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient")
x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1))
self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient")
print 'python: NHWC, NCHW, backward checking passed'
def test_forward_backward(self): def test_forward_backward(self):
def test_with_place(place, data_layout, shape): def test_with_place(place, data_layout, shape):
# attr # attr
epsilon = 0.00001 epsilon = 0.00001
momentum = 0.9 momentum = 0.9
if data_layout == "NCHW":
if len(shape) == 2: n, c, h, w = shape[0], shape[1], shape[2], shape[3]
x_shape = shape
c = shape[1]
else: else:
# n, h, w, c = 2, 3, 4, 2
n, h, w, c = shape[0], shape[1], shape[2], shape[3] n, h, w, c = shape[0], shape[1], shape[2], shape[3]
if data_format == "NHWC":
x_shape = [n, h, w, c]
elif data_format == "NCHW":
x_shape = [n, c, h, w]
else:
raise ValueError("Unknown data type.")
scale_shape = [c] scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32) np.random.seed(123)
scale_val = np.random.random_sample(scale_shape).astype(np.float32) x = np.random.random_sample(shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32) scale = np.random.random_sample(scale_shape).astype(np.float32)
bias = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32) mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32) variance = np.ones(scale_shape).astype(np.float32)
# run forward # run forward
y_out, saved_mean, var_ref = _reference_training( y, saved_mean, var_ref = _reference_training(x, scale, bias,
x_val, scale_val, bias_val, epsilon, data_format) epsilon, data_layout)
# update moving mean and variance
mean_out = saved_mean * (1. - momentum) + momentum * mean mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance variance_out = var_ref * (1. - momentum) + momentum * variance
saved_variance = 1. / np.sqrt(var_ref + epsilon) saved_variance = 1. / np.sqrt(var_ref + epsilon)
# for gradient test
# y_grad = np.ones(x_shape).astype(np.float32)
y_grad = np.zeros(x_shape).astype(np.float32)
if len(y_grad.shape) == 2:
y_grad[0, 0] = 1.
else:
y_grad[0, 0, 0, 0] = 1.
# y_grad = np.random.random_sample(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
data_format)
scope = core.Scope()
# create input
x_tensor = create_or_get_tensor(scope, "x_val", x_val, place)
scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val,
place)
bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val,
place)
mean_tensor = create_or_get_tensor(scope, "mean", mean, place)
variance_tensor = create_or_get_tensor(scope, "variance", variance,
place)
# create output
y_tensor = create_or_get_tensor(scope, "y_out", None, place)
saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None,
place)
saved_variance_tensor = create_or_get_tensor(
scope, "saved_variance", None, place)
mean_out_tensor = mean_tensor
variance_out_tensor = variance_tensor
batch_norm_op = Operator(
"batch_norm",
# inputs
X="x_val",
Scale="scale_val",
Bias="bias_val",
Mean="mean",
Variance="variance",
# outputs
Y="y_out",
MeanOut="mean",
VarianceOut="variance",
SavedMean="saved_mean",
SavedVariance="saved_variance",
# attrs
is_test=False,
data_layout=data_layout,
momentum=momentum,
epsilon=epsilon)
batch_norm_op.run(scope, place)
# check forward result
self.__assert_close(y_tensor, y_out, "y_out")
self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean")
self.__assert_close(saved_variance_tensor, saved_variance,
"saved_variance")
self.__assert_close(mean_out_tensor, mean_out, "mean_out")
if isinstance(place, core.CUDAPlace):
atol = 5e-2
else:
atol = 1e-4
self.__assert_close(variance_out_tensor, variance_out,
"variance_out", atol)
print "op test forward passed: ", str(place), data_layout
# run backward # run backward
batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set()) y_grad = np.random.random_sample(shape).astype(np.float32)
set_output_grad( x_grad, scale_grad, bias_grad = _reference_grad(
scope, x, y_grad, scale, saved_mean, var_ref, epsilon, data_format)
["y_out", "mean", "variance", "saved_mean", "saved_variance"],
place, var_dict = locals()
feed_dict={"y_out": y_grad}) var_dict['y@GRAD'] = y_grad
batch_norm_op_grad.run(scope, place)
var_names = [
x_grad_tensor = create_or_get_tensor(scope, 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
grad_var_name("x_val"), None, 'saved_variance'
place) ]
scale_grad_tensor = create_or_get_tensor(scope, ground_truth = {name: var_dict[name] for name in var_names}
grad_var_name("scale_val"),
None, place) program = fluid.Program()
bias_grad_tensor = create_or_get_tensor(scope, with fluid.program_guard(program):
grad_var_name("bias_val"), block = program.global_block()
None, place) for name in ground_truth:
block.create_var(
name=name,
dtype='float32',
shape=ground_truth[name].shape)
bn_op = block.append_op(
type="batch_norm",
inputs={
"X": block.var('x'),
"Scale": block.var('scale'),
"Bias": block.var('bias'),
"Mean": block.var('mean'),
"Variance": block.var('variance')
},
outputs={
"Y": block.var('y'),
"MeanOut": block.var('mean'), # share the same memory
"VarianceOut":
block.var('variance'), # share the same memory
"SavedMean": block.var('saved_mean'),
"SavedVariance": block.var('saved_variance')
},
attrs={
"momentum": momentum,
"epsilon": epsilon,
"is_test": False,
"data_layout": data_layout
})
block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)
# generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
bn_op.desc, set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode("ascii"))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode("ascii"))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
out = exe.run(
program,
feed={
name: var_dict[name]
for name in
['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
},
fetch_list=[
'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
'x@GRAD', 'scale@GRAD', 'bias@GRAD'
])
self.__assert_close(y, out[0], "y")
self.__assert_close(mean_out, out[1], "mean")
self.__assert_close(variance_out, out[2], "variance", 1e-3)
self.__assert_close(saved_mean, out[3], "saved_mean")
self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
self.__assert_close(x_grad, out[5], "x_grad")
self.__assert_close(scale_grad, out[6], "scale_grad")
self.__assert_close(bias_grad, out[7], "bias_grad")
# check gradient output print "op test forward passed: ", str(place), data_layout
self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
print "op test backward passed: ", str(place), data_layout
places = [core.CPUPlace()] places = [core.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"): if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
...@@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest): ...@@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest):
for place in places: for place in places:
for data_format in ["NCHW", "NHWC"]: for data_format in ["NCHW", "NHWC"]:
test_with_place(place, data_format, [2, 3, 4, 5]) test_with_place(place, data_format, [2, 3, 4, 5])
test_with_place(place, data_format, [2, 3])
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import paddle.fluid.core as core
import unittest
import numpy as np
from paddle.fluid.op import Operator, CondOp
class PySimpleCond(object):
'''
A simple implementation of dynamic if-else based on numpy
'''
def __init__(self):
array = [1] * 10
for i in range(1, 10, 2):
array[i] = 0
self.cond = np.array(array)
self.x = np.ones(shape=(10, 1)).astype("float32")
def forward(self):
self.index_t = np.where(self.cond == 1)
self.index_f = np.where(self.cond == 0)
y_t = self.x[self.index_t]
y_f = self.x[self.index_f]
y_t = y_t * 2.
y_f = y_f * (-2.)
output = np.zeros(shape=(10, 1))
output[self.index_t] = y_t
output[self.index_f] = y_f
return output
class PySimpleCondTest(unittest.TestCase):
def setUp(self):
self.condnn = PySimpleCond()
def test_forward(self):
output = self.condnn.forward()
def create_tensor(scope, name, shape, np_data):
tensor = scope.var(name).get_tensor()
tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace())
return tensor
class TestCondOp(unittest.TestCase):
'''
Test CondOp
equation:
cond = [True, False, True, False, ...]
y[index_t] = x[index_t] * 2.
y[index_f] = x[index_f] * -2.
outputs:
y
'''
def setUp(self):
self.py_cond = PySimpleCond()
def forward(self):
self.scope = core.Scope()
self.create_global_variables()
self.create_cond_op()
self.create_sub_net()
self.condop.run(self.scope, core.CPUPlace())
return np.array(self.scope.find_var("Out").get_tensor())
def create_global_variables(self):
x_np_data = self.py_cond.x
create_tensor(self.scope, "X", [10, 1], x_np_data)
cond_np_data = self.py_cond.cond.astype("int32")
create_tensor(self.scope, "cond", [10, 1], cond_np_data)
self.scope.var("SubScopes")
self.scope.var("IndexTensors")
self.scope.var("Out")
def create_cond_op(self):
self.condop = CondOp(
Cond="cond",
Xs=["X"],
Outs=["Out"],
SubScopes="SubScopes",
IndexTensors="IndexTensors")
def create_sub_net(self):
truenet = core.Net.create()
scale_op_t = Operator("scale", X='X', Out='Out', scale=2.)
truenet.append_op(scale_op_t)
truenet.complete_add_op(True)
self.condop.set_truenet(truenet)
falsenet = core.Net.create()
scale_op_t = Operator("scale", X='X', Out='Out', scale=-2.)
falsenet.append_op(scale_op_t)
falsenet.complete_add_op(True)
self.condop.set_falsenet(falsenet)
def test_forward(self):
print 'test cond op forward'
pd_output = self.forward()
py_output = self.py_cond.forward()
print 'pd_output', pd_output
print
print 'py_output', py_output
self.assertEqual(pd_output.shape, py_output.shape)
print 'test passed'
return 0
if __name__ == "__main__":
unittest.main()
...@@ -15,10 +15,8 @@ import unittest ...@@ -15,10 +15,8 @@ import unittest
import numpy as np import numpy as np
from operator import mul from operator import mul
from op_test import OpTest
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.op import Operator import paddle.fluid as fluid
from paddle.fluid.framework import grad_var_name
np.random.random(123) np.random.random(123)
...@@ -70,161 +68,93 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1): ...@@ -70,161 +68,93 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1):
return grad_x, d_scale, d_bias return grad_x, d_scale, d_bias
def get_backward_op(scope, op, no_grad_set): class TestLayerNormdOp(unittest.TestCase):
backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars():
var = scope.var(input)
var.get_tensor()
for output in backward_op.output_vars():
var = scope.var(output)
var.get_tensor()
return backward_op
def create_or_get_tensor(scope, var_name, var, place):
tensor = scope.var(var_name).get_tensor()
if var is not None:
assert isinstance(var, np.ndarray)
tensor.set_lod([[]])
tensor.set_dims(var.shape)
tensor.set(var, place)
return tensor
def set_output_grad(scope, outputs, place, feed_dict=None):
def __set_tensor__(name, data=None):
out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.var(grad_var_name(name)).get_tensor()
out_dtype = out_tensor.dtype()
if data is None:
if out_dtype == core.VarDesc.VarType.FP64:
data = np.ones(out_tensor.shape(), dtype=np.float64)
elif out_dtype == core.VarDesc.VarType.FP32:
data = np.ones(out_tensor.shape(), dtype=np.float32)
else:
raise ValueError("Not supported data type " + str(out_dtype))
grad_tensor.set(data, place)
for output in outputs:
data = None
if output in feed_dict:
data = feed_dict[output]
__set_tensor__(output, data)
class TestLayerNormdOp(OpTest):
def __assert_close(self, tensor, np_array, msg, atol=1e-4): def __assert_close(self, tensor, np_array, msg, atol=1e-4):
self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
def __assert_grad_close(self,
tensor,
np_array,
name,
place,
max_relative_error=0.02):
a = np.array(tensor)
b = np_array
abs_a = np.abs(a)
abs_a[abs_a < 1e-5] = 1
diff_mat = np.abs(a - b) / abs_a
max_diff = np.max(diff_mat)
def err_msg():
offset = np.argmax(diff_mat > max_relative_error)
return ("%s Variable %s max gradient diff %f over limit %f, "
"the first error element is %d, %f, %f") % (
"Gradient Check On %s" % str(place), name, max_diff,
max_relative_error, offset, a.flatten()[offset],
b.flatten()[offset])
self.assertLessEqual(max_diff, max_relative_error, err_msg())
def check_forward_backward(self, shape, begin_norm_axis): def check_forward_backward(self, shape, begin_norm_axis):
def test_with_place(place, shape, begin_norm_axis=1): def test_with_place(place, shape, begin_norm_axis):
# setUp
assert begin_norm_axis > 0 and begin_norm_axis < len(
shape), 'begin_norm_axis must be between 0 and len(shape)-1.'
# attr # attr
epsilon = 0.00001 epsilon = 0.00001
x_shape = shape x_shape = shape
D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
scale_shape = [D] scale_shape = [D]
x_val = np.random.random_sample(x_shape).astype(np.float32) np.random.seed(123)
scale_val = np.random.random_sample(scale_shape).astype(np.float32) x = np.random.random_sample(x_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32) scale = np.random.random_sample(scale_shape).astype(np.float32)
bias = np.random.random_sample(scale_shape).astype(np.float32)
y_grad = np.random.random_sample(x_shape).astype(np.float32) y_grad = np.random.random_sample(x_shape).astype(np.float32)
# run forward # reference forward & backward
y_out, saved_mean, var_ref = _reference_layer_norm_naive( y, mean, variance = _reference_layer_norm_naive(
x_val, scale_val, bias_val, epsilon, begin_norm_axis) x, scale, bias, epsilon, begin_norm_axis)
naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref} x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
x, y_grad, scale, mean, variance, begin_norm_axis)
# get gradient
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad( var_dict = locals()
x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis) var_dict['y@GRAD'] = y_grad
naive_grad = { var_names = [
"X": x_grad_ref, 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'y@GRAD'
"Scale": scale_grad_ref, ]
"Bias": bias_grad_ref ground_truth = {name: var_dict[name] for name in var_names}
}
program = fluid.Program()
scope = core.Scope() with fluid.program_guard(program):
block = program.global_block()
# create input for name in ground_truth:
input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val} block.create_var(
for i_name in input_map: name=name,
create_or_get_tensor(scope, i_name, input_map[i_name], place) dtype='float32',
shape=ground_truth[name].shape)
# create output layer_norm_op = block.append_op(
output_map = {"Y": None, "Mean": None, "Variance": None} type="layer_norm",
output_tensor = {} inputs={
for o_name in output_map: "X": block.var('x'),
output_tensor[o_name] = create_or_get_tensor( "Scale": block.var('scale'),
scope, o_name, output_map[o_name], place) "Bias": block.var('bias'),
},
layer_norm_op = Operator( outputs={
"layer_norm", "Y": block.var('y'),
# inputs "Mean": block.var('mean'), # share the same memory
X="X", "Variance":
Scale="Scale", block.var('variance'), # share the same memory
Bias="Bias", },
# outputs attrs={
Y="Y", "epsilon": epsilon,
Mean="Mean", "begin_norm_axis": begin_norm_axis
Variance="Variance", })
# attrs
epsilon=epsilon, # generate backward op_desc
begin_norm_axis=begin_norm_axis) grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
layer_norm_op.desc, set(), [])
layer_norm_op.run(scope, place) grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
# check forward result new_op_desc.copy_from(grad_op_desc)
atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4 for var_name in grad_op_desc.output_arg_names():
for o_tensor in output_tensor: block.desc.var(var_name.encode("ascii"))
self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor], grad_op_desc.infer_var_type(block.desc)
o_tensor, atol) grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
# run backward grad_var = block.desc.find_var(arg.encode("ascii"))
layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set()) grad_var.set_dtype(core.VarDesc.VarType.FP32)
set_output_grad(
scope, ["Y", "Mean", "Variance"], exe = fluid.Executor(place)
place, out = exe.run(program,
feed_dict={"Y": y_grad}) feed={
layer_norm_op_grad.run(scope, place) name: var_dict[name]
for name in ['x', 'scale', 'bias', 'y@GRAD']
# get output },
grad_tensor = {} fetch_list=[
for o_name in naive_grad: 'y', 'mean', 'variance', 'x@GRAD',
grad_tensor[o_name] = x_ = create_or_get_tensor( 'scale@GRAD', 'bias@GRAD'
scope, grad_var_name(o_name), None, place) ])
self.__assert_close(y, out[0], "y")
# check gradient output self.__assert_close(mean, out[1], "mean")
for o_grad in naive_grad: self.__assert_close(variance, out[2], "variance", 1e-3)
self.__assert_grad_close(grad_tensor[o_grad], self.__assert_close(x_grad, out[3], "x_grad")
naive_grad[o_grad], o_grad + "@GRAD", self.__assert_close(scale_grad, out[4], "scale_grad", 1e-3)
place) self.__assert_close(bias_grad, out[5], "bias_grad")
places = [core.CPUPlace()] places = [core.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"):
...@@ -237,15 +167,6 @@ class TestLayerNormdOp(OpTest): ...@@ -237,15 +167,6 @@ class TestLayerNormdOp(OpTest):
self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1)
self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3)
def test_check_forward_backward_with_scale(self):
pass # TODO(zcd)
def test_check_forward_backward_with_bias(self):
pass # TODO(zcd)
def test_check_forward_backward(self):
pass # TODO(zcd)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -32,7 +32,6 @@ class TestBook(unittest.TestCase): ...@@ -32,7 +32,6 @@ class TestBook(unittest.TestCase):
cost = layers.square_error_cost(input=y_predict, label=y) cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(cost) avg_cost = layers.mean(cost)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
program.append_backward(avg_cost)
print(str(program)) print(str(program))
...@@ -94,8 +93,6 @@ class TestBook(unittest.TestCase): ...@@ -94,8 +93,6 @@ class TestBook(unittest.TestCase):
cost = layers.cross_entropy(input=predict, label=label) cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(cost) avg_cost = layers.mean(cost)
program.append_backward(avg_cost)
print(str(program)) print(str(program))
def test_word_embedding(self): def test_word_embedding(self):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import unittest
def fc(X, W, Y):
ret_v = core.Net.create()
ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation"))
ret_v.append_op(Operator("sigmoid", X="pre_activation", Out=Y))
ret_v.complete_add_op(True)
return ret_v
class TestNet(unittest.TestCase):
def test_net_all(self):
net = core.Net.create()
op1 = Operator("sum", X=["X", "Y"], Out="Out")
net.append_op(op1)
net2 = core.Net.create()
net2.append_op(fc(X="X", W="w", Y="fc.out"))
net2.complete_add_op(True)
net.append_op(net2)
net.complete_add_op(True)
expected = '''
Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}.
Op(sum), inputs:{X[X, Y]}, outputs:{Out[Out]}.
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}.
Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Out[fc.out]}.
'''
self.assertEqual(expected, "\n" + str(net))
if __name__ == "__main__":
unittest.main()
...@@ -473,7 +473,7 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): ...@@ -473,7 +473,7 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
loss = simple_fc_net(True) loss = simple_fc_net(True)
test_program = main.clone(for_test=True) test_program = main.clone(for_test=True)
opt = fluid.optimizer.SGD(learning_rate=0.0001) opt = fluid.optimizer.SGD(learning_rate=0.001)
opt.minimize(loss) opt.minimize(loss)
batch_size = 32 batch_size = 32
...@@ -500,4 +500,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): ...@@ -500,4 +500,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
train_loss, = train_exe.run([loss.name], feed_dict=feed_dict) train_loss, = train_exe.run([loss.name], feed_dict=feed_dict)
train_loss = numpy.array(train_loss) train_loss = numpy.array(train_loss)
self.assertTrue(numpy.allclose(train_loss, test_loss)) self.assertTrue(
numpy.allclose(
train_loss, test_loss, atol=1e-8),
"Train loss: " + str(train_loss) + "\n Test loss:" +
str(test_loss))
...@@ -87,57 +87,6 @@ class TestProgram(unittest.TestCase): ...@@ -87,57 +87,6 @@ class TestProgram(unittest.TestCase):
print(prog) print(prog)
print(prog_restored) print(prog_restored)
def test_append_backward(self):
prog = Program()
block = prog.global_block()
mul_x = block.create_var(
dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
mul_op = block.append_op(
type="mul",
inputs={"X": [mul_x],
"Y": mul_y},
outputs={"Out": [mul_out]},
attrs={"x_num_col_dims": 1})
add_y = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="add.y")
add_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="add.out")
add_op = block.append_op(
type="elementwise_add",
inputs={"X": mul_out,
"Y": add_y},
outputs={"Out": add_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": add_out}, outputs={"Out": mean_out})
self.assertEqual(mul_op.idx, 0)
self.assertEqual(add_op.idx, 1)
param_to_grad = prog.append_backward(mean_out, set())
for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out",
"mean.out"):
self.assertEqual(param_to_grad[var_name][0],
grad_var_name(var_name))
self.assertEqual(param_to_grad[var_name][1], 0)
expect_ops = [
"mul", "elementwise_add", "mean", "fill_constant", "mean_grad",
"elementwise_add_grad", "mul_grad"
]
actual_ops = []
for op in block.ops:
actual_ops.append(op.type)
self.assertEqual(actual_ops, expect_ops)
def test_program_clone_with_parameter(self): def test_program_clone_with_parameter(self):
main_program = Program() main_program = Program()
startup_program = Program() startup_program = Program()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册