Commit 8b597d9d authored by chengduoZH

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/add_gather_and_BCast_op_handle
@@ -62,29 +62,33 @@ endif()
 ## Then find the reference-cblas. www.netlib.org/blas/
 set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
   "Folder contains reference-cblas")
-set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
-  ${REFERENCE_CBLAS_ROOT}/include
-  /usr/include
-  /usr/include/cblas
-)
-
-set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
-  ${REFERENCE_CBLAS_ROOT}/lib
-  /usr/lib
-  /usr/lib/blas/reference/
-  /usr/lib/reference/
-)
+if(NOT CMAKE_CROSSCOMPILING)
+  set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
+    ${REFERENCE_CBLAS_ROOT}/include
+    /usr/include
+    /usr/include/cblas
+  )
+
+  set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
+    ${REFERENCE_CBLAS_ROOT}/lib
+    /usr/lib
+    /usr/lib/blas/reference/
+    /usr/lib/reference/
+  )
+else()
+  # Disable the finding of reference cblas under host's system path
+  set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
+  set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
+endif()

 find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
   ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
 find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
   ${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
-if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
+if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
   set(CBLAS_FOUND ON)
   set(CBLAS_PROVIDER REFERENCE)
   set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
...
@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
 SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
 SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
 IF(APPLE)
-  SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+  SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
 ELSE()
-  SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
+  SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
 ENDIF()
 ExternalProject_Add(
   extern_grpc
   DEPENDS protobuf zlib
   GIT_REPOSITORY "https://github.com/grpc/grpc.git"
-  GIT_TAG "v1.8.x"
+  GIT_TAG "v1.11.x"
   PREFIX ${GRPC_SOURCES_DIR}
   UPDATE_COMMAND ""
   CONFIGURE_COMMAND ""
...
@@ -11,19 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-IF(MOBILE_INFERENCE)
+
+if(MOBILE_INFERENCE OR RPI)
   return()
-ENDIF()
+endif()

 include (ExternalProject)

 # NOTE: snappy is needed when linking with recordio
-SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
-SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
-SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE)
+set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
+set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
+set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
+
+set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")

 ExternalProject_Add(
   extern_snappy
@@ -51,8 +52,7 @@ ExternalProject_Add(
 )

 add_library(snappy STATIC IMPORTED GLOBAL)
-set_property(TARGET snappy PROPERTY IMPORTED_LOCATION
-             "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
+set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})

 include_directories(${SNAPPY_INCLUDE_DIR})

 add_dependencies(snappy extern_snappy)
@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-IF(MOBILE_INFERENCE)
+
+IF(MOBILE_INFERENCE OR RPI)
   return()
 ENDIF()
@@ -21,9 +20,11 @@ include (ExternalProject)
 # NOTE: snappy is needed when linking with recordio
-SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
-SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
-SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE)
+set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
+set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
+set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
+
+set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")

 ExternalProject_Add(
   extern_snappystream
@@ -51,8 +52,7 @@ ExternalProject_Add(
 )

 add_library(snappystream STATIC IMPORTED GLOBAL)
-set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION
-             "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
+set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})

 include_directories(${SNAPPYSTREAM_INCLUDE_DIR})  # For snappystream to include its own headers.
 include_directories(${THIRD_PARTY_PATH}/install)  # For Paddle to include snappy stream headers.
...
@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME)
       list(REMOVE_ITEM cc_library_DEPS warpctc)
       add_dependencies(${TARGET_NAME} warpctc)
     endif()
-    if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
-      # Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
-      # WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
-      target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
-      list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
-    else()
-      target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
-    endif()
+    target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
     add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
   endif()
@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS ARGS)
     cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_executable(${TARGET_NAME} ${cc_test_SRCS})
-    # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
-    target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
-    if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
-      list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
-    endif()
+    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
     add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
     add_test(NAME ${TARGET_NAME}
              COMMAND ${TARGET_NAME} ${cc_test_ARGS}
...
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 set_property(GLOBAL PROPERTY FLUID_MODULES "")
 # find all fluid modules is used for paddle fluid static library
 function(find_fluid_modules TARGET_NAME)
   get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
+  string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
   string(FIND "${__target_path}" "fluid" pos)
   if(pos GREATER 1)
     get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
@@ -77,6 +92,23 @@ elseif (WITH_MKLML)
   )
 endif()

+if(NOT MOBILE_INFERENCE AND NOT RPI)
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
+  copy(snappy_lib
+    SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
+    DSTS ${dst_dir} ${dst_dir}/lib)
+
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream")
+  copy(snappystream_lib
+    SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
+    DSTS ${dst_dir} ${dst_dir}/lib)
+
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib")
+  copy(zlib_lib
+    SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
+    DSTS ${dst_dir} ${dst_dir}/lib)
+endif()
+
 # paddle fluid module
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")
...
@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
 endif()

 add_subdirectory(testing)
-if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS)
+if(NOT MOBILE_INFERENCE AND NOT RPI)
   add_subdirectory(fluid)
 endif()
@@ -3,6 +3,7 @@ add_subdirectory(platform)
 add_subdirectory(framework)
 add_subdirectory(operators)
 add_subdirectory(pybind)
-add_subdirectory(inference)
 add_subdirectory(string)
 add_subdirectory(recordio)
+# NOTE: please keep the inference subdirectory last.
+add_subdirectory(inference)
@@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
         COMMENT "Copy generated python proto into directory paddle/fluid/proto."
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

-cc_library(backward SRCS backward.cc DEPS net_op)
-cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
 cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)

 cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
-framework_proto backward glog lod_rank_table feed_fetch_method)
+framework_proto glog lod_rank_table feed_fetch_method)

 cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor)
...
This diff has been collapsed.
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
namespace paddle {
namespace framework {
// Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment.
extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars);
struct GradVarInfo {
GradVarInfo() {}
GradVarInfo(const std::string& name, int block_idx, int op_idx)
: name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
bool operator==(const GradVarInfo& b) const {
return name_ == b.name_ && block_idx_ == b.block_idx_ &&
op_idx_ == b.op_idx_;
}
std::string name_;
int block_idx_;
int op_idx_;
};
using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework
} // namespace paddle
This diff has been collapsed.
@@ -92,7 +92,7 @@ class BlockDesc {

   /*
    * Remove Op and its input/output variables.
-   * Note that for either input or ouput variable, if it is also an input or
+   * Note that for either input or output variable, if it is also an input or
    * output variable of other ops, we should retain it.
    */
   void RemoveOp(size_t s, size_t e);
...
@@ -48,7 +48,7 @@ void BroadcastOpHandle::RunImpl() {
   auto out_scope_idx = out_handle->scope_idx_;
   PADDLE_ENFORCE_LT(out_scope_idx, local_scopes_.size(),
-                    "%s is not the the local_scopes ", out_handle->name_);
+                    "%s is not in the local_scopes ", out_handle->name_);
   auto *s = local_scopes_[out_scope_idx];
   auto out_var = s->FindVar(out_handle->name_);
   PADDLE_ENFORCE_EQ(out_p.which(), in_place.which(),
...
@@ -57,15 +57,12 @@ class BroadcastTester : public ::testing::Test {
     }
   }

-  template <class T>
   void BroadcastInitOp(int input_scope_idx) {
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
       local_scope_.push_back(&g_scope_.NewScope());
-      auto* out_var = local_scope_[j]->Var("out");
-      out_var->GetMutable<T>();
+      local_scope_[j]->Var("out");
     }
-    auto* in_var = local_scope_[input_scope_idx]->Var("input");
-    in_var->GetMutable<T>();
+    local_scope_[input_scope_idx]->Var("input");

     bc_op_handle_ = new f::details::BroadcastOpHandle(local_scope_, gpu_list_);
@@ -84,7 +81,6 @@ class BroadcastTester : public ::testing::Test {
       out_var_handle->name_ = "out";
       out_var_handle->version_ = 2;
       out_var_handle->scope_idx_ = j;
-      out_var_handle->generated_op_ = bc_op_handle_;
       bc_op_handle_->AddOutput(out_var_handle);
     }
   }
@@ -109,7 +105,7 @@ class BroadcastTester : public ::testing::Test {
   void TestBroadcastLodTensor() {
     int input_scope_idx = 0;
-    BroadcastInitOp<f::LoDTensor>(input_scope_idx);
+    BroadcastInitOp(input_scope_idx);

     auto in_var = local_scope_[input_scope_idx]->Var("input");
     auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
@@ -148,7 +144,7 @@ class BroadcastTester : public ::testing::Test {
   void TestBroadcastSelectedRows() {
     int input_scope_idx = 0;
-    BroadcastInitOp<f::SelectedRows>(input_scope_idx);
+    BroadcastInitOp(input_scope_idx);

     auto in_var = local_scope_[input_scope_idx]->Var("input");
     auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
...
@@ -14,6 +14,8 @@

 #include "paddle/fluid/framework/details/computation_op_handle.h"

+#include <string>
+
 namespace paddle {
 namespace framework {
 namespace details {
@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() {
     }
   }

-  op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get<Scope *>(), place_);
+  op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
 }

 std::string ComputationOpHandle::Name() const { return op_->Type(); }
...
@@ -14,6 +14,9 @@

 #include "paddle/fluid/framework/details/fetch_op_handle.h"

+#include <string>
+#include <vector>
+
 namespace paddle {
 namespace framework {
 namespace details {
@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() {

   for (size_t i = 0; i < scopes.size(); ++i) {
     auto &scope = scopes[i];
-    auto &t = scope->FindVar(var_name)->Get<framework::LoDTensor>();
+    auto &t = scope->FindVar(kLocalExecScopeName)
+                  ->Get<Scope *>()
+                  ->FindVar(var_name)
+                  ->Get<framework::LoDTensor>();
     if (platform::is_gpu_place(var->place_)) {
 #ifdef PADDLE_WITH_CUDA
       TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);
...
@@ -28,6 +28,12 @@ void GatherOpHandle::RunImpl() {
                     "The number of inputs should be equal to the number of places.");
   PADDLE_ENFORCE_EQ(this->outputs_.size(), 1,
                     "The number of output should be one.");
+  auto in_0_handle = static_cast<VarHandle *>(inputs_[0]);
+  auto pre_in_var =
+      local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_);
+  PADDLE_ENFORCE(pre_in_var->IsType<framework::SelectedRows>(),
+                 "Currently, gather_op can only gather SelectedRows.");
+  auto pre_place = in_0_handle->place_;

   // Wait input done, this Wait is asynchronous operation
   for (auto *in : inputs_) {
@@ -36,10 +42,6 @@ void GatherOpHandle::RunImpl() {
       in->generated_op_->Wait(dev_ctxes_[p]);
     }
   }
-  auto in_0_handle = static_cast<VarHandle *>(inputs_[0]);
-  auto pre_in_var =
-      local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_);
-  auto pre_place = in_0_handle->place_;

   std::vector<int64_t> out_rows;
   std::vector<Tensor *> in_tensors;
@@ -110,9 +112,9 @@ void GatherOpHandle::RunImpl() {
       s = e;
     }
   } else if (pre_in_var->IsType<framework::LoDTensor>()) {
-    // gather LoDTensor ???
+    PADDLE_THROW("Currently, Var can only be SelectedRows.");
   } else {
-    PADDLE_THROW("Var should be LoDTensor or SelectedRows.");
+    PADDLE_THROW("Var should be SelectedRows.");
   }
 }
...
@@ -56,15 +56,12 @@ class GatherTester : public ::testing::Test {
     }
   }

-  template <class T>
   void InitGatherOp(int input_scope_idx) {
     for (size_t j = 0; j < gpu_list_.size(); ++j) {
       local_scope_.push_back(&g_scope_.NewScope());
-      auto* out_var = local_scope_[j]->Var("input");
-      out_var->GetMutable<T>();
+      local_scope_[j]->Var("input");
     }
-    auto* in_var = local_scope_[input_scope_idx]->Var("out");
-    in_var->GetMutable<T>();
+    local_scope_[input_scope_idx]->Var("out");

     gather_op_handle_ = new f::details::GatherOpHandle(local_scope_, gpu_list_);
@@ -106,11 +103,9 @@ class GatherTester : public ::testing::Test {
     }
   }

-  void TestGatherLodTensor() {}
-
   void TestGatherSelectedRows() {
     int output_scope_idx = 0;
-    InitGatherOp<f::SelectedRows>(output_scope_idx);
+    InitGatherOp(output_scope_idx);

     int height = kDims[0] * 2;
     std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
@@ -169,21 +164,12 @@ class GatherTester : public ::testing::Test {
   f::details::GatherOpHandle* gather_op_handle_;
 };

-// TEST_F(GatherTester, TestCPUGatherTestLodTensor) {
-//   InitCtxOnGpu(false);
-//   TestGatherLodTensor();
-// }
-
 TEST_F(GatherTester, TestCPUGatherTestSelectedRows) {
   InitCtxOnGpu(false);
   TestGatherSelectedRows();
 }

 #ifdef PADDLE_WITH_CUDA
-// TEST_F(GatherTester, TestGPUGatherTestLodTensor) {
-//   InitCtxOnGpu(true);
-//   TestGatherLodTensor();
-// }
-
 TEST_F(GatherTester, TestGPUGatherTestSelectedRows) {
   InitCtxOnGpu(true);
...
@@ -32,6 +32,8 @@ namespace details {
 // temporarily place it in op_handle.
 Tensor *GetTensorFromVar(Variable *in_var);

+constexpr char kLocalExecScopeName[] = "@LOCAL_SCOPE@";
+
 class OpHandleBase {
  private:
  DISABLE_COPY_AND_ASSIGN(OpHandleBase);
...
@@ -15,13 +15,15 @@

 #pragma once

 #include <memory>
+#include <string>
+#include <vector>
+
 #include "paddle/fluid/framework/details/ssa_graph.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"

 namespace paddle {
 namespace framework {
 namespace details {

 class SSAGraphExecutor {
   DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor);
...
@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
     ready_ops.clear();
   };

-  // Create local scopes.
-  for (auto &scope : local_scopes_) {
-    auto &local_scope = scope->NewScope();
-    *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>() = &local_scope;
-  }
-
   // Step 3. Execution
   while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) {
     // 1. Run All Ready ops
@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
   PADDLE_ENFORCE(ready_ops.empty());
   PADDLE_ENFORCE(delayed_ops.empty());
   PADDLE_ENFORCE(blocked_by_delayed_ops.empty());
-  ++computation_count_;
-
-  auto sync_computation = [&] {
-    computation_count_ = 0;
-    // Wait All computational streams
-    for (auto p : this->places_) {
-      platform::DeviceContextPool::Instance().Get(p)->Wait();
-    }
-    for (auto &scope : local_scopes_) {
-      scope->DropKids();
-    }
-  };

   // Wait FetchOps.
   if (!fetch_ops.empty()) {
     fetch_ops.clear();
-    sync_computation();
-  }
-
-  if (computation_count_ == max_async_computation) {
-    sync_computation();
-  }
-
-  // NOTE: the temp scope can be dropped lazily if needed.
-  // Drop tmp scopes;
-  for (auto &scope : local_scopes_) {
-    auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>();
-    kid = nullptr;
   }

   return fetch_data;
...
@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
   std::unique_ptr<platform::EnforceNotMet> exception_;
   std::atomic<int> running_ops_;
   bool allow_op_delay_;
-
-  size_t computation_count_{0};
-  size_t max_async_computation{100};
 };

 }  // namespace details
...
@@ -46,7 +46,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
   }
 }

-static DDim GetDims(const Scope& scope, const std::string& name) {
+static DDim GetDims(const Scope& scope, const std::string& name,
+                    bool get_actual_dim = false) {
   Variable* var = scope.FindVar(name);
   if (var == nullptr) {
     return DDim({-1});
@@ -55,7 +56,11 @@ static DDim GetDims(const Scope& scope, const std::string& name) {
   if (var->IsType<LoDTensor>()) {
     return var->Get<LoDTensor>().dims();
   } else if (var->IsType<SelectedRows>()) {
-    return var->Get<SelectedRows>().GetCompleteDims();
+    if (get_actual_dim) {
+      return var->Get<SelectedRows>().value().dims();
+    } else {
+      return var->Get<SelectedRows>().GetCompleteDims();
+    }
   } else {
     return DDim({-1});
   }
@@ -129,7 +134,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
     for (size_t i = 0; i < input.second.size(); ++i) {
       ss << input.second[i];
       if (scope) {
-        ss << "[" << GetDims(*scope, input.second[i]) << "]";
+        ss << "[" << GetDims(*scope, input.second[i], true) << "]";
         ss << "(" << GetLoD(*scope, input.second[i]) << ")";
       }
       if (i != input.second.size() - 1) {
@@ -149,7 +154,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
     for (size_t i = 0; i < output.second.size(); ++i) {
       ss << output.second[i];
       if (scope) {
-        ss << "[" << GetDims(*scope, output.second[i]) << "]";
+        ss << "[" << GetDims(*scope, output.second[i], true) << "]";
         ss << "(" << GetLoD(*scope, output.second[i]) << ")";
       }
       if (i != output.second.size() - 1) {
...
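For context on the get_actual_dim switch above: GetCompleteDims() on a SelectedRows variable reports the logical shape (the full height by the value width), while value().dims() reports only the rows actually materialized, which is the more informative size to print from DebugStringEx. A minimal sketch of the difference, assuming the SelectedRows constructor and accessors this codebase exposes:

#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/place.h"

namespace fw = paddle::framework;

// Illustrative only: a SelectedRows with logical height 100 that
// materializes just 3 rows of width 8.
void SelectedRowsDimsSketch() {
  fw::SelectedRows sr(/*rows=*/{0, 7, 42}, /*height=*/100);
  sr.mutable_value()->mutable_data<float>(fw::make_ddim({3, 8}),
                                          paddle::platform::CPUPlace());
  // sr.value().dims()    -> {3, 8}:   actual storage (get_actual_dim = true)
  // sr.GetCompleteDims() -> {100, 8}: logical shape  (get_actual_dim = false)
}

Printing the actual dims keeps the debug string honest about memory use when a sparse gradient touches only a handful of rows.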
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/parallel_executor.h"

 #include <string>
+#include <tuple>
 #include <vector>

 #ifdef PADDLE_WITH_CUDA
@@ -41,6 +42,8 @@ class ParallelExecutorPrivate {
 #ifdef PADDLE_WITH_CUDA
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif
+
+  std::vector<std::tuple<std::string, proto::VarType::Type, bool>> var_types_;
 };

 std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
@@ -97,14 +100,9 @@ ParallelExecutor::ParallelExecutor(
       allow_op_delay));

   // Step 3. Create vars in each scope;
-  for (auto *scope : member_->local_scopes_) {
-    for (auto *var : main_program.Block(0).AllVars()) {
-      if (scope->FindVar(var->Name()) != nullptr) {
-        continue;
-      }
-      InitializeVariable(scope->Var(var->Name()), var->GetType());
-    }
+  for (auto *var : main_program.Block(0).AllVars()) {
+    member_->var_types_.emplace_back(var->Name(), var->GetType(),
+                                     var->Persistable());
   }
 }
@@ -163,9 +161,42 @@ void ParallelExecutor::Run(
     const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
   platform::RecordBlock b(0);
   SplitTensorToPlaces(feed_tensors);
+
+  // Create local scopes.
+  for (auto &scope : member_->local_scopes_) {
+    Scope &local_scope = scope->NewScope();
+    *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
+        &local_scope;
+    for (auto &name_type_pair : member_->var_types_) {
+      if (scope->FindVar(std::get<0>(name_type_pair)) != nullptr) {
+        continue;
+      }
+      if (std::get<2>(name_type_pair)) {  // Persistable
+        InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
+                           std::get<1>(name_type_pair));
+      } else {
+        InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
+                           std::get<1>(name_type_pair));
+      }
+    }
+  }
+
   auto fetch_data = member_->executor_->Run(fetch_tensors);
   *member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() =
       fetch_data;
+
+  // Wait All computational streams
+  for (auto p : member_->places_) {
+    platform::DeviceContextPool::Instance().Get(p)->Wait();
+  }
+  for (auto &scope : member_->local_scopes_) {
+    auto &local_scope =
+        *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
+    scope->DeleteScope(local_scope);
+    local_scope = nullptr;
+  }
 }

 void ParallelExecutor::SplitTensorToPlaces(
...
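Taken together with the ThreadedSSAGraphExecutor hunk above, per-iteration scope management now lives in ParallelExecutor::Run: each top-level local scope gets a fresh child scope per run, published under details::kLocalExecScopeName so op handles can find it, and the child is deleted once all device streams drain. A self-contained sketch of the same parent/child-scope pattern; the Scope here is a toy stand-in for illustration, not Paddle's class:

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

// Toy analogue of the lifecycle above: a parent scope owns per-run child
// scopes, and a well-known key lets op handles find the current child.
class Scope {
 public:
  Scope *NewScope() {
    kids_.emplace_back(new Scope);
    return kids_.back().get();
  }
  void DeleteScope(Scope *kid) {
    for (auto it = kids_.begin(); it != kids_.end(); ++it) {
      if (it->get() == kid) {
        kids_.erase(it);
        return;
      }
    }
  }
  std::unordered_map<std::string, Scope *> vars;  // toy variable table

 private:
  std::vector<std::unique_ptr<Scope>> kids_;
};

constexpr char kLocalExecScopeName[] = "@LOCAL_SCOPE@";  // key name assumed

void RunOnce(std::vector<Scope *> *local_scopes) {
  for (Scope *s : *local_scopes) {
    s->vars[kLocalExecScopeName] = s->NewScope();  // fresh child per run
  }
  // ... execute ops against vars[kLocalExecScopeName] ...
  for (Scope *s : *local_scopes) {
    s->DeleteScope(s->vars[kLocalExecScopeName]);  // reclaim per-run vars
    s->vars[kLocalExecScopeName] = nullptr;
  }
}

Op handles such as ComputationOpHandle and FetchOpHandle (see the hunks above) resolve the child through this same key instead of the old hard-coded "@TMP_SCOPE@" string.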
@@ -14,18 +14,17 @@ limitations under the License. */

 #include "paddle/fluid/framework/prune.h"

+#include <gtest/gtest.h>
+#include <string>
+
 #include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/operators/net_op.h"

 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/program_desc.h"

-#include <gtest/gtest.h>

 namespace f = paddle::framework;
-namespace ops = paddle::operators;

 void AddOp(const std::string &type, const f::VariableNameMap &inputs,
            const f::VariableNameMap &outputs, f::AttributeMap attrs,
...
-set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init)
+set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)

 cc_library(paddle_fluid_api
     SRCS io.cc
@@ -11,7 +11,7 @@ cc_library(paddle_fluid DEPS ${fluid_modules})
 # Create shared library
 cc_library(paddle_fluid_shared SHARED
     SRCS io.cc
-    DEPS ARCHIVE_START ${GLOB_OP_LIB} ${FLUID_CORE_MODULES} ARCHIVE_END)
+    DEPS ${fluid_modules})
 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
 if(NOT APPLE)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not supported on Mac.
...
@@ -17,10 +17,16 @@ limitations under the License. */

 #include <fstream>

 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/pybind/pybind.h"

 namespace paddle {
 namespace inference {

+// Temporarily add this function for exposing framework::InitDevices() when
+// linking the inference shared library.
+void Init(bool init_p2p) { framework::InitDevices(init_p2p); }
+
 void ReadBinaryFile(const std::string& filename, std::string& contents) {
   std::ifstream fin(filename, std::ios::in | std::ios::binary);
   PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", filename);
...
@@ -18,12 +18,15 @@ limitations under the License. */

 #include <string>
 #include <vector>

 #include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/init.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"

 namespace paddle {
 namespace inference {

+void Init(bool init_p2p);
+
 void LoadPersistables(framework::Executor& executor, framework::Scope& scope,
                       const framework::ProgramDesc& main_program,
                       const std::string& dirname,
...
@@ -17,7 +17,7 @@ function(inference_test TARGET_NAME)
     string(REGEX REPLACE "^_$" "" arg "${arg}")
     cc_test(test_inference_${TARGET_NAME}${arg}
             SRCS test_inference_${TARGET_NAME}.cc
-            DEPS ARCHIVE_START paddle_fluid ARCHIVE_END
+            DEPS paddle_fluid
             ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.inference.model)
     set_tests_properties(test_inference_${TARGET_NAME}${arg}
                          PROPERTIES DEPENDS test_${TARGET_NAME})
...
@@ -100,7 +100,7 @@ function(op_library TARGET)
   endif()

   # Define operators that don't need pybind here.
-  foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op")
+  foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op")
     if ("${TARGET}" STREQUAL "${manual_pybind_op}")
       set(pybind_flag 1)
     endif()
@@ -199,7 +199,6 @@ else()
   set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op)
 endif()

-op_library(cond_op DEPS framework_proto tensor net_op)
 op_library(cross_entropy_op DEPS cross_entropy)
 op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
 op_library(softmax_op DEPS softmax)
@@ -259,7 +258,6 @@ endforeach()
 set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")

 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
-cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
 cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
 cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op)
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/concat_op.h"
+
 #include <string>
 #include <vector>
@@ -34,7 +35,10 @@ class ConcatOp : public framework::OperatorWithKernel {
     size_t axis = static_cast<size_t>(ctx->Attrs().Get<int>("axis"));
     const size_t n = ins.size();

-    PADDLE_ENFORCE_GT(n, 1, "Input tensors count should > 1.");
+    PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
+    if (n == 1) {
+      VLOG(3) << "Warning: concat op has only one input, may waste memory";
+    }

     auto out_dims = ins[0];
     size_t in_zero_dims_size = out_dims.size();
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/scatter.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
using Scope = framework::Scope;
using Variable = framework::Variable;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DDim = framework::DDim;
framework::Scope& CondOp::AddSubScope(const Scope& scope) const {
auto sub_scopes_var = scope.FindVar("SubScopes");
PADDLE_ENFORCE_NOT_NULL(sub_scopes_var,
"Output(SubScopes) of CondOp should not be null.");
auto sub_scopes = sub_scopes_var->GetMutable<std::vector<Scope*>>();
auto& sub_scope = scope.NewScope();
sub_scopes->push_back(&sub_scope);
return sub_scope;
}
std::vector<framework::Scope*>& CondOp::GetSubScopes(
const framework::Scope& scope) const {
auto sub_scopes_var = scope.FindVar("SubScopes");
PADDLE_ENFORCE_NOT_NULL(sub_scopes_var,
"Output(SubScopes) of CondOp should not be null.");
return *sub_scopes_var->GetMutable<std::vector<framework::Scope*>>();
}
LoDTensor& CondOp::AddIndexTensor(const Scope& scope) const {
auto index_tensors_var = scope.FindVar("IndexTensors");
PADDLE_ENFORCE_NOT_NULL(index_tensors_var,
"Output(IndexTensors) of CondOp should not be null.");
auto& index_tensors =
*index_tensors_var->GetMutable<std::vector<LoDTensor>>();
index_tensors.push_back(LoDTensor());
return index_tensors.back();
}
std::vector<framework::LoDTensor>& CondOp::GetIndexTensors(
const framework::Scope& scope) const {
auto* index_tensors_var = scope.FindVar("IndexTensors");
PADDLE_ENFORCE_NOT_NULL(index_tensors_var,
"Output(IndexTensors) of CondOp should not be null.");
return *index_tensors_var->GetMutable<std::vector<framework::LoDTensor>>();
}
void CondOp::PrepareDataForSubnet(
const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const {
PADDLE_ENFORCE(!Inputs("Xs").empty(), "Inputs(Xs) of CondOp can't be empty.");
for (int i = 0; i < BRANCH_NUM; ++i) {
// Create two sub scopes for true and false branches
// sub_scopes[0] for the true branch
// sub_scopes[1] for the false branch
AddSubScope(scope);
// Create two tensors for true and false indices:
// index_tensors[0] for the true branch
// index_tensors[1] for the false branch
AddIndexTensor(scope);
}
Variable* cond_var = scope.FindVar(Input("Cond"));
PADDLE_ENFORCE_NOT_NULL(cond_var,
"Input(Cond) of CondOp should not be null.");
const LoDTensor* cond = cond_var->GetMutable<LoDTensor>();
// get the true/false index at runtime according to cond tensor
// index_vectors[0]: vector<int>, contains all index for cond[i] == true
// index_vectors[1]: vector<int>, contains all index for cond[i] == false
std::vector<std::vector<int>> index_vectors;
index_vectors.resize(BRANCH_NUM);
const int* cond_data = cond->data<int>();
for (int i = 0; i < cond->dims()[0]; ++i) {
if (cond_data[i])
index_vectors[TRUE_BRANCH].push_back(i);
else
index_vectors[FALSE_BRANCH].push_back(i);
}
// put index_vectors[0] and index_vectors[1] into two tensors:
// index_tensors[0] and index_tensors[1]
std::vector<framework::LoDTensor>& index_tensors = GetIndexTensors(scope);
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
for (int i = 0; i < BRANCH_NUM; ++i) {
DDim dim = {static_cast<int64_t>(index_vectors[i].size())};
int* index_tensor_data_ptr =
index_tensors[i].mutable_data<int>(dim, platform::CPUPlace());
memcpy(index_tensor_data_ptr, index_vectors[i].data(),
dim[0] * sizeof(int));
}
// create input in subscopes according to index_vectors
for (auto& input : Inputs("Xs")) {
Variable* var_parent = scope.FindVar(input);
PADDLE_ENFORCE_NOT_NULL(var_parent);
const auto* tensor_parent = &var_parent->Get<LoDTensor>();
for (int i = 0; i < BRANCH_NUM; ++i) {
Variable* var_child = sub_scopes[i]->FindVar(input);
PADDLE_ENFORCE_NOT_NULL(var_child);
auto* tensor_child = var_child->GetMutable<LoDTensor>();
// Resize child
DDim dim = tensor_parent->dims();
dim[0] = index_tensors[i].dims()[0];
tensor_child->mutable_data<float>(dim, platform::CPUPlace());
CPUGather<float>(dev_ctx, *tensor_parent, index_tensors[i], tensor_child);
}
}
// create output_tensors in subscope for sub_net
for (int i = 0; i < BRANCH_NUM; ++i) {
for (auto& output : (*sub_net_op_[i]).Outputs()) {
for (auto& var_name : output.second) {
sub_scopes[i]->Var(var_name);
}
}
}
}
void CondOp::MergeDataFromSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const {
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
const std::vector<framework::LoDTensor>& index_tensors =
GetIndexTensors(scope);
// Infer the output dim, out_dim[0] = true_dim[0] + false_dim[0]
PADDLE_ENFORCE(!Outputs("Outs").empty(),
"Outputs(Outs) of CondOp can't be empty.");
for (auto& output : Outputs("Outs")) {
const LoDTensor* tensor_t_out =
&sub_scopes[TRUE_BRANCH]->FindVar(output)->Get<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL");
const LoDTensor* tensor_f_out =
&sub_scopes[FALSE_BRANCH]->FindVar(output)->Get<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL");
auto* var_out = scope.FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_out, "Output not found");
LoDTensor* tensor_out = var_out->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_t_out,
"True output tensor should not be NULL");
DDim true_dim = tensor_t_out->dims();
DDim false_dim = tensor_f_out->dims();
true_dim[0] = 0;
false_dim[0] = 0;
PADDLE_ENFORCE_EQ(true_dim, false_dim,
"Outputs not of the same shape except the first dim");
DDim out_dim = tensor_t_out->dims();
out_dim[0] = tensor_t_out->dims()[0] + tensor_f_out->dims()[0];
tensor_out->Resize(out_dim);
tensor_out->mutable_data<float>(platform::CPUPlace());
}
// merge output results:
// output_tensor = true_output_tensor + false_output_tensor
for (auto& output : Outputs("Outs")) {
Variable* var_parent = scope.FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_parent);
auto* tensor_parent = var_parent->GetMutable<LoDTensor>();
for (int i = 0; i < BRANCH_NUM; ++i) {
Variable* var_child = sub_scopes[i]->FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_child);
auto* tensor_child = &var_child->Get<LoDTensor>();
ScatterAssign<float>(dev_ctx, *tensor_child, index_tensors[i],
tensor_parent);
}
}
}
void CondOp::RunImpl(const Scope& scope, const platform::Place& place) const {
// get device context from pool
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& dev_ctx = *pool.Get(place);
PrepareDataForSubnet(scope, dev_ctx);
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
for (int i = 0; i < BRANCH_NUM; ++i) {
sub_net_op_[i]->Run(*sub_scopes[i], place);
}
MergeDataFromSubnet(scope, dev_ctx);
}
class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
public:
CondOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Cond", "The condition, which is a bool vector");
AddInput("Xs", "Inputs of Subnets").AsDuplicable();
AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable();
AddOutput("SubScopes", "sub scopes for true and false branches");
AddOutput("IndexTensors", "Index Tensors contains indices for true/false");
AddComment(R"DOC(
Sample Dependent Conditional Operator.
Given Cond[i] as a 1/0 vector to indicate true/false:
Out[i] = subnet_true[i], if Cond[i] == true
Out[i] = subnet_false[i], if Cond[i] == false
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp,
paddle::operators::CondOpProtoAndCheckerMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/net_op.h"
namespace paddle {
namespace operators {
/*
* @brief CondOp is a dynamic if-else Operator
*
 * It has an input tensor named cond indicating which netop each instance will
* run.
*
* if cond == 1, it will run true_net, which is a NetOp.
*
* if cond == 0, it will run false_net, which is another NetOp.
*/
class CondOp : public framework::OperatorBase {
public:
CondOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {
sub_net_op_.resize(BRANCH_NUM);
}
CondOp(const CondOp& o)
: framework::OperatorBase(
static_cast<const framework::OperatorBase&>(o)) {
// TODO(yuyang18): Implement copy ctor well.
PADDLE_THROW("Not implemented");
}
framework::Scope& AddSubScope(const framework::Scope& scope) const;
std::vector<framework::Scope*>& GetSubScopes(
const framework::Scope& scope) const;
framework::LoDTensor& AddIndexTensor(const framework::Scope& scope) const;
std::vector<framework::LoDTensor>& GetIndexTensors(
const framework::Scope& scope) const;
void PrepareDataForSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const;
void MergeDataFromSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const;
/*
* Set True Block
*/
void set_truenet(std::unique_ptr<OperatorBase>&& net) {
sub_net_op_[TRUE_BRANCH] = std::move(net);
}
/*
* Set False Block
*/
void set_falsenet(std::unique_ptr<OperatorBase>&& net) {
sub_net_op_[FALSE_BRANCH] = std::move(net);
}
private:
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override;
private:
const int TRUE_BRANCH = 0;
const int FALSE_BRANCH = 1;
const int BRANCH_NUM = 2;
// sub_net_op_[0]: subnet_t
// sub_net_op_[1]: subnet_f
std::vector<std::unique_ptr<framework::OperatorBase>> sub_net_op_;
};
} // namespace operators
} // namespace paddle
@@ -15,6 +15,7 @@ limitations under the License. */
 #include <stdio.h>
 #include <thrust/device_vector.h>
 #include <thrust/host_vector.h>
+#include <vector>
 #include "paddle/fluid/operators/ctc_align_op.h"

 namespace paddle {
...
@@ -15,6 +15,7 @@ limitations under the License. */

 #pragma once
 #include <string.h>
+#include <vector>

 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
...
@@ -161,6 +161,7 @@ class RequestPrefetch final : public RequestBase {
     ::grpc::ByteBuffer reply;

     std::string var_name = request_->OutVarname();
+    VLOG(3) << "prefetch var " << var_name;
     auto var_desc = program_->Block(0).FindVar(var_name);
     framework::Scope* local_scope = &scope_->NewScope();
     auto* var = local_scope->FindVar(var_name);
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once

+#include <string>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -106,18 +107,18 @@ information. However, the output only shares the LoD information with input $X$.
  protected:
   std::string comment_;

-  void Replace(std::string& src, std::string from, std::string to) {
+  void Replace(std::string* src, std::string from, std::string to) {
     std::size_t len_from = std::strlen(from.c_str());
     std::size_t len_to = std::strlen(to.c_str());
-    for (std::size_t pos = src.find(from); pos != std::string::npos;
-         pos = src.find(from, pos + len_to)) {
-      src.replace(pos, len_from, to);
+    for (std::size_t pos = src->find(from); pos != std::string::npos;
+         pos = src->find(from, pos + len_to)) {
+      src->replace(pos, len_from, to);
     }
   }

   void SetComment(std::string name, std::string equation) {
-    Replace(comment_, "{name}", name);
-    Replace(comment_, "{equation}", equation);
+    Replace(&comment_, "{name}", name);
+    Replace(&comment_, "{equation}", equation);
   }
 };
...
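The Replace signature change from std::string& to std::string* follows the convention (enforced by cpplint's runtime/references rule) that mutable output parameters be passed by pointer, so a call like Replace(&comment_, ...) flags the mutation at the call site. A standalone sketch of the resulting idiom, with hypothetical example values:

#include <cstddef>
#include <iostream>
#include <string>

// In-place substring replacement that takes its output by pointer, so the
// call site spells out that `src` is mutated (mirrors the new signature;
// to.size() plays the role of len_to above).
void Replace(std::string *src, const std::string &from,
             const std::string &to) {
  for (std::size_t pos = src->find(from); pos != std::string::npos;
       pos = src->find(from, pos + to.size())) {
    src->replace(pos, from.size(), to);
  }
}

int main() {
  std::string comment = "Out = {name}({equation})";
  Replace(&comment, "{name}", "Add");
  Replace(&comment, "{equation}", "X + Y");
  std::cout << comment << "\n";  // prints: Out = Add(X + Y)
  return 0;
}

The same transformation appears at the call sites: SetComment now passes &comment_ instead of relying on a silent reference binding.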
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/gru_op.h"
+#include <string>

 namespace paddle {
 namespace operators {
...
@@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once

+#include <string>
+
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
 #include "paddle/fluid/operators/math/gru_compute.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/sequence2batch.h"

-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"

 namespace paddle {
 namespace operators {
...
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h" #include "paddle/fluid/operators/im2sequence_op.h"
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <vector>
#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
......
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/label_smooth_op.h" #include "paddle/fluid/operators/label_smooth_op.h"
#include <string>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -100,7 +100,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> { ...@@ -100,7 +100,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto x_row_max = EigenMatrix<T>::From(emission_row_max); auto x_row_max = EigenMatrix<T>::From(emission_row_max);
x_row_max.device(place) = x_row_max.device(place) =
x.maximum(Eigen::DSizes<int, 1>(1)) x.maximum(Eigen::DSizes<int, 1>(1))
.reshape(Eigen::DSizes<int, 2>(int(batch_size), 1)); .reshape(Eigen::DSizes<int, 2>(static_cast<int>(batch_size), 1));
auto x_exps = EigenMatrix<T>::From(*emission_exps); auto x_exps = EigenMatrix<T>::From(*emission_exps);
x_exps.device(place) = x_exps.device(place) =
......
...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <ostream> #include <ostream>
#include <thread> #include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/operators/listen_and_serv_op.h" #include "paddle/fluid/operators/listen_and_serv_op.h"
...@@ -88,8 +89,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -88,8 +89,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
auto ins = Inputs("X"); auto ins = Inputs("X");
auto fan_in = Attr<int>("Fanin"); auto fan_in = Attr<int>("Fanin");
auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock); auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock);
auto *program = block->Program(); auto *prefetch_block = Attr<framework::BlockDesc *>(kPrefetchBlock);
auto *program = optimize_block->Program();
size_t num_blocks = program->Size(); size_t num_blocks = program->Size();
PADDLE_ENFORCE_GE(num_blocks, 2, PADDLE_ENFORCE_GE(num_blocks, 2,
"server program should have at least 2 blocks"); "server program should have at least 2 blocks");
...@@ -97,18 +99,25 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -97,18 +99,25 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
framework::Executor executor(dev_place); framework::Executor executor(dev_place);
std::vector<int> block_list; std::vector<int> block_list;
for (size_t blkid = 1; blkid < num_blocks; ++blkid) { for (size_t blkid = 1; blkid < num_blocks; ++blkid) {
block_list.push_back(blkid); if (blkid != prefetch_block->ID()) {
block_list.push_back(blkid);
}
} }
auto prepared = executor.Prepare(*program, block_list); auto optimize_prepared = executor.Prepare(*program, block_list);
// Insert placeholder for block0 which holds current op itself. // Insert placeholder for block0 which holds current op itself.
prepared.insert(prepared.begin(), optimize_prepared.insert(
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr)); optimize_prepared.begin(),
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
rpc_service_->SetScope(&recv_scope); rpc_service_->SetScope(&recv_scope);
rpc_service_->SetDevCtx(&dev_ctx); rpc_service_->SetDevCtx(&dev_ctx);
// TODO(qiao) set proper fields for table lookup and update // TODO(qiao) set proper fields for table lookup and update
rpc_service_->SetExecutor(&executor); rpc_service_->SetExecutor(&executor);
rpc_service_->SetPrefetchBlkdId(0); VLOG(3) << "prefetch block id is " << prefetch_block->ID();
auto prefetch_prepared = executor.Prepare(*program, prefetch_block->ID());
rpc_service_->SetPrefetchBlkdId(prefetch_block->ID());
rpc_service_->SetPrefetchPreparedCtx(prefetch_prepared.get());
prefetch_prepared.release();
rpc_service_->SetProgram(program); rpc_service_->SetProgram(program);
// start the server listening after all member initialized. // start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_)); server_thread_.reset(new std::thread(RunServer, rpc_service_));
...@@ -166,16 +175,18 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -166,16 +175,18 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
parallel_blkids.push_back(1); parallel_blkids.push_back(1);
double ts = detail::GetTimestamp(); double ts = detail::GetTimestamp();
for (size_t blkid = 2; blkid < num_blocks; ++blkid) { for (size_t blkid = 2; blkid < num_blocks; ++blkid) {
if (program->Block(blkid).Parent() != last_parent_blkid) { if (blkid != prefetch_block->ID()) {
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program, if (program->Block(blkid).Parent() != last_parent_blkid) {
&recv_scope); ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared,
parallel_blkids.clear(); program, &recv_scope);
last_parent_blkid = program->Block(blkid).Parent(); parallel_blkids.clear();
last_parent_blkid = program->Block(blkid).Parent();
}
parallel_blkids.push_back(blkid);
} }
parallel_blkids.push_back(blkid);
} }
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program, ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared,
&recv_scope); program, &recv_scope);
VLOG(2) << "run all blocks spent " << detail::GetTimestamp() - ts << "(ms)"; VLOG(2) << "run all blocks spent " << detail::GetTimestamp() - ts << "(ms)";
// Reset the received sparse variables, the sum operator would not // Reset the received sparse variables, the sum operator would not
...@@ -211,6 +222,8 @@ from send_op and send back variables to recv_op. ...@@ -211,6 +222,8 @@ from send_op and send back variables to recv_op.
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); .AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
AddAttr<framework::BlockDesc *>(kOptimizeBlock, AddAttr<framework::BlockDesc *>(kOptimizeBlock,
"BlockID to run on server side."); "BlockID to run on server side.");
AddAttr<framework::BlockDesc *>(kPrefetchBlock,
"prefetch block to run on server side.");
AddAttr<int>("Fanin", "How many clients send to this server.") AddAttr<int>("Fanin", "How many clients send to this server.")
.SetDefault(1); .SetDefault(1);
} }
......
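The new control flow is easy to lose in the interleaved diff: the server now prepares every block except block 0 (which holds the listen_and_serv op itself) and the prefetch block for the optimize pass, and prepares the prefetch block separately for table lookups. A toy sketch with plain integers standing in for BlockDesc IDs (the block layout below is hypothetical):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical server program: block 0 = listen_and_serv itself,
  // blocks 1..4 = optimize blocks, block 5 = the prefetch block.
  const size_t num_blocks = 6;
  const size_t prefetch_block_id = 5;

  // Mirror of the loop in RunImpl: every block except block 0 and the
  // prefetch block is collected for the optimize pass.
  std::vector<size_t> block_list;
  for (size_t blkid = 1; blkid < num_blocks; ++blkid) {
    if (blkid != prefetch_block_id) {
      block_list.push_back(blkid);
    }
  }

  for (size_t blkid : block_list) {
    std::cout << "optimize block " << blkid << "\n";
  }
  std::cout << "prefetch block " << prefetch_block_id
            << " is prepared separately\n";
}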
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <stdint.h>
 #include <ostream>
+#include <string>
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
@@ -27,6 +28,7 @@ namespace paddle {
 namespace operators {
 constexpr char kOptimizeBlock[] = "OptimizeBlock";
+constexpr char kPrefetchBlock[] = "PrefetchBlock";
 void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service);
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/logical_op.h"
+#include <string>
 #include "paddle/fluid/framework/op_registry.h"
 namespace paddle {
...
@@ -78,6 +78,9 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
               "(boolean, default false) "
               "Sparse update.")
         .SetDefault(false);
+    AddAttr<bool>("is_distributed",
+                  "(boolean, default false) distributed lookup table.")
+        .SetDefault(false);
     AddAttr<int64_t>("padding_idx",
                      "(int64, default -1) "
                      "If the value is -1, it makes no effect to lookup. "
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/lrn_op.h"
+#include <string>
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/lstm_op.h"
+#include <string>
 namespace paddle {
 namespace operators {
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <string>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"
...
@@ -18,6 +18,7 @@ https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.c
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/cross_entropy_op.h"
+#include "paddle/fluid/operators/lstm_unit_op.h"
 #include "paddle/fluid/platform/assert.h"
 #include "paddle/fluid/platform/hostdevice.h"
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/lstmp_op.h"
+#include <string>
 namespace paddle {
 namespace operators {
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <string>
 #include "paddle/fluid/operators/activation_op.h"
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/matmul_op.h"
+#include <algorithm>
+#include <vector>
 namespace paddle {
 namespace operators {
...
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <algorithm>
+#include <functional>
+#include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/matmul.h"
...
@@ -13,6 +13,8 @@
 * limitations under the License. */
 #include "paddle/fluid/operators/maxout_op.h"
+#include <vector>
 namespace paddle {
 namespace operators {
...
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/minus_op.h"
-#include "paddle/fluid/operators/net_op.h"
+#include <string>
+#include <vector>
 namespace paddle {
 namespace operators {
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/momentum_op.h"
 namespace paddle {
 namespace operators {
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/mul_op.h"
+#include <vector>
 namespace paddle {
 namespace operators {
...
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/net_op.h"
#include <set>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
const char NetOp::kAll[] = "all";
void NetOp::CompleteAddOp(bool calc) {
add_op_done_ = true;
if (!calc) return;
std::set<std::string> input_set;
std::set<std::string> output_set;
for (auto& op : ops_) {
for (auto& ipt : op->Inputs()) {
for (auto& var_name : ipt.second) {
// If input variable has been in output set, then it will be
// added into intermediate_outputs_. Otherwise, it will be
// added into input set.
if (Contains(output_set, var_name)) {
intermediate_outputs_.insert(var_name);
} else {
input_set.insert(var_name);
}
}
}
for (auto& opt : op->Outputs()) {
for (auto& var_name : opt.second) {
output_set.insert(var_name);
}
}
}
auto& inputs = inputs_[kAll];
inputs.reserve(input_set.size());
std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs));
auto& outputs = outputs_[kAll];
outputs.reserve(output_set.size());
std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs));
}
std::string NetOp::DebugStringEx(const framework::Scope* scope) const {
std::ostringstream os;
os << OperatorBase::DebugStringEx(scope) << std::endl;
for (auto& op : ops_) {
std::istringstream is(op->DebugStringEx(scope));
for (std::string line; std::getline(is, line);) {
os << " " << line << std::endl;
}
}
return os.str();
}
bool NetOp::IsNetOp() const { return true; }
std::vector<std::string> NetOp::OutputVars(bool has_intermediate) const {
std::vector<std::string> all;
for (auto& pair : this->outputs_) {
for (auto& var_name : pair.second) {
all.push_back(var_name);
}
}
if (has_intermediate) {
return all;
}
std::vector<std::string> ret_val;
for (auto& each : all) {
if (!Contains(intermediate_outputs_, each)) {
ret_val.push_back(each);
}
}
return ret_val;
}
NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
std::unique_ptr<framework::OperatorBase> NetOp::Clone() const {
PADDLE_ENFORCE(
add_op_done_,
"Must clone a sealed NetOp, invoke Net::CompleteAddOp before clone");
return std::unique_ptr<OperatorBase>(new NetOp(*this));
}
} // namespace operators
} // namespace paddle
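The CompleteAddOp pass above derives a NetOp's external inputs and intermediate outputs in a single linear scan: a variable read after being produced by an earlier op in the list is an intermediate, anything else read must come from outside the net. A minimal, self-contained sketch of the same bookkeeping with plain string sets standing in for the framework types (the two toy ops are hypothetical):

#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Toy ops as (inputs, outputs) pairs, mimicking ops_ in NetOp.
  std::vector<std::pair<std::vector<std::string>, std::vector<std::string>>>
      ops = {{{"x", "w1"}, {"y"}},   // op1: y = f(x, w1)
             {{"y", "w2"}, {"z"}}};  // op2: z = g(y, w2)

  std::set<std::string> inputs, outputs, intermediates;
  for (auto& op : ops) {
    for (auto& in : op.first) {
      if (outputs.count(in)) {
        intermediates.insert(in);  // produced by an earlier op, consumed here
      } else {
        inputs.insert(in);         // must be fed from outside the net
      }
    }
    for (auto& out : op.second) outputs.insert(out);
  }

  for (auto& v : inputs) std::cout << "input: " << v << "\n";  // x, w1, w2
  for (auto& v : intermediates)
    std::cout << "intermediate: " << v << "\n";  // y
}

This matches the expectations in the NetOp test further down: x, w1, b1, w2, b2 become net inputs, y is an intermediate, and z is the only final output.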
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <set>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
/**
* @brief Network is also a type of Operator
*
* It will manage the operators it has.
*
* Network is the container and controller of a set of operators.
* A network object knows all Operators belonging to this network. Variables,
* which are inputs and outputs of these operators, are created and managed by a
* hierarchy of Scope objects.
*
 * This is the base class of networks; all networks should implement the APIs
 * it defines.
*/
class NetOp : public framework::OperatorBase {
public:
static const char kAll[];
NetOp()
: framework::OperatorBase("plain_net", framework::VariableNameMap{},
framework::VariableNameMap{},
framework::AttributeMap{}) {}
NetOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs);
NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) {
this->ops_.reserve(o.ops_.size());
std::transform(
o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_),
[](const std::unique_ptr<framework::OperatorBase>& op) {
return std::unique_ptr<framework::OperatorBase>(op->Clone());
});
this->CompleteAddOp();
}
bool SupportGPU() const override {
for (auto& op : ops_) {
if (!op->SupportGPU()) {
return false;
}
}
return true;
}
void AppendOp(const framework::OperatorBase& op) { AppendOp(op.Clone()); }
/**
* @brief Add an operator by ptr
*/
void AppendOp(std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot AppendOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(std::move(op));
}
void InsertOp(size_t pos, std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, std::move(op));
}
void InsertOp(size_t pos, const framework::OperatorBase& op) {
InsertOp(pos, op.Clone());
}
void CompleteAddOp(bool calculate = true);
std::string DebugStringEx(
const framework::Scope* scope = nullptr) const override;
bool IsNetOp() const override;
std::vector<std::string> OutputVars(bool has_intermediate) const override;
std::unique_ptr<framework::OperatorBase> Clone() const override;
std::vector<std::unique_ptr<framework::OperatorBase>> ops_;
private:
/**
* @brief Run the network.
*
   * Run all the operators with the `scope`; if no scope is provided, the
   * default scope will be used instead. If no OpContext is provided, the
   * default context will be used.
*/
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override {
for (auto& op : ops_) {
op->Run(scope, place);
}
}
bool add_op_done_{false};
std::set<std::string> intermediate_outputs_;
template <typename T, typename KeyType>
static bool Contains(T container, KeyType key) {
return container.find(key) != container.end();
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/net_op.h"
#include <gtest/gtest.h>
namespace paddle {
namespace operators {
using Scope = framework::Scope;
using DeviceContext = platform::DeviceContext;
static int run_cnt = 0;
class TestOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
DEFINE_OP_CLONE_METHOD(TestOp);
private:
void RunImpl(const Scope& scope,
const platform::Place& place) const override {
++run_cnt;
}
};
template <typename T>
void AssertSameVectorWithoutOrder(const std::vector<T>& expected,
const std::vector<T>& actual) {
ASSERT_EQ(expected.size(), actual.size());
std::unordered_set<T> expected_set;
for (auto& tmp : expected) {
expected_set.insert(tmp);
}
for (auto& act : actual) {
ASSERT_NE(expected_set.end(), expected_set.find(act));
}
}
TEST(OpKernel, all) {
auto net = std::make_shared<NetOp>();
ASSERT_NE(net, nullptr);
net->AppendOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, framework::AttributeMap{})));
net->AppendOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}},
{{"Out", {"z"}}}, framework::AttributeMap{})));
net->CompleteAddOp();
AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"},
net->Inputs(NetOp::kAll));
AssertSameVectorWithoutOrder({"y", "z"}, net->Outputs(NetOp::kAll));
auto final_outs = net->OutputVars(false);
ASSERT_EQ(final_outs.size(), 1UL);
ASSERT_EQ(final_outs[0], "z");
}
TEST(NetOp, insert_op) {
NetOp net;
auto op1 = std::unique_ptr<framework::NOP>(
new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, framework::AttributeMap{}));
net.AppendOp(*op1);
net.InsertOp(0, *op1);
ASSERT_EQ(2UL, net.ops_.size());
net.InsertOp(2, std::move(op1));
ASSERT_EQ(3UL, net.ops_.size());
}
TEST(NetOp, Clone) {
NetOp net;
net.AppendOp(std::unique_ptr<framework::NOP>(new framework::NOP{
"empty", framework::VariableNameMap{}, framework::VariableNameMap{},
framework::AttributeMap{}}));
net.AppendOp(std::unique_ptr<framework::NOP>(new framework::NOP{
"empty2", framework::VariableNameMap{}, framework::VariableNameMap{},
framework::AttributeMap{}}));
net.CompleteAddOp(true);
auto new_net_op = net.Clone();
ASSERT_NE(new_net_op, nullptr);
ASSERT_TRUE(new_net_op->IsNetOp());
auto* new_net = static_cast<NetOp*>(new_net_op.get());
ASSERT_EQ(2UL, new_net->ops_.size());
ASSERT_EQ(new_net->ops_[0]->Type(), "empty");
ASSERT_EQ(new_net->ops_[1]->Type(), "empty2");
}
} // namespace operators
} // namespace paddle
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <future>
+#include <future>  // NOLINT
 #include <ostream>
 #include "paddle/fluid/framework/data_type.h"
@@ -50,8 +50,8 @@ class PrefetchOp : public framework::OperatorBase {
     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
-        VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << "to get "
-                << outs[i] << "back";
+        VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get "
+                << outs[i] << " back";
         rpc_client->AsyncPrefetchVariable(epmap[i], ctx, scope, ins[i],
                                           outs[i]);
       } else {
@@ -71,7 +71,7 @@ class PrefetchOpMaker : public framework::OpProtoAndCheckerMaker {
              "(RPCClient) The RPC client object which will be"
              "initialized at most once.");
     AddOutput("Out",
-              "(SelectedRows) result "
+              "(LoDTensor) result "
               "to be fetched from parameter server")
         .AsDuplicable();
     AddAttr<std::vector<std::string>>(
...
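For orientation, the client-side pattern PrefetchOp implements is: one asynchronous prefetch RPC per input that passes the NeedSend check, followed by a blocking wait before the op returns. A minimal sketch of that shape, assuming a simplified stand-in client (RpcClient below is illustrative, not the real detail::RPCClient API):

#include <iostream>
#include <string>
#include <vector>

// Stand-in for the gRPC client: each call only logs what the real client
// would send asynchronously; Wait() is the synchronous barrier.
struct RpcClient {
  void AsyncPrefetchVariable(const std::string& ep, const std::string& in,
                             const std::string& out) {
    std::cout << "prefetch " << in << " from " << ep << " into " << out
              << "\n";
  }
  void Wait() { std::cout << "all prefetch RPCs finished\n"; }
};

int main() {
  // Hypothetical sharded lookup: one id shard and one weight shard per
  // parameter server endpoint.
  std::vector<std::string> ins = {"ids_shard0", "ids_shard1"};
  std::vector<std::string> outs = {"w_shard0", "w_shard1"};
  std::vector<std::string> epmap = {"127.0.0.1:6174", "127.0.0.1:6175"};

  RpcClient client;
  for (size_t i = 0; i < ins.size(); ++i) {
    client.AsyncPrefetchVariable(epmap[i], ins[i], outs[i]);
  }
  client.Wait();
}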
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/prelu_op.h"
-#include "paddle/fluid/operators/net_op.h"
+#include <string>
 namespace paddle {
 namespace operators {
...
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/scale_op.h"
-#include "paddle/fluid/operators/net_op.h"
+#include <string>
 namespace paddle {
 namespace operators {
...
@@ -14,7 +14,7 @@ limitations under the License. */
 #include <unistd.h>
 #include <string>
-#include <thread>
+#include <thread>  // NOLINT
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -37,11 +37,11 @@ namespace m = paddle::operators::math;
 std::unique_ptr<f::OperatorBase> listen_and_serv_op;
 int selected_port;
-void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) {
+void InitTensorsInScope(const p::CPUPlace &place, f::Scope *scope) {
   p::CPUDeviceContext ctx(place);
   for (int i = 0; i < 2; ++i) {
     auto var_name = paddle::string::Sprintf("x%d", i);
-    auto var = scope.Var(var_name);
+    auto var = scope->Var(var_name);
     auto tensor = var->GetMutable<f::LoDTensor>();
     tensor->Resize({10, 10});
     float *expect = tensor->mutable_data<float>(place);
@@ -50,20 +50,20 @@ void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) {
     }
   }
-  auto out_var = scope.Var("Out");
+  auto out_var = scope->Var("Out");
   auto out_tensor = out_var->GetMutable<f::LoDTensor>();
   out_tensor->Resize({10, 10});
   out_tensor->mutable_data<float>(place);  // allocate
 }
-void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
+void InitSelectedRowsInScope(const p::CPUPlace &place, f::Scope *scope) {
   p::CPUDeviceContext ctx(place);
   int64_t height = 10;
   int64_t row_numel = 10;
   m::SetConstant<p::CPUDeviceContext, float> set_one;
   // init x0
   std::vector<int64_t> rows0{0, 4, 7};
-  auto x0_var = scope.Var("x0");
+  auto x0_var = scope->Var("x0");
   auto x0 = x0_var->GetMutable<f::SelectedRows>();
   x0->set_rows(rows0);
   x0->set_height(height);
@@ -74,7 +74,7 @@ void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
   // init x1
   std::vector<int64_t> rows1{2, 9};
-  auto x1_var = scope.Var("x1");
+  auto x1_var = scope->Var("x1");
   auto x1 = x1_var->GetMutable<f::SelectedRows>();
   x1->set_rows(rows1);
   x1->set_height(height);
@@ -83,7 +83,7 @@ void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
       f::make_ddim({static_cast<int64_t>(rows1.size()), row_numel}), place);
   set_one(ctx, x1_value, 1.0);
-  auto out_var = scope.Var("Out");
+  auto out_var = scope->Var("Out");
   auto out = out_var->GetMutable<f::SelectedRows>();
   auto out_value = out->mutable_value();
   out->set_height(height);
@@ -117,15 +117,16 @@ void StartServerNet(bool is_sparse) {
   f::Scope scope;
   p::CPUPlace place;
   if (is_sparse) {
-    InitSelectedRowsInScope(scope, place);
+    InitSelectedRowsInScope(place, &scope);
   } else {
-    InitTensorsInScope(scope, place);
+    InitTensorsInScope(place, &scope);
   }
   // sub program run in listen_and_serv_op, for simple test we use sum
   f::ProgramDesc program;
   const auto &root_block = program.Block(0);
   auto *optimize_block = program.AppendBlock(root_block);
+  auto *prefetch_block = program.AppendBlock(root_block);
   // X for server side tensors, RX for received tensors, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block); AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block);
...@@ -135,6 +136,7 @@ void StartServerNet(bool is_sparse) { ...@@ -135,6 +136,7 @@ void StartServerNet(bool is_sparse) {
attrs.insert({"ParamList", std::vector<std::string>({"Out"})}); attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})}); attrs.insert({"GradList", std::vector<std::string>({"x1"})});
attrs.insert({"OptimizeBlock", optimize_block}); attrs.insert({"OptimizeBlock", optimize_block});
attrs.insert({"PrefetchBlock", prefetch_block});
listen_and_serv_op = listen_and_serv_op =
f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs); f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
LOG(INFO) << "selected port before run " << selected_port; LOG(INFO) << "selected port before run " << selected_port;
...@@ -148,7 +150,7 @@ TEST(SendRecvOp, CPUDense) { ...@@ -148,7 +150,7 @@ TEST(SendRecvOp, CPUDense) {
// local net // local net
f::Scope scope; f::Scope scope;
p::CPUPlace place; p::CPUPlace place;
InitTensorsInScope(scope, place); InitTensorsInScope(place, &scope);
// create rpc client var // create rpc client var
scope.Var("RPC_CLIENT_VAR"); scope.Var("RPC_CLIENT_VAR");
...@@ -191,7 +193,7 @@ TEST(SendRecvOp, CPUSparse) { ...@@ -191,7 +193,7 @@ TEST(SendRecvOp, CPUSparse) {
f::Scope scope; f::Scope scope;
p::CPUPlace place; p::CPUPlace place;
p::CPUDeviceContext ctx(place); p::CPUDeviceContext ctx(place);
InitSelectedRowsInScope(scope, place); InitSelectedRowsInScope(place, &scope);
scope.Var("RPC_CLIENT_VAR"); scope.Var("RPC_CLIENT_VAR");
f::AttributeMap attrs; f::AttributeMap attrs;
selected_port = static_cast<paddle::operators::ListenAndServOp *>( selected_port = static_cast<paddle::operators::ListenAndServOp *>(
......
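The SelectedRows setup this test repeats compresses to three steps: declare which logical rows are present, declare the logical height of the dense counterpart, and allocate the dense value tensor for just the present rows. A hedged sketch of that helper, assuming the fluid framework headers of this era (the function name and variable name are illustrative):

#include <vector>

#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/place.h"

// Build a 10x10-equivalent SelectedRows holding three non-empty rows, the
// same way InitSelectedRowsInScope populates "x0" above. `scope` is assumed
// to be a valid Scope*, as in the test.
void MakeSparseVar(paddle::framework::Scope* scope,
                   const paddle::platform::CPUPlace& place) {
  namespace f = paddle::framework;
  auto* sr = scope->Var("x0")->GetMutable<f::SelectedRows>();
  std::vector<int64_t> rows{0, 4, 7};
  sr->set_rows(rows);   // which logical rows are present
  sr->set_height(10);   // logical height of the dense counterpart
  sr->mutable_value()->mutable_data<float>(
      f::make_ddim({static_cast<int64_t>(rows.size()), 10}), place);
}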
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include <future>
+#include <future>  // NOLINT
 #include <ostream>
 #include "paddle/fluid/framework/data_type.h"
@@ -36,7 +36,7 @@ class SendVarsOp : public framework::OperatorBase {
     auto ins = Inputs("X");
     std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap");
-    int sync_send = Attr<int>("sync_sent");
+    int sync_send = Attr<int>("sync_send");
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     auto& ctx = *pool.Get(place);
...
@@ -35,8 +35,8 @@ class SGDOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                       "Learning rate should have 1 element");
     auto param_dim = ctx->GetInputDim("Param");
-    // TODO(qijun): check dimensions of Param and Grad at complie
-    // and run time.
+    // TODO(qijun): check dimensions of Param and Grad at compile
+    // and runtime.
     ctx->SetOutputDim("ParamOut", param_dim);
   }
...
@@ -48,11 +48,11 @@ class SplitIdsOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutputs("Out"), "SplitIdsOp must has output Out.");
     auto ids_var_type = ctx->GetInputsVarType("Ids").front();
-    PADDLE_ENFORCE_EQ(ids_var_type, framework::proto::VarType::LOD_TENSOR);
     auto ids_dims = ctx->GetInputDim("Ids");
-    PADDLE_ENFORCE_EQ(ids_dims.size(), 2);
-    PADDLE_ENFORCE_EQ(ids_dims[1], 1);
+    if (ids_var_type == framework::proto::VarType::LOD_TENSOR) {
+      PADDLE_ENFORCE_EQ(ids_dims.size(), 2);
+      PADDLE_ENFORCE_EQ(ids_dims[1], 1);
+    }
   }
 };
@@ -60,8 +60,9 @@ class SplitIdsOpInferVarType : public framework::VarTypeInference {
  public:
   void operator()(const framework::OpDesc &op_desc,
                   framework::BlockDesc *block) const override {
+    auto *input_var = block->Var(op_desc.Input("Ids")[0]);
     for (auto &out_var : op_desc.Output("Out")) {
-      block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR);
+      block->Var(out_var)->SetType(input_var->GetType());
     }
   }
 };
@@ -73,4 +74,5 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(split_ids, ops::SplitIdsOp, ops::SplitIdsOpMaker,
                   ops::SplitIdsOpInferVarType);
 REGISTER_OP_CPU_KERNEL(
-    split_ids, ops::SplitIdsOpKernel<paddle::platform::CPUPlace, int64_t>);
+    split_ids, ops::SplitIdsOpKernel<paddle::platform::CPUPlace, int64_t>,
+    ops::SplitIdsOpKernel<paddle::platform::CPUPlace, float>);
@@ -24,35 +24,63 @@ namespace operators {
 template <typename DeviceContext, typename T>
 class SplitIdsOpKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
+  void Compute(const framework::ExecutionContext &ctx) const override {
     auto place = ctx.GetPlace();
     if (!platform::is_cpu_place(place)) {
       PADDLE_THROW("SplitIds do not support GPU kernel");
     }
-    auto& ids_dims = ctx.Input<framework::LoDTensor>("Ids")->dims();
-    const T* ids = ctx.Input<framework::LoDTensor>("Ids")->data<T>();
-    auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
-    const size_t shard_num = outs.size();
+    const auto *ids_var = ctx.InputVar("Ids");
+    if (ids_var->IsType<framework::LoDTensor>()) {
+      const auto &ids_dims = ctx.Input<framework::LoDTensor>("Ids")->dims();
+      const T *ids = ctx.Input<framework::LoDTensor>("Ids")->data<T>();
+      auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
+      const size_t shard_num = outs.size();
       std::vector<std::vector<T>> out_ids;
       out_ids.resize(outs.size());
       // split id by their shard_num.
       for (int i = 0; i < ids_dims[0]; ++i) {
         T id = ids[i];
         size_t shard_id = static_cast<size_t>(id) % shard_num;
         out_ids[shard_id].push_back(id);
       }
-    // create tensor for each shard and send to parameter server
-    for (size_t i = 0; i < out_ids.size(); ++i) {
-      auto* shard_t = outs[i];
-      std::vector<T> ids = out_ids[i];
-      auto* shard_data = shard_t->mutable_data<T>(
-          framework::make_ddim({static_cast<int64_t>(ids.size()), 1}), place);
-      for (size_t i = 0; i < ids.size(); ++i) {
-        shard_data[i] = ids[i];
-      }
-    }
+      // create tensor for each shard and send to parameter server
+      for (size_t i = 0; i < out_ids.size(); ++i) {
+        auto *shard_t = outs[i];
+        std::vector<T> ids = out_ids[i];
+        auto *shard_data = shard_t->mutable_data<T>(
+            framework::make_ddim({static_cast<int64_t>(ids.size()), 1}), place);
+        for (size_t i = 0; i < ids.size(); ++i) {
+          shard_data[i] = ids[i];
+        }
+      }
+    } else if (ids_var->IsType<framework::SelectedRows>()) {
+      const auto *ids_selected_rows = ctx.Input<framework::SelectedRows>("Ids");
+      auto &ids_dims = ids_selected_rows->value().dims();
+      PADDLE_ENFORCE_EQ(ids_dims[0], ids_selected_rows->rows().size(), "");
+      const T *ids = ids_selected_rows->value().data<T>();
+      const auto &ids_rows = ids_selected_rows->rows();
+      auto outs = ctx.MultiOutput<framework::SelectedRows>("Out");
+      const size_t shard_num = outs.size();
+      // get rows for outputs
+      for (auto &id : ids_rows) {
+        size_t shard_id = static_cast<size_t>(id) % shard_num;
+        outs[shard_id]->mutable_rows()->push_back(id);
+      }
+      int64_t row_width = ids_dims[1];
+      for (auto &out : outs) {
+        out->set_height(ids_selected_rows->height());
+        framework::DDim ddim = framework::make_ddim(
+            {static_cast<int64_t>(out->rows().size()), row_width});
+        T *output = out->mutable_value()->mutable_data<T>(ddim, place);
+        for (size_t i = 0; i < ddim[0]; ++i) {
+          memcpy(output + i * row_width, ids + out->rows()[i] * row_width,
+                 row_width * sizeof(T));
+        }
+      }
+    }
   }
...
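Both branches of the kernel use the same placement rule: an id lands on shard id % shard_num. A tiny self-contained sketch of that sharding step, with made-up ids and three shards standing in for three parameter servers:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Ids to split across three output shards.
  std::vector<long long> ids = {1, 5, 9, 2, 6};
  const size_t shard_num = 3;

  // Same rule as SplitIdsOpKernel: id goes to shard (id % shard_num).
  std::vector<std::vector<long long>> out_ids(shard_num);
  for (long long id : ids) {
    out_ids[static_cast<size_t>(id) % shard_num].push_back(id);
  }

  for (size_t i = 0; i < shard_num; ++i) {
    std::cout << "shard " << i << ":";
    for (long long id : out_ids[i]) std::cout << " " << id;
    std::cout << "\n";
  }
  // shard 0: 9 6 / shard 1: 1 / shard 2: 5 2
}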
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/split_op.h"
-#include "paddle/fluid/operators/net_op.h"
 namespace paddle {
 namespace operators {
...
@@ -10,9 +10,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/sum_op.h"
+#include <algorithm>
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/var_type_inference.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
@@ -37,7 +39,10 @@ class SumOp : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputsDim("X");
     size_t N = x_dims.size();
-    PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1.");
+    PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0.");
+    if (N == 1) {
+      VLOG(3) << "Warning: sum have only one input, may waste memory";
+    }
     framework::DDim in_dim({0});
     for (auto& x_dim : x_dims) {
...
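The relaxed check lets sum accept a single input, where it degenerates to a copy; this matters in the distributed setting, since the transpiler can leave exactly one gradient on a server. A minimal standalone sketch of the new guard, with plain dimension vectors standing in for tensors:

#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for SumOp::InferShape's new input-count guard: any positive
// number of inputs is accepted; a single input only triggers a warning.
void CheckSumInputs(const std::vector<std::vector<int>>& x_dims) {
  size_t N = x_dims.size();
  if (N == 0) {
    std::cerr << "Input tensors count should > 0.\n";
    return;
  }
  if (N == 1) {
    std::cerr << "Warning: sum has only one input, may waste memory\n";
  }
  std::cout << "sum over " << N << " input(s) accepted\n";
}

int main() {
  CheckSumInputs({{10, 10}});            // now legal: a single-input sum
  CheckSumInputs({{10, 10}, {10, 10}});  // the usual multi-input case
}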
@@ -2,13 +2,13 @@ if(WITH_PYTHON)
   if(WITH_AMD_GPU)
     hip_library(paddle_pybind SHARED
       SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
-      DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method
+      DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method
       parallel_executor
       ${GLOB_OP_LIB})
   else()
     cc_library(paddle_pybind SHARED
       SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
-      DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method
+      DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method
       parallel_executor
       ${GLOB_OP_LIB})
     if(NOT APPLE AND NOT ANDROID)
...
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <string>
 #include <tuple>
-#include "paddle/fluid/framework/backward.h"
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/program_desc.h"
@@ -125,23 +124,6 @@ void BindProgramDesc(pybind11::module *m) {
       })
       .def("append_block", &pd::ProgramDesc::AppendBlock,
            pybind11::return_value_policy::reference)
-      .def("append_backward",
-           [](pd::ProgramDesc &program_desc, const pd::VarDesc &target,
-              const std::unordered_set<std::string> &no_grad_vars) {
-             pd::ParamGradInfoMap param_grad_map =
-                 AppendBackward(program_desc, target, no_grad_vars);
-             std::unordered_map<
-                 std::string, std::tuple<std::string /* grad_var_name */,
-                                         int /* block_idx */, int /* op_idx */>>
-                 retv;
-             for (auto it = param_grad_map.begin(); it != param_grad_map.end();
-                  ++it) {
-               const auto &grad_info = it->second;
-               retv[it->first] = std::make_tuple(
-                   grad_info.name_, grad_info.block_idx_, grad_info.op_idx_);
-             }
-             return retv;
-           })
       .def("block", &pd::ProgramDesc::MutableBlock,
            pybind11::return_value_policy::reference)
       .def("num_blocks", &pd::ProgramDesc::Size)
...
@@ -20,9 +20,6 @@ limitations under the License. */
 #include <utility>
 #include <vector>
-#include "paddle/fluid/pybind/protobuf.h"
-#include "paddle/fluid/framework/backward.h"
 #include "paddle/fluid/framework/channel.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/feed_fetch_method.h"
@@ -31,18 +28,18 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/parallel_executor.h"
 #include "paddle/fluid/framework/prune.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
-#include "paddle/fluid/operators/cond_op.h"
-#include "paddle/fluid/operators/net_op.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/pybind/const_value.h"
 #include "paddle/fluid/pybind/exception.h"
-#include "paddle/fluid/pybind/pybind.h"
+#include "paddle/fluid/pybind/protobuf.h"
+#include "paddle/fluid/pybind/pybind.h"  // NOLINT
 #include "paddle/fluid/pybind/recordio.h"
 #include "paddle/fluid/pybind/tensor_py.h"
@@ -239,11 +236,6 @@ All parameter, weight, gradient are variables in Paddle.
           },
           py::return_value_policy::reference)
 #endif
-      .def("get_net",
-           [](Variable &self) -> operators::NetOp * {
-             return self.GetMutable<operators::NetOp>();
-           },
-           py::return_value_policy::reference)
       .def("get_reader",
            [](Variable &self) -> framework::ReaderHolder * {
              PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>());
@@ -388,11 +380,6 @@ All parameter, weight, gradient are variables in Paddle.
                        desc.InitializationErrorString());
         return OpRegistry::CreateOp(desc);
       })
-      .def("backward",
-           [](const OperatorBase &forwardOp,
-              const std::unordered_set<std::string> &no_grad_vars) {
-             return Backward(forwardOp, no_grad_vars).release();
-           })
       .def("run",
            [](OperatorBase &self, const Scope &scope,
               const platform::CPUPlace &place) { self.Run(scope, place); })
@@ -420,42 +407,6 @@ All parameter, weight, gradient are variables in Paddle.
            [](const OperatorBase &op) { return op.OutputVars(false); })
       .def("support_gpu", &OperatorBase::SupportGPU);
-  py::class_<operators::NetOp, OperatorBase>(m, "Net")
-      .def_static("create",
-                  []() -> operators::NetOp * {
-                    auto *retv = new operators::NetOp;
-                    retv->SetType("plain_net");
-                    return retv;
-                  })
-      .def("append_op", [](operators::NetOp &self,
-                           const OperatorBase &op) { self.AppendOp(op); })
-      .def("complete_add_op", &operators::NetOp::CompleteAddOp)
-      .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
-        self->CompleteAddOp();
-      });
-  // cond_op
-  py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
-      .def_static("create",
-                  [](py::bytes protobin) -> operators::CondOp * {
-                    proto::OpDesc desc;
-                    PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
-                                   "Cannot parse user input to OpDesc");
-                    PADDLE_ENFORCE(desc.IsInitialized(),
-                                   "User OpDesc is not initialized, reason %s",
-                                   desc.InitializationErrorString());
-                    auto cond_op = OpRegistry::CreateOp(desc);
-                    return static_cast<operators::CondOp *>(cond_op.release());
-                  })
-      .def("set_truenet",
-           [](operators::CondOp &self, const operators::NetOp &net) -> void {
-             self.set_truenet(net.Clone());
-           })
-      .def("set_falsenet",
-           [](operators::CondOp &self, const operators::NetOp &net) -> void {
-             self.set_falsenet(net.Clone());
-           });
   py::class_<framework::Executor>(m, "Executor")
       .def(py::init<const platform::Place &>())
       .def("run",
...
...@@ -14,13 +14,13 @@ ...@@ -14,13 +14,13 @@
#include "paddle/fluid/recordio/chunk.h" #include "paddle/fluid/recordio/chunk.h"
#include <zlib.h>
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <sstream> #include <sstream>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "snappy_stream/include/snappystream.hpp" #include "snappystream.hpp"
#include "zlib/include/zlib.h"
namespace paddle { namespace paddle {
namespace recordio { namespace recordio {
......
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/recordio/header.h" #include "paddle/fluid/recordio/header.h"
#include <string>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
......
...@@ -231,7 +231,7 @@ function gen_fluid_inference_lib() { ...@@ -231,7 +231,7 @@ function gen_fluid_inference_lib() {
Deploying fluid inference library ... Deploying fluid inference library ...
======================================== ========================================
EOF EOF
make inference_lib_dist make -j `nproc` inference_lib_dist
fi fi
} }
......
...@@ -1119,24 +1119,6 @@ class Program(object): ...@@ -1119,24 +1119,6 @@ class Program(object):
def current_block(self): def current_block(self):
return self.blocks[self.current_block_idx] return self.blocks[self.current_block_idx]
def append_backward(self, target, no_grad_set=None):
"""
return map(param_name -> (grad_name, block_index, op_index))
"""
assert isinstance(target, Variable)
if no_grad_set is None:
no_grad_set = set()
try:
param_to_grad_info = self.desc.append_backward(target.desc,
no_grad_set)
except Exception as e:
raise core.EnforceNotMet(
str(e) + "\nCurrent protobuf is\n{0}".format(
self.to_string(False)))
self.sync_with_cpp()
return param_to_grad_info
def create_block(self, parent_idx=None): def create_block(self, parent_idx=None):
new_block_idx = len(self.blocks) new_block_idx = len(self.blocks)
parent = self.current_block() if parent_idx is None else self.block( parent = self.current_block() if parent_idx is None else self.block(
...@@ -1201,6 +1183,8 @@ class Parameter(Variable): ...@@ -1201,6 +1183,8 @@ class Parameter(Variable):
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
self.do_model_average = kwargs.get('do_model_average', None)
def __str__(self): def __str__(self):
return self.to_string(True) return self.to_string(True)
...@@ -1221,7 +1205,7 @@ class Parameter(Variable): ...@@ -1221,7 +1205,7 @@ class Parameter(Variable):
if with_details: if with_details:
res_str = Variable.to_string(self, throw_on_error, True) res_str = Variable.to_string(self, throw_on_error, True)
additional_attr = ("trainable", "optimize_attr", "regularizer", additional_attr = ("trainable", "optimize_attr", "regularizer",
"gradient_clip_attr") "gradient_clip_attr", "do_model_average")
for attr_name in additional_attr: for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name, res_str += "%s: %s\n" % (attr_name,
str(getattr(self, attr_name))) str(getattr(self, attr_name)))
......
...@@ -218,6 +218,7 @@ def fc(input, ...@@ -218,6 +218,7 @@ def fc(input,
def embedding(input, def embedding(input,
size, size,
is_sparse=False, is_sparse=False,
is_distributed=False,
padding_idx=None, padding_idx=None,
param_attr=None, param_attr=None,
dtype='float32'): dtype='float32'):
...@@ -268,8 +269,11 @@ def embedding(input, ...@@ -268,8 +269,11 @@ def embedding(input,
inputs={'Ids': input, inputs={'Ids': input,
'W': w}, 'W': w},
outputs={'Out': tmp}, outputs={'Out': tmp},
attrs={'is_sparse': is_sparse, attrs={
'padding_idx': padding_idx}) 'is_sparse': is_sparse,
'is_distributed': is_distributed,
'padding_idx': padding_idx
})
return tmp return tmp
...@@ -1516,7 +1520,8 @@ def batch_norm(input, ...@@ -1516,7 +1520,8 @@ def batch_norm(input,
in_place=False, in_place=False,
name=None, name=None,
moving_mean_name=None, moving_mean_name=None,
moving_variance_name=None): moving_variance_name=None,
do_model_average_for_mean_and_var=False):
""" """
This function helps create an operator to implement This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters. the BatchNorm layer using the configurations from the input parameters.
...@@ -1547,7 +1552,10 @@ def batch_norm(input, ...@@ -1547,7 +1552,10 @@ def batch_norm(input,
mean = helper.create_parameter( mean = helper.create_parameter(
attr=ParamAttr( attr=ParamAttr(
name=moving_mean_name, initializer=Constant(0.0), trainable=False), name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=input.dtype) dtype=input.dtype)
mean.stop_gradient = True mean.stop_gradient = True
...@@ -1556,7 +1564,8 @@ def batch_norm(input, ...@@ -1556,7 +1564,8 @@ def batch_norm(input,
attr=ParamAttr( attr=ParamAttr(
name=moving_variance_name, name=moving_variance_name,
initializer=Constant(1.0), initializer=Constant(1.0),
trainable=False), trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape, shape=param_shape,
dtype=input.dtype) dtype=input.dtype)
variance.stop_gradient = True variance.stop_gradient = True
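
A hedged usage sketch of the new `do_model_average_for_mean_and_var` argument (the network around it is illustrative): the flag is threaded into the `ParamAttr` of the moving mean and variance, so `ModelAverage` treats them like any other averaged parameter.

```python
import paddle.fluid as fluid

img = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
conv = fluid.layers.conv2d(input=img, num_filters=16, filter_size=3)
# Include the moving statistics in model averaging as well:
bn = fluid.layers.batch_norm(
    input=conv, do_model_average_for_mean_and_var=True)
```
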
...@@ -3374,14 +3383,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -3374,14 +3383,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
Here are some examples to explain it. Here are some examples to explain it.
1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape 1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
is [6, 8], the reshape operator will transform x into a 2-D tensor with is [6, 8], the reshape operator will transform x into a 2-D tensor with
shape [6, 8], leaving x's data unchanged. shape [6, 8], leaving x's data unchanged.
2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape 2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
specified is [2, 3, -1, 2], the reshape operator will transform x into a specified is [2, 3, -1, 2], the reshape operator will transform x into a
4-D tensor with shape [2, 3, 4, 2], leaving x's data unchanged. In this 4-D tensor with shape [2, 3, 4, 2], leaving x's data unchanged. In this
case, one dimension of the target shape is set to -1; the value of this case, one dimension of the target shape is set to -1; the value of this
dimension is inferred from the total element number of x and remaining dimension is inferred from the total element number of x and remaining
dimensions. dimensions.
3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape 3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
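
A short sketch of cases 1 and 2 from the docstring above, assuming the `reshape(x, shape, ...)` signature shown in the hunk header (the `-1` entry is inferred from the element count):

```python
import paddle.fluid as fluid

x = fluid.layers.data(
    name='x', shape=[2, 4, 6], dtype='float32', append_batch_size=False)
# Case 1: an explicit target shape with the same element count.
flat = fluid.layers.reshape(x=x, shape=[6, 8])
# Case 2: 2*4*6 = 48 elements, so the -1 is inferred as
# 48 / (2*3*2) = 4, giving a [2, 3, 4, 2] tensor.
out = fluid.layers.reshape(x=x, shape=[2, 3, -1, 2])
```
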
...@@ -3615,7 +3624,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None): ...@@ -3615,7 +3624,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
def pad(x, paddings, pad_value=0., name=None): def pad(x, paddings, pad_value=0., name=None):
""" """
Pads a tensor with a constant value given by :attr:`pad_value`, and the Pads a tensor with a constant value given by :attr:`pad_value`, and the
padded width is specified by :attr:`paddings`. padded width is specified by :attr:`paddings`.
Specifically, the number of values padded before the contents of :attr:`x` Specifically, the number of values padded before the contents of :attr:`x`
in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number
...@@ -3643,7 +3652,7 @@ def pad(x, paddings, pad_value=0., name=None): ...@@ -3643,7 +3652,7 @@ def pad(x, paddings, pad_value=0., name=None):
x (Variable): The input tensor variable. x (Variable): The input tensor variable.
paddings (list): A list of integers. Its elements specify the padded paddings (list): A list of integers. Its elements specify the padded
width before and after for each dimension in turn. width before and after for each dimension in turn.
The length of :attr:`paddings` must be The length of :attr:`paddings` must be
:math:`rank(x) \\times 2`. :math:`rank(x) \\times 2`.
pad_value (float): The constant value used to pad. pad_value (float): The constant value used to pad.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
......
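
A minimal sketch of `pad` as documented above: `paddings` lists the before/after widths per dimension, so its length is rank(x) * 2 (shapes and names here are illustrative):

```python
import paddle.fluid as fluid

x = fluid.layers.data(
    name='pad_x', shape=[2, 3], dtype='float32', append_batch_size=False)
# Dim 0 gets (0 before, 1 after), dim 1 gets (1 before, 2 after):
# a [2, 3] input becomes [2+0+1, 3+1+2] = [3, 6], filled with pad_value.
out = fluid.layers.pad(x=x, paddings=[0, 1, 1, 2], pad_value=0.)
```
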
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re
from collections import defaultdict from collections import defaultdict
from paddle.fluid.framework import Program from paddle.fluid.framework import Program
import framework import framework
...@@ -818,8 +818,8 @@ class ModelAverage(Optimizer): ...@@ -818,8 +818,8 @@ class ModelAverage(Optimizer):
min_average_window, max_average_window and current update times. min_average_window, max_average_window and current update times.
Args: Args:
params_grads: A list of parameter-grad variable pairs.
average_window_rate: The rate of average window. average_window_rate: The rate of average window.
params_grads: A list of parameter-grad variable pairs.
min_average_window: The minimum size of average window. min_average_window: The minimum size of average window.
max_average_window: The maximum size of average window. max_average_window: The maximum size of average window.
...@@ -840,8 +840,8 @@ class ModelAverage(Optimizer): ...@@ -840,8 +840,8 @@ class ModelAverage(Optimizer):
""" """
def __init__(self, def __init__(self,
params_grads,
average_window_rate, average_window_rate,
params_grads=None,
min_average_window=10000, min_average_window=10000,
max_average_window=10000, max_average_window=10000,
**kwargs): **kwargs):
...@@ -849,24 +849,37 @@ class ModelAverage(Optimizer): ...@@ -849,24 +849,37 @@ class ModelAverage(Optimizer):
self.average_window = average_window_rate self.average_window = average_window_rate
self.min_average_window = min_average_window self.min_average_window = min_average_window
self.max_average_window = max_average_window self.max_average_window = max_average_window
self.params_grads = params_grads
self.params_grads = [] if params_grads is None else params_grads
params = {}
for param, grad in self.params_grads:
if param.do_model_average != False:
params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
if param.name not in params and param.do_model_average != False:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
stop_gradient=True)
params[param.name] = (param, grad)
self.params_grads = params.values()
for param, grad in self.params_grads: for param, grad in self.params_grads:
if grad is not None: self._append_average_accumulate_op(param)
self._append_average_accumulate_op(param)
self.apply_program = Program() self.apply_program = Program()
block = self.apply_program.global_block() block = self.apply_program.global_block()
with program_guard(main_program=self.apply_program): with program_guard(main_program=self.apply_program):
for param_grad in self.params_grads: for param_grad in self.params_grads:
if param_grad[1] is not None: self._add_average_apply_op(block, param_grad)
self._add_average_apply_op(block, param_grad)
self.restore_program = Program() self.restore_program = Program()
block = self.restore_program.global_block() block = self.restore_program.global_block()
with program_guard(main_program=self.restore_program): with program_guard(main_program=self.restore_program):
for param_grad in self.params_grads: for param_grad in self.params_grads:
if param_grad[1] is not None: self._add_average_restore_op(block, param_grad)
self._add_average_restore_op(block, param_grad)
def _add_average_apply_op(self, block, param_grad): def _add_average_apply_op(self, block, param_grad):
param = block.clone_variable(param_grad[0]) param = block.clone_variable(param_grad[0])
......
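
With the rewritten `__init__` above, `params_grads` becomes optional: every parameter in the default main program whose `do_model_average` is not `False` is collected automatically, and a temporary grad var is created for parameters that arrive without one. A hedged end-to-end sketch (the `apply` context manager is assumed from the rest of the class, which this hunk does not show):

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[4], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
avg_cost = fluid.layers.mean(
    fluid.layers.square_error_cost(input=pred, label=y))

fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_cost)

# No params_grads needed any more; eligible parameters are discovered
# from fluid.default_main_program().
model_average = fluid.optimizer.ModelAverage(
    average_window_rate=0.15,
    min_average_window=10000,
    max_average_window=20000)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
# ... run training steps with exe here ...
with model_average.apply(exe):
    pass  # evaluation sees averaged weights; originals restored on exit
```
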
...@@ -28,13 +28,15 @@ class ParamAttr(object): ...@@ -28,13 +28,15 @@ class ParamAttr(object):
learning_rate=1.0, learning_rate=1.0,
regularizer=None, regularizer=None,
trainable=True, trainable=True,
gradient_clip=None): gradient_clip=None,
do_model_average=None):
self.name = name self.name = name
self.initializer = initializer self.initializer = initializer
self.learning_rate = learning_rate self.learning_rate = learning_rate
self.regularizer = regularizer self.regularizer = regularizer
self.trainable = trainable self.trainable = trainable
self.gradient_clip = gradient_clip self.gradient_clip = gradient_clip
self.model_average = do_model_average
def set_default_initializer(self, initializer): def set_default_initializer(self, initializer):
if initializer is None: if initializer is None:
...@@ -80,7 +82,8 @@ class ParamAttr(object): ...@@ -80,7 +82,8 @@ class ParamAttr(object):
}, },
'regularizer': self.regularizer, 'regularizer': self.regularizer,
'trainable': self.trainable, 'trainable': self.trainable,
'gradient_clip_attr': self.gradient_clip 'gradient_clip_attr': self.gradient_clip,
'model_average': self.model_average
} }
if with_initializer: if with_initializer:
kwargs['initializer'] = self.initializer kwargs['initializer'] = self.initializer
...@@ -90,7 +93,7 @@ class ParamAttr(object): ...@@ -90,7 +93,7 @@ class ParamAttr(object):
class WeightNormParamAttr(ParamAttr): class WeightNormParamAttr(ParamAttr):
""" """
Used for weight normalization. Any field in ParamAttr can also be set here. Used for weight normalization. Any field in ParamAttr can also be set here.
Besides, an extra field dim can be set to indicate the dimension to Besides, an extra field dim can be set to indicate the dimension to
exclude when normalizing. exclude when normalizing.
""" """
# List to record the parameters reparameterized by weight normalization. # List to record the parameters reparameterized by weight normalization.
......
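
The new `do_model_average` field rides along with the other `ParamAttr` options; note that `to_kwargs()` exports it under the key `'model_average'`. A small sketch (layer and names are illustrative):

```python
import paddle.fluid as fluid

data = fluid.layers.data(name='img', shape=[784], dtype='float32')
# Opt this particular weight out of model averaging.
w_attr = fluid.ParamAttr(
    name='fc_w',
    regularizer=fluid.regularizer.L2Decay(1e-4),
    do_model_average=False)
fc = fluid.layers.fc(input=data, size=256, param_attr=w_attr)
```
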
...@@ -14,23 +14,13 @@ ...@@ -14,23 +14,13 @@
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.op import Operator from paddle.fluid.op import Operator
import paddle.fluid as fluid
from op_test import OpTest
from paddle.fluid.framework import grad_var_name from paddle.fluid.framework import grad_var_name
def get_backward_op(scope, op, no_grad_set):
backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars():
var = scope.var(input)
var.get_tensor()
for output in backward_op.output_vars():
var = scope.var(output)
var.get_tensor()
return backward_op
def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): def _reference_testing(x, scale, offset, mean, var, epsilon, data_format):
x_shape = x.shape x_shape = x.shape
if len(x_shape) == 2: if len(x_shape) == 2:
...@@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format): ...@@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format):
def _reference_training(x, scale, offset, epsilon, data_format): def _reference_training(x, scale, offset, epsilon, data_format):
x_shape = x.shape x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
if data_format == "NCHW": if data_format == "NCHW":
n, c, h, w = x.shape n, c, h, w = x.shape
...@@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format): ...@@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format):
offset_tile = np.reshape(offset, (1, c, 1, 1)) offset_tile = np.reshape(offset, (1, c, 1, 1))
offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) offset_tile = np.reshape(offset_tile, (1, c, 1, 1))
y = normalized * scale_tile + offset_tile y = normalized * scale_tile + offset_tile
if len(x_shape) == 2:
y = np.reshape(y, (y.shape[0], y.shape[1]))
return y, mean, var return y, mean, var
elif data_format == "NHWC": elif data_format == "NHWC":
x_square = x * x x_square = x * x
...@@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format): ...@@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format):
var = x_square_sum / element_count - mean * mean var = x_square_sum / element_count - mean * mean
normalized = (x - mean) / np.sqrt(var + epsilon) normalized = (x - mean) / np.sqrt(var + epsilon)
y = normalized * scale + offset y = normalized * scale + offset
if len(x_shape) == 2:
y = np.reshape(y, x_shape)
return y, mean, var return y, mean, var
else: else:
raise ValueError("Unknown data order.") raise ValueError("Unknown data order.")
def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format):
# Use the following formulas to calculate gradients: # Use the following formulas to calculate gradients:
# grad_scale = # grad_scale =
# sum(grad_y * (x - mean)) * rsqrt(var + epsilon) # sum(grad_y * (x - mean)) * rsqrt(var + epsilon)
# #
# grad_offset = sum(grad_y) # grad_offset = sum(grad_y)
# #
# grad_x = # x_grad =
# 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) - # 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], grad_y.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], 1, 1, grad_y.shape[1]))
if data_format == "NCHW": if data_format == "NCHW":
x = np.transpose(x, (0, 2, 3, 1)) x = np.transpose(x, (0, 2, 3, 1))
grad_y = np.transpose(grad_y, (0, 2, 3, 1)) y_grad = np.transpose(y_grad, (0, 2, 3, 1))
# raise ValueError("data_format must be NHWC, got %s." % data_format) x_grad = scale * (y_grad - np.mean(
grad_x = scale * (grad_y - np.mean( y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean(
grad_y, axis=(0, 1, 2)) - (x - mean) * np.mean( y_grad * (x - mean), axis=(0, 1, 2)) /
grad_y * (x - mean), axis=(0, 1, 2)) /
(var + epsilon)) / np.sqrt(var + epsilon) (var + epsilon)) / np.sqrt(var + epsilon)
grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon), grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon),
axis=(0, 1, 2)) axis=(0, 1, 2))
grad_offset = np.sum(grad_y, axis=(0, 1, 2)) grad_offset = np.sum(y_grad, axis=(0, 1, 2))
# transfer back to N, C, H, W # transfer back to N, C, H, W
if data_format == "NCHW": if data_format == "NCHW":
grad_x = np.transpose(grad_x, (0, 3, 1, 2)) x_grad = np.transpose(x_grad, (0, 3, 1, 2))
x = np.transpose(x, (0, 3, 1, 2)) x = np.transpose(x, (0, 3, 1, 2))
grad_y = np.transpose(grad_y, (0, 3, 1, 2)) y_grad = np.transpose(y_grad, (0, 3, 1, 2))
if len(x_shape) == 2: return x_grad, grad_scale, grad_offset
grad_x = np.reshape(grad_x, x_shape)
return grad_x, grad_scale, grad_offset
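
Written out, the comment block at the top of `_reference_grad` is the standard batch-norm backward pass, with sums taken per channel over the N, H, W axes and N the number of reduced elements:

```latex
\frac{\partial L}{\partial \gamma}
  = \sum_i \frac{\partial L}{\partial y_i}\,\frac{x_i-\mu}{\sqrt{\sigma^2+\epsilon}},
\qquad
\frac{\partial L}{\partial \beta} = \sum_i \frac{\partial L}{\partial y_i}

\frac{\partial L}{\partial x_i}
  = \frac{\gamma}{N\sqrt{\sigma^2+\epsilon}}
    \Bigl(N\,\frac{\partial L}{\partial y_i}
          - \sum_j \frac{\partial L}{\partial y_j}
          - \frac{x_i-\mu}{\sigma^2+\epsilon}
            \sum_j \frac{\partial L}{\partial y_j}\,(x_j-\mu)\Bigr)
```
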
def create_or_get_tensor(scope, var_name, var, place): def create_or_get_tensor(scope, var_name, var, place):
...@@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None): ...@@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None):
__set_tensor__(output, data) __set_tensor__(output, data)
class TestBatchNormOpInference(OpTest): class TestBatchNormOpInference(unittest.TestCase):
def setUp(self): def setUp(self):
self.dtype = np.float32 self.dtype = np.float32
...@@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): ...@@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference):
self.check_with_place(place, data_format, self.dtype, [2, 3]) self.check_with_place(place, data_format, self.dtype, [2, 3])
class TestBatchNormOpTraining(OpTest): class TestBatchNormOpTraining(unittest.TestCase):
def __assert_close(self, tensor, np_array, msg, atol=1e-4): def __assert_close(self, tensor, np_array, msg, atol=1e-4):
if not np.allclose(np.array(tensor), np_array, atol=atol):
import pdb
pdb.set_trace()
self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
def test_python_testing(self):
data_format = "NHWC"
epsilon = 0.00001
n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c]
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance,
epsilon, "NHWC")
# running N, C, H, W case
# should produce the same results
x_shape2 = [n, c, h, w]
x_val2 = np.transpose(x_val, (0, 3, 1, 2))
y_out2 = _reference_testing(x_val2, scale_val, bias_val, mean, variance,
epsilon, "NCHW")
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
self.__assert_close(y_out, y_out2_trans, "inference output")
print 'python: NHWC, NCHW, inference checking passed'
def test_python_training(self):
data_format = "NHWC"
epsilon = 0.00001
momentum = 0.9
# N, H, W, C: 2, 3, 4, 2
n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c]
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
# run forward
y_out, saved_mean, var_ref = _reference_training(
x_val, scale_val, bias_val, epsilon, "NHWC")
#
mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance
saved_variance = 1. / np.sqrt(var_ref + epsilon)
# running N, C, H, W case
# should produce the same results
x_shape2 = [n, c, h, w]
x_val2 = np.transpose(x_val, (0, 3, 1, 2))
y_out2, saved_mean2, var_ref2 = _reference_training(
x_val2, scale_val, bias_val, epsilon, "NCHW")
self.__assert_close(saved_mean, saved_mean2, "batch mean")
self.__assert_close(var_ref, var_ref2, "batch variance")
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
self.__assert_close(y_out, y_out2_trans, "batch output")
print 'python: NHWC, NCHW, forward checking passed'
# test backward now
# NHWC
self.y_grad = np.random.random_sample(x_shape).astype(np.float32)
y_grad = self.y_grad
# y_grad = np.ones(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC")
# NCHW
y_grad2 = np.transpose(y_grad, (0, 3, 1, 2))
# y_grad2 = np.ones(x_shape2).astype(np.float32)
x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad(
x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW")
self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient")
self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient")
x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1))
self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient")
print 'python: NHWC, NCHW, backward checking passed'
def test_forward_backward(self): def test_forward_backward(self):
def test_with_place(place, data_layout, shape): def test_with_place(place, data_layout, shape):
# attr # attr
epsilon = 0.00001 epsilon = 0.00001
momentum = 0.9 momentum = 0.9
if data_layout == "NCHW":
if len(shape) == 2: n, c, h, w = shape[0], shape[1], shape[2], shape[3]
x_shape = shape
c = shape[1]
else: else:
# n, h, w, c = 2, 3, 4, 2
n, h, w, c = shape[0], shape[1], shape[2], shape[3] n, h, w, c = shape[0], shape[1], shape[2], shape[3]
if data_format == "NHWC":
x_shape = [n, h, w, c]
elif data_format == "NCHW":
x_shape = [n, c, h, w]
else:
raise ValueError("Unknown data type.")
scale_shape = [c] scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32) np.random.seed(123)
scale_val = np.random.random_sample(scale_shape).astype(np.float32) x = np.random.random_sample(shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32) scale = np.random.random_sample(scale_shape).astype(np.float32)
bias = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32) mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32) variance = np.ones(scale_shape).astype(np.float32)
# run forward # run forward
y_out, saved_mean, var_ref = _reference_training( y, saved_mean, var_ref = _reference_training(x, scale, bias,
x_val, scale_val, bias_val, epsilon, data_format) epsilon, data_layout)
# update moving mean and variance
mean_out = saved_mean * (1. - momentum) + momentum * mean mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance variance_out = var_ref * (1. - momentum) + momentum * variance
saved_variance = 1. / np.sqrt(var_ref + epsilon) saved_variance = 1. / np.sqrt(var_ref + epsilon)
# for gradient test
# y_grad = np.ones(x_shape).astype(np.float32)
y_grad = np.zeros(x_shape).astype(np.float32)
if len(y_grad.shape) == 2:
y_grad[0, 0] = 1.
else:
y_grad[0, 0, 0, 0] = 1.
# y_grad = np.random.random_sample(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
data_format)
scope = core.Scope()
# create input
x_tensor = create_or_get_tensor(scope, "x_val", x_val, place)
scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val,
place)
bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val,
place)
mean_tensor = create_or_get_tensor(scope, "mean", mean, place)
variance_tensor = create_or_get_tensor(scope, "variance", variance,
place)
# create output
y_tensor = create_or_get_tensor(scope, "y_out", None, place)
saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None,
place)
saved_variance_tensor = create_or_get_tensor(
scope, "saved_variance", None, place)
mean_out_tensor = mean_tensor
variance_out_tensor = variance_tensor
batch_norm_op = Operator(
"batch_norm",
# inputs
X="x_val",
Scale="scale_val",
Bias="bias_val",
Mean="mean",
Variance="variance",
# outputs
Y="y_out",
MeanOut="mean",
VarianceOut="variance",
SavedMean="saved_mean",
SavedVariance="saved_variance",
# attrs
is_test=False,
data_layout=data_layout,
momentum=momentum,
epsilon=epsilon)
batch_norm_op.run(scope, place)
# check forward result
self.__assert_close(y_tensor, y_out, "y_out")
self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean")
self.__assert_close(saved_variance_tensor, saved_variance,
"saved_variance")
self.__assert_close(mean_out_tensor, mean_out, "mean_out")
if isinstance(place, core.CUDAPlace):
atol = 5e-2
else:
atol = 1e-4
self.__assert_close(variance_out_tensor, variance_out,
"variance_out", atol)
print "op test forward passed: ", str(place), data_layout
# run backward # run backward
batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set()) y_grad = np.random.random_sample(shape).astype(np.float32)
set_output_grad( x_grad, scale_grad, bias_grad = _reference_grad(
scope, x, y_grad, scale, saved_mean, var_ref, epsilon, data_format)
["y_out", "mean", "variance", "saved_mean", "saved_variance"],
place, var_dict = locals()
feed_dict={"y_out": y_grad}) var_dict['y@GRAD'] = y_grad
batch_norm_op_grad.run(scope, place)
var_names = [
x_grad_tensor = create_or_get_tensor(scope, 'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
grad_var_name("x_val"), None, 'saved_variance'
place) ]
scale_grad_tensor = create_or_get_tensor(scope, ground_truth = {name: var_dict[name] for name in var_names}
grad_var_name("scale_val"),
None, place) program = fluid.Program()
bias_grad_tensor = create_or_get_tensor(scope, with fluid.program_guard(program):
grad_var_name("bias_val"), block = program.global_block()
None, place) for name in ground_truth:
block.create_var(
name=name,
dtype='float32',
shape=ground_truth[name].shape)
bn_op = block.append_op(
type="batch_norm",
inputs={
"X": block.var('x'),
"Scale": block.var('scale'),
"Bias": block.var('bias'),
"Mean": block.var('mean'),
"Variance": block.var('variance')
},
outputs={
"Y": block.var('y'),
"MeanOut": block.var('mean'), # share the same memory
"VarianceOut":
block.var('variance'), # share the same memory
"SavedMean": block.var('saved_mean'),
"SavedVariance": block.var('saved_variance')
},
attrs={
"momentum": momentum,
"epsilon": epsilon,
"is_test": False,
"data_layout": data_layout
})
block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)
# generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
bn_op.desc, set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode("ascii"))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode("ascii"))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
out = exe.run(
program,
feed={
name: var_dict[name]
for name in
['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
},
fetch_list=[
'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
'x@GRAD', 'scale@GRAD', 'bias@GRAD'
])
self.__assert_close(y, out[0], "y")
self.__assert_close(mean_out, out[1], "mean")
self.__assert_close(variance_out, out[2], "variance", 1e-3)
self.__assert_close(saved_mean, out[3], "saved_mean")
self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
self.__assert_close(x_grad, out[5], "x_grad")
self.__assert_close(scale_grad, out[6], "scale_grad")
self.__assert_close(bias_grad, out[7], "bias_grad")
# check gradient output print "op test forward passed: ", str(place), data_layout
self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
print "op test backward passed: ", str(place), data_layout
places = [core.CPUPlace()] places = [core.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"): if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
...@@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest): ...@@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest):
for place in places: for place in places:
for data_format in ["NCHW", "NHWC"]: for data_format in ["NCHW", "NHWC"]:
test_with_place(place, data_format, [2, 3, 4, 5]) test_with_place(place, data_format, [2, 3, 4, 5])
test_with_place(place, data_format, [2, 3])
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -32,7 +32,6 @@ class TestBook(unittest.TestCase): ...@@ -32,7 +32,6 @@ class TestBook(unittest.TestCase):
cost = layers.square_error_cost(input=y_predict, label=y) cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(cost) avg_cost = layers.mean(cost)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
program.append_backward(avg_cost)
print(str(program)) print(str(program))
...@@ -94,8 +93,6 @@ class TestBook(unittest.TestCase): ...@@ -94,8 +93,6 @@ class TestBook(unittest.TestCase):
cost = layers.cross_entropy(input=predict, label=label) cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(cost) avg_cost = layers.mean(cost)
program.append_backward(avg_cost)
print(str(program)) print(str(program))
def test_word_embedding(self): def test_word_embedding(self):
......
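
The two tests above simply dropped the removed `Program.append_backward` calls. If a test still needs gradient ops, the free function in `fluid.backward` is the supported path; a hedged sketch, not part of this commit:

```python
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward

program = fluid.Program()
with fluid.program_guard(program):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_pred = fluid.layers.fc(input=x, size=1)
    avg_cost = fluid.layers.mean(
        fluid.layers.square_error_cost(input=y_pred, label=y))
    # Replacement for the removed Program.append_backward:
    append_backward(loss=avg_cost)
print(str(program))
```
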
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import unittest
def fc(X, W, Y):
ret_v = core.Net.create()
ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation"))
ret_v.append_op(Operator("sigmoid", X="pre_activation", Out=Y))
ret_v.complete_add_op(True)
return ret_v
class TestNet(unittest.TestCase):
def test_net_all(self):
net = core.Net.create()
op1 = Operator("sum", X=["X", "Y"], Out="Out")
net.append_op(op1)
net2 = core.Net.create()
net2.append_op(fc(X="X", W="w", Y="fc.out"))
net2.complete_add_op(True)
net.append_op(net2)
net.complete_add_op(True)
expected = '''
Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}.
Op(sum), inputs:{X[X, Y]}, outputs:{Out[Out]}.
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}.
Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}.
Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Out[fc.out]}.
'''
self.assertEqual(expected, "\n" + str(net))
if __name__ == "__main__":
unittest.main()
...@@ -473,7 +473,7 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): ...@@ -473,7 +473,7 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
loss = simple_fc_net(True) loss = simple_fc_net(True)
test_program = main.clone(for_test=True) test_program = main.clone(for_test=True)
opt = fluid.optimizer.SGD(learning_rate=0.0001) opt = fluid.optimizer.SGD(learning_rate=0.001)
opt.minimize(loss) opt.minimize(loss)
batch_size = 32 batch_size = 32
...@@ -500,4 +500,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): ...@@ -500,4 +500,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
train_loss, = train_exe.run([loss.name], feed_dict=feed_dict) train_loss, = train_exe.run([loss.name], feed_dict=feed_dict)
train_loss = numpy.array(train_loss) train_loss = numpy.array(train_loss)
self.assertTrue(numpy.allclose(train_loss, test_loss)) self.assertTrue(
numpy.allclose(
train_loss, test_loss, atol=1e-8),
"Train loss: " + str(train_loss) + "\n Test loss:" +
str(test_loss))