提交 8b597d9d 编写于 作者: C chengduoZH

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/add_gather_and_BCast_op_handle
......@@ -62,29 +62,33 @@ endif()
## Then find the reference-cblas. www.netlib.org/blas/
set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
"Folder contains reference-cblas")
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
if(NOT CMAKE_CROSSCOMPILING)
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
${REFERENCE_CBLAS_ROOT}/include
/usr/include
/usr/include/cblas
)
)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
${REFERENCE_CBLAS_ROOT}/lib
/usr/lib
/usr/lib/blas/reference/
/usr/lib/reference/
)
)
else()
# Diable the finding of reference cblas under host's system path
set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
endif()
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
......
......@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
IF(APPLE)
SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
ELSE()
SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
ENDIF()
ExternalProject_Add(
extern_grpc
DEPENDS protobuf zlib
GIT_REPOSITORY "https://github.com/grpc/grpc.git"
GIT_TAG "v1.8.x"
GIT_TAG "v1.11.x"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
......@@ -11,19 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
IF(MOBILE_INFERENCE)
if(MOBILE_INFERENCE OR RPI)
return()
ENDIF()
endif()
include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE)
set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
ExternalProject_Add(
extern_snappy
......@@ -51,8 +52,7 @@ ExternalProject_Add(
)
add_library(snappy STATIC IMPORTED GLOBAL)
set_property(TARGET snappy PROPERTY IMPORTED_LOCATION
"${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})
include_directories(${SNAPPY_INCLUDE_DIR})
add_dependencies(snappy extern_snappy)
......@@ -11,9 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
IF(MOBILE_INFERENCE)
IF(MOBILE_INFERENCE OR RPI)
return()
ENDIF()
......@@ -21,9 +20,11 @@ include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE)
set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
ExternalProject_Add(
extern_snappystream
......@@ -51,8 +52,7 @@ ExternalProject_Add(
)
add_library(snappystream STATIC IMPORTED GLOBAL)
set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION
"${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})
include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappysteam to include its own headers.
include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers.
......
......@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc)
endif()
if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
# WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
else()
target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
endif()
......@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
endif()
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS}
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
......@@ -77,6 +92,23 @@ elseif (WITH_MKLML)
)
endif()
if(NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
copy(snappy_lib
SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream")
copy(snappystream_lib
SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib")
copy(zlib_lib
SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
endif()
# paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")
......
......@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
endif()
add_subdirectory(testing)
if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS)
if(NOT MOBILE_INFERENCE AND NOT RPI)
add_subdirectory(fluid)
endif()
......@@ -3,6 +3,7 @@ add_subdirectory(platform)
add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
add_subdirectory(inference)
add_subdirectory(string)
add_subdirectory(recordio)
# NOTE: please add subdirectory inference at last.
add_subdirectory(inference)
......@@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
framework_proto backward glog lod_rank_table feed_fetch_method)
framework_proto glog lod_rank_table feed_fetch_method)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor)
......
此差异已折叠。
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
namespace paddle {
namespace framework {
// Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment.
extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars);
struct GradVarInfo {
GradVarInfo() {}
GradVarInfo(const std::string& name, int block_idx, int op_idx)
: name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
bool operator==(const GradVarInfo& b) const {
return name_ == b.name_ && block_idx_ == b.block_idx_ &&
op_idx_ == b.op_idx_;
}
std::string name_;
int block_idx_;
int op_idx_;
};
using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework
} // namespace paddle
此差异已折叠。
......@@ -92,7 +92,7 @@ class BlockDesc {
/*
* Remove Op and its input/output variables.
* Note that for either input or ouput variable, if it is also an input or
* Note that for either input or output variable, if it is also an input or
* output variable of other ops, we should remain it.
*/
void RemoveOp(size_t s, size_t e);
......
......@@ -48,7 +48,7 @@ void BroadcastOpHandle::RunImpl() {
auto out_scope_idx = out_handle->scope_idx_;
PADDLE_ENFORCE_LT(out_scope_idx, local_scopes_.size(),
"%s is not the the local_scopes ", out_handle->name_);
"%s is not in the local_scopes ", out_handle->name_);
auto *s = local_scopes_[out_scope_idx];
auto out_var = s->FindVar(out_handle->name_);
PADDLE_ENFORCE_EQ(out_p.which(), in_place.which(),
......
......@@ -57,15 +57,12 @@ class BroadcastTester : public ::testing::Test {
}
}
template <class T>
void BroadcastInitOp(int input_scope_idx) {
for (size_t j = 0; j < gpu_list_.size(); ++j) {
local_scope_.push_back(&g_scope_.NewScope());
auto* out_var = local_scope_[j]->Var("out");
out_var->GetMutable<T>();
local_scope_[j]->Var("out");
}
auto* in_var = local_scope_[input_scope_idx]->Var("input");
in_var->GetMutable<T>();
local_scope_[input_scope_idx]->Var("input");
bc_op_handle_ = new f::details::BroadcastOpHandle(local_scope_, gpu_list_);
......@@ -84,7 +81,6 @@ class BroadcastTester : public ::testing::Test {
out_var_handle->name_ = "out";
out_var_handle->version_ = 2;
out_var_handle->scope_idx_ = j;
out_var_handle->generated_op_ = bc_op_handle_;
bc_op_handle_->AddOutput(out_var_handle);
}
}
......@@ -109,7 +105,7 @@ class BroadcastTester : public ::testing::Test {
void TestBroadcastLodTensor() {
int input_scope_idx = 0;
BroadcastInitOp<f::LoDTensor>(input_scope_idx);
BroadcastInitOp(input_scope_idx);
auto in_var = local_scope_[input_scope_idx]->Var("input");
auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
......@@ -148,7 +144,7 @@ class BroadcastTester : public ::testing::Test {
void TestBroadcastSelectedRows() {
int input_scope_idx = 0;
BroadcastInitOp<f::SelectedRows>(input_scope_idx);
BroadcastInitOp(input_scope_idx);
auto in_var = local_scope_[input_scope_idx]->Var("input");
auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
......
......@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include <string>
namespace paddle {
namespace framework {
namespace details {
......@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() {
}
}
op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get<Scope *>(), place_);
op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
}
std::string ComputationOpHandle::Name() const { return op_->Type(); }
......
......@@ -14,6 +14,9 @@
#include "paddle/fluid/framework/details/fetch_op_handle.h"
#include <string>
#include <vector>
namespace paddle {
namespace framework {
namespace details {
......@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() {
for (size_t i = 0; i < scopes.size(); ++i) {
auto &scope = scopes[i];
auto &t = scope->FindVar(var_name)->Get<framework::LoDTensor>();
auto &t = scope->FindVar(kLocalExecScopeName)
->Get<Scope *>()
->FindVar(var_name)
->Get<framework::LoDTensor>();
if (platform::is_gpu_place(var->place_)) {
#ifdef PADDLE_WITH_CUDA
TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);
......
......@@ -28,6 +28,12 @@ void GatherOpHandle::RunImpl() {
"The number of inputs should be equal to the number of place.");
PADDLE_ENFORCE_EQ(this->outputs_.size(), 1,
"The number of output should be one.");
auto in_0_handle = static_cast<VarHandle *>(inputs_[0]);
auto pre_in_var =
local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_);
PADDLE_ENFORCE(pre_in_var->IsType<framework::SelectedRows>(),
"Currently, gather_op only can gather SelectedRows.");
auto pre_place = in_0_handle->place_;
// Wait input done, this Wait is asynchronous operation
for (auto *in : inputs_) {
......@@ -36,10 +42,6 @@ void GatherOpHandle::RunImpl() {
in->generated_op_->Wait(dev_ctxes_[p]);
}
}
auto in_0_handle = static_cast<VarHandle *>(inputs_[0]);
auto pre_in_var =
local_scopes_[in_0_handle->scope_idx_]->FindVar(in_0_handle->name_);
auto pre_place = in_0_handle->place_;
std::vector<int64_t> out_rows;
std::vector<Tensor *> in_tensors;
......@@ -110,9 +112,9 @@ void GatherOpHandle::RunImpl() {
s = e;
}
} else if (pre_in_var->IsType<framework::LoDTensor>()) {
// gather LoDTensor ???
PADDLE_THROW("Currently, Var only can be SelectedRows.");
} else {
PADDLE_THROW("Var should be LoDTensor or SelectedRows.");
PADDLE_THROW("Var should be SelectedRows.");
}
}
......
......@@ -56,15 +56,12 @@ class GatherTester : public ::testing::Test {
}
}
template <class T>
void InitGatherOp(int input_scope_idx) {
for (size_t j = 0; j < gpu_list_.size(); ++j) {
local_scope_.push_back(&g_scope_.NewScope());
auto* out_var = local_scope_[j]->Var("input");
out_var->GetMutable<T>();
local_scope_[j]->Var("input");
}
auto* in_var = local_scope_[input_scope_idx]->Var("out");
in_var->GetMutable<T>();
local_scope_[input_scope_idx]->Var("out");
gather_op_handle_ = new f::details::GatherOpHandle(local_scope_, gpu_list_);
......@@ -106,11 +103,9 @@ class GatherTester : public ::testing::Test {
}
}
void TestGatherLodTensor() {}
void TestGatherSelectedRows() {
int output_scope_idx = 0;
InitGatherOp<f::SelectedRows>(output_scope_idx);
InitGatherOp(output_scope_idx);
int height = kDims[0] * 2;
std::vector<int64_t> rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1,
......@@ -169,21 +164,12 @@ class GatherTester : public ::testing::Test {
f::details::GatherOpHandle* gather_op_handle_;
};
// TEST_F(GatherTester, TestCPUGatherTestLodTensor) {
// InitCtxOnGpu(false);
// TestGatherLodTensor();
//}
TEST_F(GatherTester, TestCPUGatherTestSelectedRows) {
InitCtxOnGpu(false);
TestGatherSelectedRows();
}
#ifdef PADDLE_WITH_CUDA
// TEST_F(GatherTester, TestGPUGatherTestLodTensor) {
// InitCtxOnGpu(true);
// TestGatherLodTensor();
//}
TEST_F(GatherTester, TestGPUGatherTestSelectedRows) {
InitCtxOnGpu(true);
......
......@@ -32,6 +32,8 @@ namespace details {
// temporarily place it in op_handle.
Tensor *GetTensorFromVar(Variable *in_var);
constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@";
class OpHandleBase {
private:
DISABLE_COPY_AND_ASSIGN(OpHandleBase);
......
......@@ -15,13 +15,15 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/ssa_graph.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
namespace paddle {
namespace framework {
namespace details {
class SSAGraphExecutor {
DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor);
......
......@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
ready_ops.clear();
};
// Create local scopes.
for (auto &scope : local_scopes_) {
auto &local_scope = scope->NewScope();
*scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>() = &local_scope;
}
// Step 3. Execution
while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) {
// 1. Run All Ready ops
......@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
PADDLE_ENFORCE(ready_ops.empty());
PADDLE_ENFORCE(delayed_ops.empty());
PADDLE_ENFORCE(blocked_by_delayed_ops.empty());
++computation_count_;
auto sync_computation = [&] {
computation_count_ = 0;
// Wait All computational streams
for (auto p : this->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
for (auto &scope : local_scopes_) {
scope->DropKids();
}
};
// Wait FetchOps.
if (!fetch_ops.empty()) {
fetch_ops.clear();
sync_computation();
}
if (computation_count_ == max_async_computation) {
sync_computation();
}
// NOTE: the temp scope can be dropped lazily if needed.
// Drop tmp scopes;
for (auto &scope : local_scopes_) {
auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>();
kid = nullptr;
}
return fetch_data;
......
......@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
std::unique_ptr<platform::EnforceNotMet> exception_;
std::atomic<int> running_ops_;
bool allow_op_delay_;
size_t computation_count_{0};
size_t max_async_computation{100};
};
} // namespace details
......
......@@ -46,7 +46,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
}
}
static DDim GetDims(const Scope& scope, const std::string& name) {
static DDim GetDims(const Scope& scope, const std::string& name,
bool get_actual_dim = false) {
Variable* var = scope.FindVar(name);
if (var == nullptr) {
return DDim({-1});
......@@ -55,7 +56,11 @@ static DDim GetDims(const Scope& scope, const std::string& name) {
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>().dims();
} else if (var->IsType<SelectedRows>()) {
if (get_actual_dim) {
return var->Get<SelectedRows>().value().dims();
} else {
return var->Get<SelectedRows>().GetCompleteDims();
}
} else {
return DDim({-1});
}
......@@ -129,7 +134,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
for (size_t i = 0; i < input.second.size(); ++i) {
ss << input.second[i];
if (scope) {
ss << "[" << GetDims(*scope, input.second[i]) << "]";
ss << "[" << GetDims(*scope, input.second[i], true) << "]";
ss << "(" << GetLoD(*scope, input.second[i]) << ")";
}
if (i != input.second.size() - 1) {
......@@ -149,7 +154,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
for (size_t i = 0; i < output.second.size(); ++i) {
ss << output.second[i];
if (scope) {
ss << "[" << GetDims(*scope, output.second[i]) << "]";
ss << "[" << GetDims(*scope, output.second[i], true) << "]";
ss << "(" << GetLoD(*scope, output.second[i]) << ")";
}
if (i != output.second.size() - 1) {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h"
#include <string>
#include <tuple>
#include <vector>
#ifdef PADDLE_WITH_CUDA
......@@ -41,6 +42,8 @@ class ParallelExecutorPrivate {
#ifdef PADDLE_WITH_CUDA
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif
std::vector<std::tuple<std::string, proto::VarType::Type, bool>> var_types_;
};
std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
......@@ -97,14 +100,9 @@ ParallelExecutor::ParallelExecutor(
allow_op_delay));
// Step 3. Create vars in each scope;
for (auto *scope : member_->local_scopes_) {
for (auto *var : main_program.Block(0).AllVars()) {
if (scope->FindVar(var->Name()) != nullptr) {
continue;
}
InitializeVariable(scope->Var(var->Name()), var->GetType());
}
member_->var_types_.emplace_back(var->Name(), var->GetType(),
var->Persistable());
}
}
......@@ -163,9 +161,42 @@ void ParallelExecutor::Run(
const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
platform::RecordBlock b(0);
SplitTensorToPlaces(feed_tensors);
// Create local scopes.
for (auto &scope : member_->local_scopes_) {
Scope &local_scope = scope->NewScope();
*scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
&local_scope;
for (auto &name_type_pair : member_->var_types_) {
if (scope->FindVar(std::get<0>(name_type_pair)) != nullptr) {
continue;
}
if (std::get<2>(name_type_pair)) { // Persistable
InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
std::get<1>(name_type_pair));
} else {
InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
std::get<1>(name_type_pair));
}
}
}
auto fetch_data = member_->executor_->Run(fetch_tensors);
*member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() =
fetch_data;
// Wait All computational streams
for (auto p : member_->places_) {
platform::DeviceContextPool::Instance().Get(p)->Wait();
}
for (auto &scope : member_->local_scopes_) {
auto &local_scope =
*scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
scope->DeleteScope(local_scope);
local_scope = nullptr;
}
}
void ParallelExecutor::SplitTensorToPlaces(
......
......@@ -14,18 +14,17 @@ limitations under the License. */
#include "paddle/fluid/framework/prune.h"
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
#include <gtest/gtest.h>
namespace f = paddle::framework;
namespace ops = paddle::operators;
void AddOp(const std::string &type, const f::VariableNameMap &inputs,
const f::VariableNameMap &outputs, f::AttributeMap attrs,
......
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init)
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
cc_library(paddle_fluid_api
SRCS io.cc
......@@ -11,7 +11,7 @@ cc_library(paddle_fluid DEPS ${fluid_modules})
# Create shared library
cc_library(paddle_fluid_shared SHARED
SRCS io.cc
DEPS ARCHIVE_START ${GLOB_OP_LIB} ${FLUID_CORE_MODULES} ARCHIVE_END)
DEPS ${fluid_modules})
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
if(NOT APPLE)
# TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
......
......@@ -17,10 +17,16 @@ limitations under the License. */
#include <fstream>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/pybind/pybind.h"
namespace paddle {
namespace inference {
// Temporarilly add this function for exposing framework::InitDevices() when
// linking the inference shared library.
void Init(bool init_p2p) { framework::InitDevices(init_p2p); }
void ReadBinaryFile(const std::string& filename, std::string& contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", filename);
......
......@@ -18,12 +18,15 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
namespace paddle {
namespace inference {
void Init(bool init_p2p);
void LoadPersistables(framework::Executor& executor, framework::Scope& scope,
const framework::ProgramDesc& main_program,
const std::string& dirname,
......
......@@ -17,7 +17,7 @@ function(inference_test TARGET_NAME)
string(REGEX REPLACE "^_$" "" arg "${arg}")
cc_test(test_inference_${TARGET_NAME}${arg}
SRCS test_inference_${TARGET_NAME}.cc
DEPS ARCHIVE_START paddle_fluid ARCHIVE_END
DEPS paddle_fluid
ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.inference.model)
set_tests_properties(test_inference_${TARGET_NAME}${arg}
PROPERTIES DEPENDS test_${TARGET_NAME})
......
......@@ -100,7 +100,7 @@ function(op_library TARGET)
endif()
# Define operators that don't need pybind here.
foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op")
foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
......@@ -199,7 +199,6 @@ else()
set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op)
endif()
op_library(cond_op DEPS framework_proto tensor net_op)
op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(softmax_op DEPS softmax)
......@@ -259,7 +258,6 @@ endforeach()
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op)
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/concat_op.h"
#include <string>
#include <vector>
......@@ -34,7 +35,10 @@ class ConcatOp : public framework::OperatorWithKernel {
size_t axis = static_cast<size_t>(ctx->Attrs().Get<int>("axis"));
const size_t n = ins.size();
PADDLE_ENFORCE_GT(n, 1, "Input tensors count should > 1.");
PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
if (n == 1) {
VLOG(3) << "Warning: concat op have only one input, may waste memory";
}
auto out_dims = ins[0];
size_t in_zero_dims_size = out_dims.size();
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/gather.h"
#include "paddle/fluid/operators/scatter.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
using Scope = framework::Scope;
using Variable = framework::Variable;
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DDim = framework::DDim;
framework::Scope& CondOp::AddSubScope(const Scope& scope) const {
auto sub_scopes_var = scope.FindVar("SubScopes");
PADDLE_ENFORCE_NOT_NULL(sub_scopes_var,
"Output(SubScopes) of CondOp should not be null.");
auto sub_scopes = sub_scopes_var->GetMutable<std::vector<Scope*>>();
auto& sub_scope = scope.NewScope();
sub_scopes->push_back(&sub_scope);
return sub_scope;
}
std::vector<framework::Scope*>& CondOp::GetSubScopes(
const framework::Scope& scope) const {
auto sub_scopes_var = scope.FindVar("SubScopes");
PADDLE_ENFORCE_NOT_NULL(sub_scopes_var,
"Output(SubScopes) of CondOp should not be null.");
return *sub_scopes_var->GetMutable<std::vector<framework::Scope*>>();
}
LoDTensor& CondOp::AddIndexTensor(const Scope& scope) const {
auto index_tensors_var = scope.FindVar("IndexTensors");
PADDLE_ENFORCE_NOT_NULL(index_tensors_var,
"Output(IndexTensors) of CondOp should not be null.");
auto& index_tensors =
*index_tensors_var->GetMutable<std::vector<LoDTensor>>();
index_tensors.push_back(LoDTensor());
return index_tensors.back();
}
std::vector<framework::LoDTensor>& CondOp::GetIndexTensors(
const framework::Scope& scope) const {
auto* index_tensors_var = scope.FindVar("IndexTensors");
PADDLE_ENFORCE_NOT_NULL(index_tensors_var,
"Output(IndexTensors) of CondOp should not be null.");
return *index_tensors_var->GetMutable<std::vector<framework::LoDTensor>>();
}
void CondOp::PrepareDataForSubnet(
const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const {
PADDLE_ENFORCE(!Inputs("Xs").empty(), "Inputs(Xs) of CondOp can't be empty.");
for (int i = 0; i < BRANCH_NUM; ++i) {
// Create two sub scopes for true and false branches
// sub_scopes[0] for the true branch
// sub_scopes[1] for the false branch
AddSubScope(scope);
// Create two tensors for true and false indices:
// index_tensors[0] for the true branch
// index_tensors[1] for the false branch
AddIndexTensor(scope);
}
Variable* cond_var = scope.FindVar(Input("Cond"));
PADDLE_ENFORCE_NOT_NULL(cond_var,
"Input(Cond) of CondOp should not be null.");
const LoDTensor* cond = cond_var->GetMutable<LoDTensor>();
// get the true/false index at runtime according to cond tensor
// index_vectors[0]: vector<int>, contains all index for cond[i] == true
// index_vectors[1]: vector<int>, contains all index for cond[i] == false
std::vector<std::vector<int>> index_vectors;
index_vectors.resize(BRANCH_NUM);
const int* cond_data = cond->data<int>();
for (int i = 0; i < cond->dims()[0]; ++i) {
if (cond_data[i])
index_vectors[TRUE_BRANCH].push_back(i);
else
index_vectors[FALSE_BRANCH].push_back(i);
}
// put index_vectors[0] and index_vectors[1] into two tensors:
// index_tensors[0] and index_tensors[1]
std::vector<framework::LoDTensor>& index_tensors = GetIndexTensors(scope);
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
for (int i = 0; i < BRANCH_NUM; ++i) {
DDim dim = {static_cast<int64_t>(index_vectors[i].size())};
int* index_tensor_data_ptr =
index_tensors[i].mutable_data<int>(dim, platform::CPUPlace());
memcpy(index_tensor_data_ptr, index_vectors[i].data(),
dim[0] * sizeof(int));
}
// create input in subscopes according to index_vectors
for (auto& input : Inputs("Xs")) {
Variable* var_parent = scope.FindVar(input);
PADDLE_ENFORCE_NOT_NULL(var_parent);
const auto* tensor_parent = &var_parent->Get<LoDTensor>();
for (int i = 0; i < BRANCH_NUM; ++i) {
Variable* var_child = sub_scopes[i]->FindVar(input);
PADDLE_ENFORCE_NOT_NULL(var_child);
auto* tensor_child = var_child->GetMutable<LoDTensor>();
// Resize child
DDim dim = tensor_parent->dims();
dim[0] = index_tensors[i].dims()[0];
tensor_child->mutable_data<float>(dim, platform::CPUPlace());
CPUGather<float>(dev_ctx, *tensor_parent, index_tensors[i], tensor_child);
}
}
// create output_tensors in subscope for sub_net
for (int i = 0; i < BRANCH_NUM; ++i) {
for (auto& output : (*sub_net_op_[i]).Outputs()) {
for (auto& var_name : output.second) {
sub_scopes[i]->Var(var_name);
}
}
}
}
void CondOp::MergeDataFromSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const {
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
const std::vector<framework::LoDTensor>& index_tensors =
GetIndexTensors(scope);
// Infer the output dim, out_dim[0] = true_dim[0] + false_dim[0]
PADDLE_ENFORCE(!Outputs("Outs").empty(),
"Outputs(Outs) of CondOp can't be empty.");
for (auto& output : Outputs("Outs")) {
const LoDTensor* tensor_t_out =
&sub_scopes[TRUE_BRANCH]->FindVar(output)->Get<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL");
const LoDTensor* tensor_f_out =
&sub_scopes[FALSE_BRANCH]->FindVar(output)->Get<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL");
auto* var_out = scope.FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_out, "Output not found");
LoDTensor* tensor_out = var_out->GetMutable<LoDTensor>();
PADDLE_ENFORCE_NOT_NULL(tensor_t_out,
"True output tensor should not be NULL");
DDim true_dim = tensor_t_out->dims();
DDim false_dim = tensor_f_out->dims();
true_dim[0] = 0;
false_dim[0] = 0;
PADDLE_ENFORCE_EQ(true_dim, false_dim,
"Outputs not of the same shape except the first dim");
DDim out_dim = tensor_t_out->dims();
out_dim[0] = tensor_t_out->dims()[0] + tensor_f_out->dims()[0];
tensor_out->Resize(out_dim);
tensor_out->mutable_data<float>(platform::CPUPlace());
}
// merge output results:
// output_tensor = true_output_tensor + false_output_tensor
for (auto& output : Outputs("Outs")) {
Variable* var_parent = scope.FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_parent);
auto* tensor_parent = var_parent->GetMutable<LoDTensor>();
for (int i = 0; i < BRANCH_NUM; ++i) {
Variable* var_child = sub_scopes[i]->FindVar(output);
PADDLE_ENFORCE_NOT_NULL(var_child);
auto* tensor_child = &var_child->Get<LoDTensor>();
ScatterAssign<float>(dev_ctx, *tensor_child, index_tensors[i],
tensor_parent);
}
}
}
void CondOp::RunImpl(const Scope& scope, const platform::Place& place) const {
// get device context from pool
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& dev_ctx = *pool.Get(place);
PrepareDataForSubnet(scope, dev_ctx);
std::vector<framework::Scope*>& sub_scopes = GetSubScopes(scope);
for (int i = 0; i < BRANCH_NUM; ++i) {
sub_net_op_[i]->Run(*sub_scopes[i], place);
}
MergeDataFromSubnet(scope, dev_ctx);
}
class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
public:
CondOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Cond", "The condition, which is a bool vector");
AddInput("Xs", "Inputs of Subnets").AsDuplicable();
AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable();
AddOutput("SubScopes", "sub scopes for true and false branches");
AddOutput("IndexTensors", "Index Tensors contains indices for true/false");
AddComment(R"DOC(
Sample Dependent Conditional Operator.
Given Cond[i] as a 1/0 vector to indicate true/false:
Out[i] = subnet_true[i], if Cond[i] == true
Out[i] = subnet_false[i], if Cond[i] == false
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp,
paddle::operators::CondOpProtoAndCheckerMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/net_op.h"
namespace paddle {
namespace operators {
/*
* @brief CondOp is a dynamic if-else Operator
*
* It has a input tensor named cond indicating which netop each instance will
* run.
*
* if cond == 1, it will run true_net, which is a NetOp.
*
* if cond == 0, it will run false_net, which is another NetOp.
*/
class CondOp : public framework::OperatorBase {
public:
CondOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {
sub_net_op_.resize(BRANCH_NUM);
}
CondOp(const CondOp& o)
: framework::OperatorBase(
static_cast<const framework::OperatorBase&>(o)) {
// TODO(yuyang18): Implement copy ctor well.
PADDLE_THROW("Not implemented");
}
framework::Scope& AddSubScope(const framework::Scope& scope) const;
std::vector<framework::Scope*>& GetSubScopes(
const framework::Scope& scope) const;
framework::LoDTensor& AddIndexTensor(const framework::Scope& scope) const;
std::vector<framework::LoDTensor>& GetIndexTensors(
const framework::Scope& scope) const;
void PrepareDataForSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const;
void MergeDataFromSubnet(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const;
/*
* Set True Block
*/
void set_truenet(std::unique_ptr<OperatorBase>&& net) {
sub_net_op_[TRUE_BRANCH] = std::move(net);
}
/*
* Set False Block
*/
void set_falsenet(std::unique_ptr<OperatorBase>&& net) {
sub_net_op_[FALSE_BRANCH] = std::move(net);
}
private:
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override;
private:
const int TRUE_BRANCH = 0;
const int FALSE_BRANCH = 1;
const int BRANCH_NUM = 2;
// sub_net_op_[0]: subnet_t
// sub_net_op_[1]: subnet_f
std::vector<std::unique_ptr<framework::OperatorBase>> sub_net_op_;
};
} // namespace operators
} // namespace paddle
......@@ -15,6 +15,7 @@ limitations under the License. */
#include <stdio.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <vector>
#include "paddle/fluid/operators/ctc_align_op.h"
namespace paddle {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string.h>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
......
......@@ -161,6 +161,7 @@ class RequestPrefetch final : public RequestBase {
::grpc::ByteBuffer reply;
std::string var_name = request_->OutVarname();
VLOG(3) << "prefetch var " << var_name;
auto var_desc = program_->Block(0).FindVar(var_name);
framework::Scope* local_scope = &scope_->NewScope();
auto* var = local_scope->FindVar(var_name);
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
......@@ -106,18 +107,18 @@ information. However, the output only shares the LoD information with input $X$.
protected:
std::string comment_;
void Replace(std::string& src, std::string from, std::string to) {
void Replace(std::string* src, std::string from, std::string to) {
std::size_t len_from = std::strlen(from.c_str());
std::size_t len_to = std::strlen(to.c_str());
for (std::size_t pos = src.find(from); pos != std::string::npos;
pos = src.find(from, pos + len_to)) {
src.replace(pos, len_from, to);
for (std::size_t pos = src->find(from); pos != std::string::npos;
pos = src->find(from, pos + len_to)) {
src->replace(pos, len_from, to);
}
}
void SetComment(std::string name, std::string equation) {
Replace(comment_, "{name}", name);
Replace(comment_, "{equation}", equation);
Replace(&comment_, "{name}", name);
Replace(&comment_, "{equation}", equation);
}
};
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/gru_op.h"
#include <string>
namespace paddle {
namespace operators {
......
......@@ -13,15 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/gru_compute.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h"
#include <vector>
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,7 @@
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/label_smooth_op.h"
#include <string>
namespace paddle {
namespace operators {
......
......@@ -100,7 +100,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto x_row_max = EigenMatrix<T>::From(emission_row_max);
x_row_max.device(place) =
x.maximum(Eigen::DSizes<int, 1>(1))
.reshape(Eigen::DSizes<int, 2>(int(batch_size), 1));
.reshape(Eigen::DSizes<int, 2>(static_cast<int>(batch_size), 1));
auto x_exps = EigenMatrix<T>::From(*emission_exps);
x_exps.device(place) =
......
......@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <ostream>
#include <thread>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/operators/listen_and_serv_op.h"
......@@ -88,8 +89,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
auto ins = Inputs("X");
auto fan_in = Attr<int>("Fanin");
auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock);
auto *program = block->Program();
auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock);
auto *prefetch_block = Attr<framework::BlockDesc *>(kPrefetchBlock);
auto *program = optimize_block->Program();
size_t num_blocks = program->Size();
PADDLE_ENFORCE_GE(num_blocks, 2,
"server program should have at least 2 blocks");
......@@ -97,18 +99,25 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
framework::Executor executor(dev_place);
std::vector<int> block_list;
for (size_t blkid = 1; blkid < num_blocks; ++blkid) {
if (blkid != prefetch_block->ID()) {
block_list.push_back(blkid);
}
auto prepared = executor.Prepare(*program, block_list);
}
auto optimize_prepared = executor.Prepare(*program, block_list);
// Insert placeholder for block0 which holds current op itself.
prepared.insert(prepared.begin(),
optimize_prepared.insert(
optimize_prepared.begin(),
std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
rpc_service_->SetScope(&recv_scope);
rpc_service_->SetDevCtx(&dev_ctx);
// TODO(qiao) set proper fields for table lookup and update
rpc_service_->SetExecutor(&executor);
rpc_service_->SetPrefetchBlkdId(0);
VLOG(3) << "prefetch block id is " << prefetch_block->ID();
auto prefetch_prepared = executor.Prepare(*program, prefetch_block->ID());
rpc_service_->SetPrefetchBlkdId(prefetch_block->ID());
rpc_service_->SetPrefetchPreparedCtx(prefetch_prepared.get());
prefetch_prepared.release();
rpc_service_->SetProgram(program);
// start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_));
......@@ -166,16 +175,18 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
parallel_blkids.push_back(1);
double ts = detail::GetTimestamp();
for (size_t blkid = 2; blkid < num_blocks; ++blkid) {
if (blkid != prefetch_block->ID()) {
if (program->Block(blkid).Parent() != last_parent_blkid) {
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
&recv_scope);
ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared,
program, &recv_scope);
parallel_blkids.clear();
last_parent_blkid = program->Block(blkid).Parent();
}
parallel_blkids.push_back(blkid);
}
ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
&recv_scope);
}
ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared,
program, &recv_scope);
VLOG(2) << "run all blocks spent " << detail::GetTimestamp() - ts << "(ms)";
// Reset the received sparse variables, the sum operator would not
......@@ -211,6 +222,8 @@ from send_op and send back variables to recv_op.
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
AddAttr<framework::BlockDesc *>(kOptimizeBlock,
"BlockID to run on server side.");
AddAttr<framework::BlockDesc *>(kPrefetchBlock,
"prefetch block to run on server side.");
AddAttr<int>("Fanin", "How many clients send to this server.")
.SetDefault(1);
}
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <stdint.h>
#include <ostream>
#include <string>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -27,6 +28,7 @@ namespace paddle {
namespace operators {
constexpr char kOptimizeBlock[] = "OptimizeBlock";
constexpr char kPrefetchBlock[] = "PrefetchBlock";
void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service);
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/logical_op.h"
#include <string>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
......
......@@ -78,6 +78,9 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
"(boolean, default false) "
"Sparse update.")
.SetDefault(false);
AddAttr<bool>("is_distributed",
"(boolean, default false) distributed lookup table.")
.SetDefault(false);
AddAttr<int64_t>("padding_idx",
"(int64, default -1) "
"If the value is -1, it makes no effect to lookup. "
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lrn_op.h"
#include <string>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lstm_op.h"
#include <string>
namespace paddle {
namespace operators {
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
......
......@@ -18,6 +18,7 @@ https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.c
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/cross_entropy_op.h"
#include "paddle/fluid/operators/lstm_unit_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/hostdevice.h"
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lstmp_op.h"
#include <string>
namespace paddle {
namespace operators {
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
......
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/matmul_op.h"
#include <algorithm>
#include <vector>
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <functional>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/matmul.h"
......
......@@ -13,6 +13,8 @@
* limitations under the License. */
#include "paddle/fluid/operators/maxout_op.h"
#include <vector>
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/minus_op.h"
#include "paddle/fluid/operators/net_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/momentum_op.h"
namespace paddle {
namespace operators {
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/mul_op.h"
#include <vector>
namespace paddle {
namespace operators {
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/net_op.h"
#include <set>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
const char NetOp::kAll[] = "all";
void NetOp::CompleteAddOp(bool calc) {
add_op_done_ = true;
if (!calc) return;
std::set<std::string> input_set;
std::set<std::string> output_set;
for (auto& op : ops_) {
for (auto& ipt : op->Inputs()) {
for (auto& var_name : ipt.second) {
// If input variable has been in output set, then it will be
// added into intermediate_outputs_. Otherwise, it will be
// added into input set.
if (Contains(output_set, var_name)) {
intermediate_outputs_.insert(var_name);
} else {
input_set.insert(var_name);
}
}
}
for (auto& opt : op->Outputs()) {
for (auto& var_name : opt.second) {
output_set.insert(var_name);
}
}
}
auto& inputs = inputs_[kAll];
inputs.reserve(input_set.size());
std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs));
auto& outputs = outputs_[kAll];
outputs.reserve(output_set.size());
std::copy(output_set.begin(), output_set.end(), std::back_inserter(outputs));
}
std::string NetOp::DebugStringEx(const framework::Scope* scope) const {
std::ostringstream os;
os << OperatorBase::DebugStringEx(scope) << std::endl;
for (auto& op : ops_) {
std::istringstream is(op->DebugStringEx(scope));
for (std::string line; std::getline(is, line);) {
os << " " << line << std::endl;
}
}
return os.str();
}
bool NetOp::IsNetOp() const { return true; }
std::vector<std::string> NetOp::OutputVars(bool has_intermediate) const {
std::vector<std::string> all;
for (auto& pair : this->outputs_) {
for (auto& var_name : pair.second) {
all.push_back(var_name);
}
}
if (has_intermediate) {
return all;
}
std::vector<std::string> ret_val;
for (auto& each : all) {
if (!Contains(intermediate_outputs_, each)) {
ret_val.push_back(each);
}
}
return ret_val;
}
NetOp::NetOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
std::unique_ptr<framework::OperatorBase> NetOp::Clone() const {
PADDLE_ENFORCE(
add_op_done_,
"Must clone a sealed NetOp, invoke Net::CompleteAddOp before clone");
return std::unique_ptr<OperatorBase>(new NetOp(*this));
}
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <set>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
/**
* @brief Network is also a type of Operator
*
* It will manage the operators it has.
*
* Network is the container and controller of a set of operators.
* A network object knows all Operators belonging to this network. Variables,
* which are inputs and outputs of these operators, are created and managed by a
* hierarchy of Scope objects.
*
* This is the base class of network, all the networks should implement the APIs
* it defines.
*/
class NetOp : public framework::OperatorBase {
public:
static const char kAll[];
NetOp()
: framework::OperatorBase("plain_net", framework::VariableNameMap{},
framework::VariableNameMap{},
framework::AttributeMap{}) {}
NetOp(const std::string& type, const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs);
NetOp(const NetOp& o) : framework::OperatorBase(o.type_, {}, {}, o.attrs_) {
this->ops_.reserve(o.ops_.size());
std::transform(
o.ops_.begin(), o.ops_.end(), std::back_inserter(this->ops_),
[](const std::unique_ptr<framework::OperatorBase>& op) {
return std::unique_ptr<framework::OperatorBase>(op->Clone());
});
this->CompleteAddOp();
}
bool SupportGPU() const override {
for (auto& op : ops_) {
if (!op->SupportGPU()) {
return false;
}
}
return true;
}
void AppendOp(const framework::OperatorBase& op) { AppendOp(op.Clone()); }
/**
* @brief Add an operator by ptr
*/
void AppendOp(std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot AppendOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(std::move(op));
}
void InsertOp(size_t pos, std::unique_ptr<framework::OperatorBase> op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, std::move(op));
}
void InsertOp(size_t pos, const framework::OperatorBase& op) {
InsertOp(pos, op.Clone());
}
void CompleteAddOp(bool calculate = true);
std::string DebugStringEx(
const framework::Scope* scope = nullptr) const override;
bool IsNetOp() const override;
std::vector<std::string> OutputVars(bool has_intermediate) const override;
std::unique_ptr<framework::OperatorBase> Clone() const override;
std::vector<std::unique_ptr<framework::OperatorBase>> ops_;
private:
/**
* @brief Run the network.
*
* Run all the operators with the `scope`, if no scope is provided, default
* scope will be used instead. If no OpContext is provicded, default context
* will be used.
*/
void RunImpl(const framework::Scope& scope,
const platform::Place& place) const override {
for (auto& op : ops_) {
op->Run(scope, place);
}
}
bool add_op_done_{false};
std::set<std::string> intermediate_outputs_;
template <typename T, typename KeyType>
static bool Contains(T container, KeyType key) {
return container.find(key) != container.end();
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/net_op.h"
#include <gtest/gtest.h>
namespace paddle {
namespace operators {
using Scope = framework::Scope;
using DeviceContext = platform::DeviceContext;
static int run_cnt = 0;
class TestOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
DEFINE_OP_CLONE_METHOD(TestOp);
private:
void RunImpl(const Scope& scope,
const platform::Place& place) const override {
++run_cnt;
}
};
template <typename T>
void AssertSameVectorWithoutOrder(const std::vector<T>& expected,
const std::vector<T>& actual) {
ASSERT_EQ(expected.size(), actual.size());
std::unordered_set<T> expected_set;
for (auto& tmp : expected) {
expected_set.insert(tmp);
}
for (auto& act : actual) {
ASSERT_NE(expected_set.end(), expected_set.find(act));
}
}
TEST(OpKernel, all) {
auto net = std::make_shared<NetOp>();
ASSERT_NE(net, nullptr);
net->AppendOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, framework::AttributeMap{})));
net->AppendOp(std::unique_ptr<TestOp>(
new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}},
{{"Out", {"z"}}}, framework::AttributeMap{})));
net->CompleteAddOp();
AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"},
net->Inputs(NetOp::kAll));
AssertSameVectorWithoutOrder({"y", "z"}, net->Outputs(NetOp::kAll));
auto final_outs = net->OutputVars(false);
ASSERT_EQ(final_outs.size(), 1UL);
ASSERT_EQ(final_outs[0], "z");
}
TEST(NetOp, insert_op) {
NetOp net;
auto op1 = std::unique_ptr<framework::NOP>(
new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}},
{{"Out", {"y"}}}, framework::AttributeMap{}));
net.AppendOp(*op1);
net.InsertOp(0, *op1);
ASSERT_EQ(2UL, net.ops_.size());
net.InsertOp(2, std::move(op1));
ASSERT_EQ(3UL, net.ops_.size());
}
TEST(NetOp, Clone) {
NetOp net;
net.AppendOp(std::unique_ptr<framework::NOP>(new framework::NOP{
"empty", framework::VariableNameMap{}, framework::VariableNameMap{},
framework::AttributeMap{}}));
net.AppendOp(std::unique_ptr<framework::NOP>(new framework::NOP{
"empty2", framework::VariableNameMap{}, framework::VariableNameMap{},
framework::AttributeMap{}}));
net.CompleteAddOp(true);
auto new_net_op = net.Clone();
ASSERT_NE(new_net_op, nullptr);
ASSERT_TRUE(new_net_op->IsNetOp());
auto* new_net = static_cast<NetOp*>(new_net_op.get());
ASSERT_EQ(2UL, new_net->ops_.size());
ASSERT_EQ(new_net->ops_[0]->Type(), "empty");
ASSERT_EQ(new_net->ops_[1]->Type(), "empty2");
}
} // namespace operators
} // namespace paddle
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <future>
#include <future> // NOLINT
#include <ostream>
#include "paddle/fluid/framework/data_type.h"
......@@ -50,8 +50,8 @@ class PrefetchOp : public framework::OperatorBase {
for (size_t i = 0; i < ins.size(); i++) {
if (NeedSend(scope, ins[i])) {
VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << "to get "
<< outs[i] << "back";
VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get "
<< outs[i] << " back";
rpc_client->AsyncPrefetchVariable(epmap[i], ctx, scope, ins[i],
outs[i]);
} else {
......@@ -71,7 +71,7 @@ class PrefetchOpMaker : public framework::OpProtoAndCheckerMaker {
"(RPCClient) The RPC client object which will be"
"initialized at most once.");
AddOutput("Out",
"(SelectedRows) result "
"(LoDTensor) result "
"to be fetched from parameter server")
.AsDuplicable();
AddAttr<std::vector<std::string>>(
......
......@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/prelu_op.h"
#include "paddle/fluid/operators/net_op.h"
#include <string>
namespace paddle {
namespace operators {
......
......@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/scale_op.h"
#include "paddle/fluid/operators/net_op.h"
#include <string>
namespace paddle {
namespace operators {
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#include <unistd.h>
#include <string>
#include <thread>
#include <thread> // NOLINT
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
......@@ -37,11 +37,11 @@ namespace m = paddle::operators::math;
std::unique_ptr<f::OperatorBase> listen_and_serv_op;
int selected_port;
void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) {
void InitTensorsInScope(const p::CPUPlace &place, f::Scope *scope) {
p::CPUDeviceContext ctx(place);
for (int i = 0; i < 2; ++i) {
auto var_name = paddle::string::Sprintf("x%d", i);
auto var = scope.Var(var_name);
auto var = scope->Var(var_name);
auto tensor = var->GetMutable<f::LoDTensor>();
tensor->Resize({10, 10});
float *expect = tensor->mutable_data<float>(place);
......@@ -50,20 +50,20 @@ void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) {
}
}
auto out_var = scope.Var("Out");
auto out_var = scope->Var("Out");
auto out_tensor = out_var->GetMutable<f::LoDTensor>();
out_tensor->Resize({10, 10});
out_tensor->mutable_data<float>(place); // allocate
}
void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
void InitSelectedRowsInScope(const p::CPUPlace &place, f::Scope *scope) {
p::CPUDeviceContext ctx(place);
int64_t height = 10;
int64_t row_numel = 10;
m::SetConstant<p::CPUDeviceContext, float> set_one;
// init x0
std::vector<int64_t> rows0{0, 4, 7};
auto x0_var = scope.Var("x0");
auto x0_var = scope->Var("x0");
auto x0 = x0_var->GetMutable<f::SelectedRows>();
x0->set_rows(rows0);
x0->set_height(height);
......@@ -74,7 +74,7 @@ void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
// init x1
std::vector<int64_t> rows1{2, 9};
auto x1_var = scope.Var("x1");
auto x1_var = scope->Var("x1");
auto x1 = x1_var->GetMutable<f::SelectedRows>();
x1->set_rows(rows1);
x1->set_height(height);
......@@ -83,7 +83,7 @@ void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
f::make_ddim({static_cast<int64_t>(rows1.size()), row_numel}), place);
set_one(ctx, x1_value, 1.0);
auto out_var = scope.Var("Out");
auto out_var = scope->Var("Out");
auto out = out_var->GetMutable<f::SelectedRows>();
auto out_value = out->mutable_value();
out->set_height(height);
......@@ -117,15 +117,16 @@ void StartServerNet(bool is_sparse) {
f::Scope scope;
p::CPUPlace place;
if (is_sparse) {
InitSelectedRowsInScope(scope, place);
InitSelectedRowsInScope(place, &scope);
} else {
InitTensorsInScope(scope, place);
InitTensorsInScope(place, &scope);
}
// sub program run in listen_and_serv_op, for simple test we use sum
f::ProgramDesc program;
const auto &root_block = program.Block(0);
auto *optimize_block = program.AppendBlock(root_block);
auto *prefetch_block = program.AppendBlock(root_block);
// X for server side tensors, RX for received tensers, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block);
......@@ -135,6 +136,7 @@ void StartServerNet(bool is_sparse) {
attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})});
attrs.insert({"OptimizeBlock", optimize_block});
attrs.insert({"PrefetchBlock", prefetch_block});
listen_and_serv_op =
f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
LOG(INFO) << "selected port before run " << selected_port;
......@@ -148,7 +150,7 @@ TEST(SendRecvOp, CPUDense) {
// local net
f::Scope scope;
p::CPUPlace place;
InitTensorsInScope(scope, place);
InitTensorsInScope(place, &scope);
// create rpc client var
scope.Var("RPC_CLIENT_VAR");
......@@ -191,7 +193,7 @@ TEST(SendRecvOp, CPUSparse) {
f::Scope scope;
p::CPUPlace place;
p::CPUDeviceContext ctx(place);
InitSelectedRowsInScope(scope, place);
InitSelectedRowsInScope(place, &scope);
scope.Var("RPC_CLIENT_VAR");
f::AttributeMap attrs;
selected_port = static_cast<paddle::operators::ListenAndServOp *>(
......
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <future>
#include <future> // NOLINT
#include <ostream>
#include "paddle/fluid/framework/data_type.h"
......@@ -36,7 +36,7 @@ class SendVarsOp : public framework::OperatorBase {
auto ins = Inputs("X");
std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap");
int sync_send = Attr<int>("sync_sent");
int sync_send = Attr<int>("sync_send");
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& ctx = *pool.Get(place);
......
......@@ -35,8 +35,8 @@ class SGDOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning rate should have 1 element");
auto param_dim = ctx->GetInputDim("Param");
// TODO(qijun): check dimensions of Param and Grad at complie
// and run time.
// TODO(qijun): check dimensions of Param and Grad at compile
// and runtime.
ctx->SetOutputDim("ParamOut", param_dim);
}
......
......@@ -48,20 +48,21 @@ class SplitIdsOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutputs("Out"), "SplitIdsOp must has output Out.");
auto ids_var_type = ctx->GetInputsVarType("Ids").front();
PADDLE_ENFORCE_EQ(ids_var_type, framework::proto::VarType::LOD_TENSOR);
auto ids_dims = ctx->GetInputDim("Ids");
if (ids_var_type == framework::proto::VarType::LOD_TENSOR) {
PADDLE_ENFORCE_EQ(ids_dims.size(), 2);
PADDLE_ENFORCE_EQ(ids_dims[1], 1);
}
}
};
class SplitIdsOpInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {
auto *input_var = block->Var(op_desc.Input("Ids")[0]);
for (auto &out_var : op_desc.Output("Out")) {
block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR);
block->Var(out_var)->SetType(input_var->GetType());
}
}
};
......@@ -73,4 +74,5 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(split_ids, ops::SplitIdsOp, ops::SplitIdsOpMaker,
ops::SplitIdsOpInferVarType);
REGISTER_OP_CPU_KERNEL(
split_ids, ops::SplitIdsOpKernel<paddle::platform::CPUPlace, int64_t>);
split_ids, ops::SplitIdsOpKernel<paddle::platform::CPUPlace, int64_t>,
ops::SplitIdsOpKernel<paddle::platform::CPUPlace, float>);
......@@ -24,14 +24,16 @@ namespace operators {
template <typename DeviceContext, typename T>
class SplitIdsOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
void Compute(const framework::ExecutionContext &ctx) const override {
auto place = ctx.GetPlace();
if (!platform::is_cpu_place(place)) {
PADDLE_THROW("SplitIds do not support GPU kernel");
}
auto& ids_dims = ctx.Input<framework::LoDTensor>("Ids")->dims();
const T* ids = ctx.Input<framework::LoDTensor>("Ids")->data<T>();
const auto *ids_var = ctx.InputVar("Ids");
if (ids_var->IsType<framework::LoDTensor>()) {
const auto &ids_dims = ctx.Input<framework::LoDTensor>("Ids")->dims();
const T *ids = ctx.Input<framework::LoDTensor>("Ids")->data<T>();
auto outs = ctx.MultiOutput<framework::LoDTensor>("Out");
const size_t shard_num = outs.size();
......@@ -47,14 +49,40 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
// create tensor for each shard and send to parameter server
for (size_t i = 0; i < out_ids.size(); ++i) {
auto* shard_t = outs[i];
auto *shard_t = outs[i];
std::vector<T> ids = out_ids[i];
auto* shard_data = shard_t->mutable_data<T>(
auto *shard_data = shard_t->mutable_data<T>(
framework::make_ddim({static_cast<int64_t>(ids.size()), 1}), place);
for (size_t i = 0; i < ids.size(); ++i) {
shard_data[i] = ids[i];
}
}
} else if (ids_var->IsType<framework::SelectedRows>()) {
const auto *ids_selected_rows = ctx.Input<framework::SelectedRows>("Ids");
auto &ids_dims = ids_selected_rows->value().dims();
PADDLE_ENFORCE_EQ(ids_dims[0], ids_selected_rows->rows().size(), "");
const T *ids = ids_selected_rows->value().data<T>();
const auto &ids_rows = ids_selected_rows->rows();
auto outs = ctx.MultiOutput<framework::SelectedRows>("Out");
const size_t shard_num = outs.size();
// get rows for outputs
for (auto &id : ids_rows) {
size_t shard_id = static_cast<size_t>(id) % shard_num;
outs[shard_id]->mutable_rows()->push_back(id);
}
int64_t row_width = ids_dims[1];
for (auto &out : outs) {
out->set_height(ids_selected_rows->height());
framework::DDim ddim = framework::make_ddim(
{static_cast<int64_t>(out->rows().size()), row_width});
T *output = out->mutable_value()->mutable_data<T>(ddim, place);
for (size_t i = 0; i < ddim[0]; ++i) {
memcpy(output + i * row_width, ids + out->rows()[i] * row_width,
row_width * sizeof(T));
}
}
}
}
};
......
......@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/split_op.h"
#include "paddle/fluid/operators/net_op.h"
namespace paddle {
namespace operators {
......
......@@ -10,9 +10,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/sum_op.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
......@@ -37,7 +39,10 @@ class SumOp : public framework::OperatorWithKernel {
auto x_dims = ctx->GetInputsDim("X");
size_t N = x_dims.size();
PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1.");
PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0.");
if (N == 1) {
VLOG(3) << "Warning: sum have only one input, may waste memory";
}
framework::DDim in_dim({0});
for (auto& x_dim : x_dims) {
......
......@@ -2,13 +2,13 @@ if(WITH_PYTHON)
if(WITH_AMD_GPU)
hip_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method
DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method
parallel_executor
${GLOB_OP_LIB})
else()
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method
DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method
parallel_executor
${GLOB_OP_LIB})
if(NOT APPLE AND NOT ANDROID)
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <string>
#include <tuple>
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/program_desc.h"
......@@ -125,23 +124,6 @@ void BindProgramDesc(pybind11::module *m) {
})
.def("append_block", &pd::ProgramDesc::AppendBlock,
pybind11::return_value_policy::reference)
.def("append_backward",
[](pd::ProgramDesc &program_desc, const pd::VarDesc &target,
const std::unordered_set<std::string> &no_grad_vars) {
pd::ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars);
std::unordered_map<
std::string, std::tuple<std::string /* grad_var_name */,
int /* block_idx */, int /* op_idx */>>
retv;
for (auto it = param_grad_map.begin(); it != param_grad_map.end();
++it) {
const auto &grad_info = it->second;
retv[it->first] = std::make_tuple(
grad_info.name_, grad_info.block_idx_, grad_info.op_idx_);
}
return retv;
})
.def("block", &pd::ProgramDesc::MutableBlock,
pybind11::return_value_policy::reference)
.def("num_blocks", &pd::ProgramDesc::Size)
......
......@@ -20,9 +20,6 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/framework/backward.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
......@@ -31,18 +28,18 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/prune.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/cond_op.h"
#include "paddle/fluid/operators/net_op.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/pybind/const_value.h"
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/protobuf.h"
#include "paddle/fluid/pybind/pybind.h" // NOLINT
#include "paddle/fluid/pybind/recordio.h"
#include "paddle/fluid/pybind/tensor_py.h"
......@@ -239,11 +236,6 @@ All parameter, weight, gradient are variables in Paddle.
},
py::return_value_policy::reference)
#endif
.def("get_net",
[](Variable &self) -> operators::NetOp * {
return self.GetMutable<operators::NetOp>();
},
py::return_value_policy::reference)
.def("get_reader",
[](Variable &self) -> framework::ReaderHolder * {
PADDLE_ENFORCE(self.IsType<framework::ReaderHolder>());
......@@ -388,11 +380,6 @@ All parameter, weight, gradient are variables in Paddle.
desc.InitializationErrorString());
return OpRegistry::CreateOp(desc);
})
.def("backward",
[](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) {
return Backward(forwardOp, no_grad_vars).release();
})
.def("run",
[](OperatorBase &self, const Scope &scope,
const platform::CPUPlace &place) { self.Run(scope, place); })
......@@ -420,42 +407,6 @@ All parameter, weight, gradient are variables in Paddle.
[](const OperatorBase &op) { return op.OutputVars(false); })
.def("support_gpu", &OperatorBase::SupportGPU);
py::class_<operators::NetOp, OperatorBase>(m, "Net")
.def_static("create",
[]() -> operators::NetOp * {
auto *retv = new operators::NetOp;
retv->SetType("plain_net");
return retv;
})
.def("append_op", [](operators::NetOp &self,
const OperatorBase &op) { self.AppendOp(op); })
.def("complete_add_op", &operators::NetOp::CompleteAddOp)
.def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
self->CompleteAddOp();
});
// cond_op
py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
.def_static("create",
[](py::bytes protobin) -> operators::CondOp * {
proto::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
auto cond_op = OpRegistry::CreateOp(desc);
return static_cast<operators::CondOp *>(cond_op.release());
})
.def("set_truenet",
[](operators::CondOp &self, const operators::NetOp &net) -> void {
self.set_truenet(net.Clone());
})
.def("set_falsenet",
[](operators::CondOp &self, const operators::NetOp &net) -> void {
self.set_falsenet(net.Clone());
});
py::class_<framework::Executor>(m, "Executor")
.def(py::init<const platform::Place &>())
.def("run",
......
......@@ -14,13 +14,13 @@
#include "paddle/fluid/recordio/chunk.h"
#include <zlib.h>
#include <algorithm>
#include <memory>
#include <sstream>
#include "paddle/fluid/platform/enforce.h"
#include "snappy_stream/include/snappystream.hpp"
#include "zlib/include/zlib.h"
#include "snappystream.hpp"
namespace paddle {
namespace recordio {
......
......@@ -13,6 +13,9 @@
// limitations under the License.
#include "paddle/fluid/recordio/header.h"
#include <string>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
......
......@@ -231,7 +231,7 @@ function gen_fluid_inference_lib() {
Deploying fluid inference library ...
========================================
EOF
make inference_lib_dist
make -j `nproc` inference_lib_dist
fi
}
......
......@@ -1119,24 +1119,6 @@ class Program(object):
def current_block(self):
return self.blocks[self.current_block_idx]
def append_backward(self, target, no_grad_set=None):
"""
return map(param_name -> (grad_name, block_index, op_index))
"""
assert isinstance(target, Variable)
if no_grad_set is None:
no_grad_set = set()
try:
param_to_grad_info = self.desc.append_backward(target.desc,
no_grad_set)
except Exception as e:
raise core.EnforceNotMet(
str(e) + "\nCurrent protobuf is\n{0}".format(
self.to_string(False)))
self.sync_with_cpp()
return param_to_grad_info
def create_block(self, parent_idx=None):
new_block_idx = len(self.blocks)
parent = self.current_block() if parent_idx is None else self.block(
......@@ -1201,6 +1183,8 @@ class Parameter(Variable):
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
self.do_model_average = kwargs.get('do_model_average', None)
def __str__(self):
return self.to_string(True)
......@@ -1221,7 +1205,7 @@ class Parameter(Variable):
if with_details:
res_str = Variable.to_string(self, throw_on_error, True)
additional_attr = ("trainable", "optimize_attr", "regularizer",
"gradient_clip_attr")
"gradient_clip_attr", "do_model_average")
for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name,
str(getattr(self, attr_name)))
......
......@@ -218,6 +218,7 @@ def fc(input,
def embedding(input,
size,
is_sparse=False,
is_distributed=False,
padding_idx=None,
param_attr=None,
dtype='float32'):
......@@ -268,8 +269,11 @@ def embedding(input,
inputs={'Ids': input,
'W': w},
outputs={'Out': tmp},
attrs={'is_sparse': is_sparse,
'padding_idx': padding_idx})
attrs={
'is_sparse': is_sparse,
'is_distributed': is_distributed,
'padding_idx': padding_idx
})
return tmp
......@@ -1516,7 +1520,8 @@ def batch_norm(input,
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None):
moving_variance_name=None,
do_model_average_for_mean_and_var=False):
"""
This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters.
......@@ -1547,7 +1552,10 @@ def batch_norm(input,
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name, initializer=Constant(0.0), trainable=False),
name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=input.dtype)
mean.stop_gradient = True
......@@ -1556,7 +1564,8 @@ def batch_norm(input,
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
trainable=False),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=input.dtype)
variance.stop_gradient = True
......
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from collections import defaultdict
from paddle.fluid.framework import Program
import framework
......@@ -818,8 +818,8 @@ class ModelAverage(Optimizer):
min_average_window, max_average_window and current update times.
Args:
params_grads: A list of parameter-grad variable pairs.
average_window_rate: The rate of average window.
params_grads: A list of parameter-grad variable pairs.
min_average_window: The minimum size of average window.
max_average_window: The maximum size of average window.
......@@ -840,8 +840,8 @@ class ModelAverage(Optimizer):
"""
def __init__(self,
params_grads,
average_window_rate,
params_grads=None,
min_average_window=10000,
max_average_window=10000,
**kwargs):
......@@ -849,23 +849,36 @@ class ModelAverage(Optimizer):
self.average_window = average_window_rate
self.min_average_window = min_average_window
self.max_average_window = max_average_window
self.params_grads = params_grads
self.params_grads = [] if params_grads is None else params_grads
params = {}
for param, grad in self.params_grads:
if param.do_model_average != False:
params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
if param.name not in params and param.do_model_average != False:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
stop_gradient=True)
params[param.name] = (param, grad)
self.params_grads = params.values()
for param, grad in self.params_grads:
if grad is not None:
self._append_average_accumulate_op(param)
self.apply_program = Program()
block = self.apply_program.global_block()
with program_guard(main_program=self.apply_program):
for param_grad in self.params_grads:
if param_grad[1] is not None:
self._add_average_apply_op(block, param_grad)
self.restore_program = Program()
block = self.restore_program.global_block()
with program_guard(main_program=self.restore_program):
for param_grad in self.params_grads:
if param_grad[1] is not None:
self._add_average_restore_op(block, param_grad)
def _add_average_apply_op(self, block, param_grad):
......
......@@ -28,13 +28,15 @@ class ParamAttr(object):
learning_rate=1.0,
regularizer=None,
trainable=True,
gradient_clip=None):
gradient_clip=None,
do_model_average=None):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
self.gradient_clip = gradient_clip
self.model_average = do_model_average
def set_default_initializer(self, initializer):
if initializer is None:
......@@ -80,7 +82,8 @@ class ParamAttr(object):
},
'regularizer': self.regularizer,
'trainable': self.trainable,
'gradient_clip_attr': self.gradient_clip
'gradient_clip_attr': self.gradient_clip,
'model_average': self.model_average
}
if with_initializer:
kwargs['initializer'] = self.initializer
......
......@@ -14,23 +14,13 @@
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle.fluid as fluid
from op_test import OpTest
from paddle.fluid.framework import grad_var_name
def get_backward_op(scope, op, no_grad_set):
backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars():
var = scope.var(input)
var.get_tensor()
for output in backward_op.output_vars():
var = scope.var(output)
var.get_tensor()
return backward_op
def _reference_testing(x, scale, offset, mean, var, epsilon, data_format):
x_shape = x.shape
if len(x_shape) == 2:
......@@ -64,11 +54,6 @@ def _reference_testing(x, scale, offset, mean, var, epsilon, data_format):
def _reference_training(x, scale, offset, epsilon, data_format):
x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
if data_format == "NCHW":
n, c, h, w = x.shape
......@@ -88,8 +73,6 @@ def _reference_training(x, scale, offset, epsilon, data_format):
offset_tile = np.reshape(offset, (1, c, 1, 1))
offset_tile = np.reshape(offset_tile, (1, c, 1, 1))
y = normalized * scale_tile + offset_tile
if len(x_shape) == 2:
y = np.reshape(y, (y.shape[0], y.shape[1]))
return y, mean, var
elif data_format == "NHWC":
x_square = x * x
......@@ -100,59 +83,42 @@ def _reference_training(x, scale, offset, epsilon, data_format):
var = x_square_sum / element_count - mean * mean
normalized = (x - mean) / np.sqrt(var + epsilon)
y = normalized * scale + offset
if len(x_shape) == 2:
y = np.reshape(y, x_shape)
return y, mean, var
else:
raise ValueError("Unknown data order.")
def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format):
# Use the following formulas to calculate gradients:
# grad_scale =
# sum(grad_y * (x - mean)) * rsqrt(var + epsilon)
#
# grad_offset = sum(output_y)
#
# grad_x =
# x_grad =
# 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], grad_y.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], 1, 1, grad_y.shape[1]))
if data_format == "NCHW":
x = np.transpose(x, (0, 2, 3, 1))
grad_y = np.transpose(grad_y, (0, 2, 3, 1))
y_grad = np.transpose(y_grad, (0, 2, 3, 1))
# raise ValueError("data_format must be NHWC, got %s." % data_format)
grad_x = scale * (grad_y - np.mean(
grad_y, axis=(0, 1, 2)) - (x - mean) * np.mean(
grad_y * (x - mean), axis=(0, 1, 2)) /
x_grad = scale * (y_grad - np.mean(
y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean(
y_grad * (x - mean), axis=(0, 1, 2)) /
(var + epsilon)) / np.sqrt(var + epsilon)
grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon),
grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon),
axis=(0, 1, 2))
grad_offset = np.sum(grad_y, axis=(0, 1, 2))
grad_offset = np.sum(y_grad, axis=(0, 1, 2))
# transfer back to N, C, H, W
if data_format == "NCHW":
grad_x = np.transpose(grad_x, (0, 3, 1, 2))
x_grad = np.transpose(x_grad, (0, 3, 1, 2))
x = np.transpose(x, (0, 3, 1, 2))
grad_y = np.transpose(grad_y, (0, 3, 1, 2))
y_grad = np.transpose(y_grad, (0, 3, 1, 2))
if len(x_shape) == 2:
grad_x = np.reshape(grad_x, x_shape)
return grad_x, grad_scale, grad_offset
return x_grad, grad_scale, grad_offset
def create_or_get_tensor(scope, var_name, var, place):
......@@ -186,7 +152,7 @@ def set_output_grad(scope, outputs, place, feed_dict=None):
__set_tensor__(output, data)
class TestBatchNormOpInference(OpTest):
class TestBatchNormOpInference(unittest.TestCase):
def setUp(self):
self.dtype = np.float32
......@@ -304,231 +270,121 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference):
self.check_with_place(place, data_format, self.dtype, [2, 3])
class TestBatchNormOpTraining(OpTest):
class TestBatchNormOpTraining(unittest.TestCase):
def __assert_close(self, tensor, np_array, msg, atol=1e-4):
if not np.allclose(np.array(tensor), np_array, atol=atol):
import pdb
pdb.set_trace()
self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
def test_python_testing(self):
data_format = "NHWC"
epsilon = 0.00001
n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c]
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance,
epsilon, "NHWC")
# running N, C, H, W case
# should produce the same results
x_shape2 = [n, c, h, w]
x_val2 = np.transpose(x_val, (0, 3, 1, 2))
y_out2 = _reference_testing(x_val2, scale_val, bias_val, mean, variance,
epsilon, "NCHW")
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
self.__assert_close(y_out, y_out2_trans, "inference output")
print 'python: NHWC, NCHW, inference checking passed'
def test_python_training(self):
data_format = "NHWC"
epsilon = 0.00001
momentum = 0.9
# N, H, W, C: 2, 3, 4, 2
n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c]
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
# run forward
y_out, saved_mean, var_ref = _reference_training(
x_val, scale_val, bias_val, epsilon, "NHWC")
#
mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance
saved_variance = 1. / np.sqrt(var_ref + epsilon)
# running N, C, H, W case
# should produce the same results
x_shape2 = [n, c, h, w]
x_val2 = np.transpose(x_val, (0, 3, 1, 2))
y_out2, saved_mean2, var_ref2 = _reference_training(
x_val2, scale_val, bias_val, epsilon, "NCHW")
self.__assert_close(saved_mean, saved_mean2, "batch mean")
self.__assert_close(var_ref, var_ref2, "batch variance")
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
self.__assert_close(y_out, y_out2_trans, "batch output")
print 'python: NHWC, NCHW, forward checking passed'
# test backward now
# NHWC
self.y_grad = np.random.random_sample(x_shape).astype(np.float32)
y_grad = self.y_grad
# y_grad = np.ones(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC")
# NCHW
y_grad2 = np.transpose(y_grad, (0, 3, 1, 2))
# y_grad2 = np.ones(x_shape2).astype(np.float32)
x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad(
x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW")
self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient")
self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient")
x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1))
self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient")
print 'python: NHWC, NCHW, backward checking passed'
def test_forward_backward(self):
def test_with_place(place, data_layout, shape):
# attr
epsilon = 0.00001
momentum = 0.9
if len(shape) == 2:
x_shape = shape
c = shape[1]
if data_layout == "NCHW":
n, c, h, w = shape[0], shape[1], shape[2], shape[3]
else:
# n, h, w, c = 2, 3, 4, 2
n, h, w, c = shape[0], shape[1], shape[2], shape[3]
if data_format == "NHWC":
x_shape = [n, h, w, c]
elif data_format == "NCHW":
x_shape = [n, c, h, w]
else:
raise ValueError("Unknown data type.")
scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32)
scale_val = np.random.random_sample(scale_shape).astype(np.float32)
bias_val = np.random.random_sample(scale_shape).astype(np.float32)
np.random.seed(123)
x = np.random.random_sample(shape).astype(np.float32)
scale = np.random.random_sample(scale_shape).astype(np.float32)
bias = np.random.random_sample(scale_shape).astype(np.float32)
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
# run forward
y_out, saved_mean, var_ref = _reference_training(
x_val, scale_val, bias_val, epsilon, data_format)
# update moving mean and variance
y, saved_mean, var_ref = _reference_training(x, scale, bias,
epsilon, data_layout)
mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance
saved_variance = 1. / np.sqrt(var_ref + epsilon)
# for gradient test
# y_grad = np.ones(x_shape).astype(np.float32)
y_grad = np.zeros(x_shape).astype(np.float32)
if len(y_grad.shape) == 2:
y_grad[0, 0] = 1.
else:
y_grad[0, 0, 0, 0] = 1.
# y_grad = np.random.random_sample(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
data_format)
scope = core.Scope()
# create input
x_tensor = create_or_get_tensor(scope, "x_val", x_val, place)
scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val,
place)
bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val,
place)
mean_tensor = create_or_get_tensor(scope, "mean", mean, place)
variance_tensor = create_or_get_tensor(scope, "variance", variance,
place)
# create output
y_tensor = create_or_get_tensor(scope, "y_out", None, place)
saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None,
place)
saved_variance_tensor = create_or_get_tensor(
scope, "saved_variance", None, place)
mean_out_tensor = mean_tensor
variance_out_tensor = variance_tensor
batch_norm_op = Operator(
"batch_norm",
# inputs
X="x_val",
Scale="scale_val",
Bias="bias_val",
Mean="mean",
Variance="variance",
# outputs
Y="y_out",
MeanOut="mean",
VarianceOut="variance",
SavedMean="saved_mean",
SavedVariance="saved_variance",
# attrs
is_test=False,
data_layout=data_layout,
momentum=momentum,
epsilon=epsilon)
batch_norm_op.run(scope, place)
# check forward result
self.__assert_close(y_tensor, y_out, "y_out")
self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean")
self.__assert_close(saved_variance_tensor, saved_variance,
"saved_variance")
self.__assert_close(mean_out_tensor, mean_out, "mean_out")
if isinstance(place, core.CUDAPlace):
atol = 5e-2
else:
atol = 1e-4
self.__assert_close(variance_out_tensor, variance_out,
"variance_out", atol)
print "op test forward passed: ", str(place), data_layout
# run backward
batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set())
set_output_grad(
scope,
["y_out", "mean", "variance", "saved_mean", "saved_variance"],
place,
feed_dict={"y_out": y_grad})
batch_norm_op_grad.run(scope, place)
x_grad_tensor = create_or_get_tensor(scope,
grad_var_name("x_val"), None,
place)
scale_grad_tensor = create_or_get_tensor(scope,
grad_var_name("scale_val"),
None, place)
bias_grad_tensor = create_or_get_tensor(scope,
grad_var_name("bias_val"),
None, place)
y_grad = np.random.random_sample(shape).astype(np.float32)
x_grad, scale_grad, bias_grad = _reference_grad(
x, y_grad, scale, saved_mean, var_ref, epsilon, data_format)
var_dict = locals()
var_dict['y@GRAD'] = y_grad
var_names = [
'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
'saved_variance'
]
ground_truth = {name: var_dict[name] for name in var_names}
program = fluid.Program()
with fluid.program_guard(program):
block = program.global_block()
for name in ground_truth:
block.create_var(
name=name,
dtype='float32',
shape=ground_truth[name].shape)
bn_op = block.append_op(
type="batch_norm",
inputs={
"X": block.var('x'),
"Scale": block.var('scale'),
"Bias": block.var('bias'),
"Mean": block.var('mean'),
"Variance": block.var('variance')
},
outputs={
"Y": block.var('y'),
"MeanOut": block.var('mean'), # share the same memory
"VarianceOut":
block.var('variance'), # share the same memory
"SavedMean": block.var('saved_mean'),
"SavedVariance": block.var('saved_variance')
},
attrs={
"momentum": momentum,
"epsilon": epsilon,
"is_test": False,
"data_layout": data_layout
})
block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)
# generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
bn_op.desc, set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode("ascii"))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode("ascii"))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
out = exe.run(
program,
feed={
name: var_dict[name]
for name in
['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
},
fetch_list=[
'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
'x@GRAD', 'scale@GRAD', 'bias@GRAD'
])
self.__assert_close(y, out[0], "y")
self.__assert_close(mean_out, out[1], "mean")
self.__assert_close(variance_out, out[2], "variance", 1e-3)
self.__assert_close(saved_mean, out[3], "saved_mean")
self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
self.__assert_close(x_grad, out[5], "x_grad")
self.__assert_close(scale_grad, out[6], "scale_grad")
self.__assert_close(bias_grad, out[7], "bias_grad")
# check gradient output
self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
print "op test backward passed: ", str(place), data_layout
print "op test forward passed: ", str(place), data_layout
places = [core.CPUPlace()]
if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
......@@ -537,7 +393,6 @@ class TestBatchNormOpTraining(OpTest):
for place in places:
for data_format in ["NCHW", "NHWC"]:
test_with_place(place, data_format, [2, 3, 4, 5])
test_with_place(place, data_format, [2, 3])
if __name__ == '__main__':
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import paddle.fluid.core as core
import unittest
import numpy as np
from paddle.fluid.op import Operator, CondOp
class PySimpleCond(object):
'''
A simple implementation of dynamic if-else based on numpy
'''
def __init__(self):
array = [1] * 10
for i in range(1, 10, 2):
array[i] = 0
self.cond = np.array(array)
self.x = np.ones(shape=(10, 1)).astype("float32")
def forward(self):
self.index_t = np.where(self.cond == 1)
self.index_f = np.where(self.cond == 0)
y_t = self.x[self.index_t]
y_f = self.x[self.index_f]
y_t = y_t * 2.
y_f = y_f * (-2.)
output = np.zeros(shape=(10, 1))
output[self.index_t] = y_t
output[self.index_f] = y_f
return output
class PySimpleCondTest(unittest.TestCase):
def setUp(self):
self.condnn = PySimpleCond()
def test_forward(self):
output = self.condnn.forward()
def create_tensor(scope, name, shape, np_data):
tensor = scope.var(name).get_tensor()
tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace())
return tensor
class TestCondOp(unittest.TestCase):
'''
Test CondOp
equation:
cond = [True, False, True, False, ...]
y[index_t] = x[index_t] * 2.
y[index_f] = x[index_f] * -2.
outputs:
y
'''
def setUp(self):
self.py_cond = PySimpleCond()
def forward(self):
self.scope = core.Scope()
self.create_global_variables()
self.create_cond_op()
self.create_sub_net()
self.condop.run(self.scope, core.CPUPlace())
return np.array(self.scope.find_var("Out").get_tensor())
def create_global_variables(self):
x_np_data = self.py_cond.x
create_tensor(self.scope, "X", [10, 1], x_np_data)
cond_np_data = self.py_cond.cond.astype("int32")
create_tensor(self.scope, "cond", [10, 1], cond_np_data)
self.scope.var("SubScopes")
self.scope.var("IndexTensors")
self.scope.var("Out")
def create_cond_op(self):
self.condop = CondOp(
Cond="cond",
Xs=["X"],
Outs=["Out"],
SubScopes="SubScopes",
IndexTensors="IndexTensors")
def create_sub_net(self):
truenet = core.Net.create()
scale_op_t = Operator("scale", X='X', Out='Out', scale=2.)
truenet.append_op(scale_op_t)
truenet.complete_add_op(True)
self.condop.set_truenet(truenet)
falsenet = core.Net.create()
scale_op_t = Operator("scale", X='X', Out='Out', scale=-2.)
falsenet.append_op(scale_op_t)
falsenet.complete_add_op(True)
self.condop.set_falsenet(falsenet)
def test_forward(self):
print 'test cond op forward'
pd_output = self.forward()
py_output = self.py_cond.forward()
print 'pd_output', pd_output
print
print 'py_output', py_output
self.assertEqual(pd_output.shape, py_output.shape)
print 'test passed'
return 0
if __name__ == "__main__":
unittest.main()
......@@ -32,7 +32,6 @@ class TestBook(unittest.TestCase):
cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(cost)
self.assertIsNotNone(avg_cost)
program.append_backward(avg_cost)
print(str(program))
......@@ -94,8 +93,6 @@ class TestBook(unittest.TestCase):
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(cost)
program.append_backward(avg_cost)
print(str(program))
def test_word_embedding(self):
......
......@@ -473,7 +473,7 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
loss = simple_fc_net(True)
test_program = main.clone(for_test=True)
opt = fluid.optimizer.SGD(learning_rate=0.0001)
opt = fluid.optimizer.SGD(learning_rate=0.001)
opt.minimize(loss)
batch_size = 32
......@@ -500,4 +500,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
train_loss, = train_exe.run([loss.name], feed_dict=feed_dict)
train_loss = numpy.array(train_loss)
self.assertTrue(numpy.allclose(train_loss, test_loss))
self.assertTrue(
numpy.allclose(
train_loss, test_loss, atol=1e-8),
"Train loss: " + str(train_loss) + "\n Test loss:" +
str(test_loss))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册