提交 4cf76cfd 编写于 作者: M minqiyang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_reader_op_in_pserver

...@@ -65,6 +65,7 @@ option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better d ...@@ -65,6 +65,7 @@ option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better d
option(WITH_ANAKIN "Compile with Anakin library" OFF) option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
......
...@@ -125,6 +125,10 @@ def parse_args(): ...@@ -125,6 +125,10 @@ def parse_args():
parser.add_argument( parser.add_argument(
'--use_inference_transpiler', '--use_inference_transpiler',
action='store_true', action='store_true',
help='If set, uses inference transpiler to optimize the program.') help='If set, use inference transpiler to optimize the program.')
parser.add_argument(
'--no_random',
action='store_true',
help='If set, keep the random seed and do not shuffle the data.')
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -132,10 +132,6 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, ...@@ -132,10 +132,6 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
exe.run(startup_prog) exe.run(startup_prog)
# Use inference_transpiler to speedup # Use inference_transpiler to speedup
if args.use_inference_transpiler:
t = fluid.InferenceTranspiler()
t.transpile(infer_prog, place)
if not args.use_reader_op: if not args.use_reader_op:
feed_var_list = [ feed_var_list = [
var for var in train_prog.global_block().vars.itervalues() var for var in train_prog.global_block().vars.itervalues()
...@@ -186,6 +182,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, ...@@ -186,6 +182,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))), print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))),
# evaluation # evaluation
if not args.no_test and batch_acc and not args.use_reader_op: if not args.no_test and batch_acc and not args.use_reader_op:
if args.use_inference_transpiler:
t = fluid.InferenceTranspiler()
t.transpile(infer_prog, place)
pass_test_acc = test(exe, infer_prog, test_reader, feeder, pass_test_acc = test(exe, infer_prog, test_reader, feeder,
batch_acc) batch_acc)
print(", Test Accuracy: %f" % pass_test_acc) print(", Test Accuracy: %f" % pass_test_acc)
...@@ -316,6 +316,8 @@ def main(): ...@@ -316,6 +316,8 @@ def main():
args = parse_args() args = parse_args()
print_arguments(args) print_arguments(args)
print_paddle_envs() print_paddle_envs()
if args.no_random:
fluid.default_startup_program().random_seed = 1
# the unique trainer id, starting from 0, needed by trainer # the unique trainer id, starting from 0, needed by trainer
# only # only
......
...@@ -197,12 +197,12 @@ def get_model(args): ...@@ -197,12 +197,12 @@ def get_model(args):
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
batched_train_reader = paddle.batch( batched_train_reader = paddle.batch(
paddle.reader.shuffle( train_reader if args.no_random else paddle.reader.shuffle(
train_reader, buf_size=5120), train_reader, buf_size=5120),
batch_size=args.batch_size * args.gpus, batch_size=args.batch_size * args.gpus,
drop_last=True) drop_last=True)
batched_test_reader = paddle.batch( batched_test_reader = paddle.batch(
train_reader, batch_size=args.batch_size, drop_last=True) test_reader, batch_size=args.batch_size, drop_last=True)
return avg_cost, inference_program, optimizer, batched_train_reader,\ return avg_cost, inference_program, optimizer, batched_train_reader,\
batched_test_reader, batch_acc batched_test_reader, batch_acc
...@@ -83,18 +83,20 @@ else() ...@@ -83,18 +83,20 @@ else()
set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib) set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
endif() endif()
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS if(WITH_SYSTEM_BLAS)
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS}) ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) ${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_FOUND ON) set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER REFERENCE) set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY}) set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY})
add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) add_definitions(-DPADDLE_USE_REFERENCE_CBLAS)
message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
endif()
endif() endif()
if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND) if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND)
......
...@@ -7,18 +7,18 @@ if(NOT WITH_FLUID_ONLY) ...@@ -7,18 +7,18 @@ if(NOT WITH_FLUID_ONLY)
add_subdirectory(legacy/parameter) add_subdirectory(legacy/parameter)
if(MOBILE_INFERENCE) if(MOBILE_INFERENCE)
add_subdirectory(capi) add_subdirectory(legacy/capi)
else() else()
add_subdirectory(legacy/pserver) add_subdirectory(legacy/pserver)
add_subdirectory(trainer) add_subdirectory(trainer)
add_subdirectory(scripts) add_subdirectory(scripts)
if(WITH_C_API) if(WITH_C_API)
add_subdirectory(capi) add_subdirectory(legacy/capi)
endif() endif()
if(WITH_SWIG_PY) if(WITH_SWIG_PY)
add_subdirectory(api) add_subdirectory(legacy/api)
endif() endif()
endif() endif()
endif() endif()
......
...@@ -25,11 +25,12 @@ else() ...@@ -25,11 +25,12 @@ else()
cc_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) cc_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
endif() endif()
cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor)
cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope) cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope)
cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle) scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle)
cc_library(ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker) cc_library(ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker)
......
...@@ -33,6 +33,8 @@ struct BuildStrategy { ...@@ -33,6 +33,8 @@ struct BuildStrategy {
GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice}; GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice};
std::string debug_graphviz_path_{""}; std::string debug_graphviz_path_{""};
bool enable_data_balance_{true};
}; };
} // namespace details } // namespace details
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include <algorithm>
#include "paddle/fluid/framework/details/container_cast.h"
namespace paddle {
namespace framework {
namespace details {
#ifdef PADDLE_WITH_CUDA
DataBalanceOpHandle::DataBalanceOpHandle(
const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
const platform::NCCLContextMap *ctxs)
: local_scopes_(local_scopes), places_(places) {
if (ctxs) {
for (auto &p : places_) {
this->dev_ctxes_[p] = ctxs->DevCtx(p);
}
}
}
#else
DataBalanceOpHandle::DataBalanceOpHandle(
const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places)
: local_scopes_(local_scopes), places_(places) {}
#endif
std::string DataBalanceOpHandle::Name() const { return "data balance"; }
std::vector<std::array<int, 3>> DataBalanceOpHandle::GetBalancePlan(
const std::vector<int> &device_sizes) {
int device_num = device_sizes.size();
int total_size = 0;
int empty_num = 0;
std::vector<std::array<int, 2>> size_device_vec;
size_device_vec.reserve(device_num);
for (int i = 0; i < device_num; ++i) {
if (device_sizes[i] == 0) {
++empty_num;
}
total_size += device_sizes[i];
size_device_vec.push_back({{device_sizes[i], i}});
}
std::vector<std::array<int, 3>> res;
if (empty_num == 0) {
// No need to do data balance.
return res;
}
if (total_size < device_num) {
// No enough data.
PADDLE_THROW("There is no next data.");
}
std::sort(size_device_vec.begin(), size_device_vec.end(),
[](const std::array<int, 2> &a, const std::array<int, 2> &b) {
return a[0] > b[0];
});
int expected_device_size = total_size / device_num;
int src_idx = 0;
for (int dst_idx = device_num - empty_num; dst_idx < device_num; ++dst_idx) {
if (size_device_vec[src_idx][0] <= expected_device_size) {
++src_idx;
PADDLE_ENFORCE_LT(
src_idx, device_num - empty_num,
"In current srategy an empty tensor should not be copy source.");
}
size_device_vec[src_idx][0] -= expected_device_size;
size_device_vec[dst_idx][0] += expected_device_size;
res.push_back({{size_device_vec[src_idx][1], size_device_vec[dst_idx][1],
expected_device_size}});
}
return res;
}
void DataBalanceOpHandle::RunImpl() {
if (places_.size() == 1) {
return;
}
auto in_var_handles = DynamicCast<VarHandle>(inputs_);
auto out_var_handles = DynamicCast<VarHandle>(outputs_);
PADDLE_ENFORCE(in_var_handles.size() % places_.size() == 0);
PADDLE_ENFORCE_EQ(
in_var_handles.size(), out_var_handles.size(),
"The NoDummyInputSize and NoDummyOutputSize should be equal.");
int data_num = in_var_handles.size() / places_.size();
WaitInputVarGenerated();
std::vector<std::vector<LoDTensor *>> lod_tensors(data_num);
std::vector<int> device_sizes;
for (int i = 0; i < static_cast<int>(in_var_handles.size()); ++i) {
PADDLE_ENFORCE_EQ(in_var_handles[i]->name_, out_var_handles[i]->name_,
"The name of input and output should be equal.");
int place_idx = i / data_num;
int data_idx = i % data_num;
auto *local_scope =
local_scopes_[place_idx]->FindVar(kLocalExecScopeName)->Get<Scope *>();
auto *tensor_var = local_scope->FindVar(in_var_handles[i]->name_);
PADDLE_ENFORCE(tensor_var->IsType<LoDTensor>());
auto *tensor = tensor_var->GetMutable<LoDTensor>();
lod_tensors[data_idx].push_back(tensor);
int ins_size =
tensor->lod().empty() ? tensor->dims()[0] : tensor->NumElements();
if (data_idx == 0) {
device_sizes.emplace_back(ins_size);
} else {
PADDLE_ENFORCE_EQ(
ins_size, device_sizes.at(place_idx),
"All data on the same device shall have the same batch size.");
}
}
const auto &balance_plan = GetBalancePlan(device_sizes);
for (const auto &trans : balance_plan) {
for (int data_idx = 0; data_idx < data_num; ++data_idx) {
LoDTensor *src_tensor = lod_tensors[data_idx][trans[0]];
LoDTensor *dst_tensor = lod_tensors[data_idx][trans[1]];
int trans_ins_size = trans[2];
LoD src_lod = src_tensor->lod();
int src_ins_size =
src_lod.empty() ? src_tensor->dims()[0] : src_tensor->NumElements();
int cut_point = src_ins_size - trans_ins_size;
if (!src_lod.empty()) {
for (auto &level : src_lod) {
cut_point = level[cut_point];
}
}
TensorCopySync(src_tensor->Slice(cut_point, src_tensor->dims()[0]),
dst_tensor->place(), dst_tensor);
src_tensor->ShareDataWith(src_tensor->Slice(0, cut_point));
if (!src_lod.empty()) {
dst_tensor->set_lod(SliceInLevel(
src_lod, 0, src_ins_size - trans_ins_size, src_ins_size));
src_tensor->set_lod(
SliceInLevel(src_lod, 0, 0, src_ins_size - trans_ins_size));
}
}
}
}
} // namespace details
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace paddle {
namespace framework {
namespace details {
struct DataBalanceOpHandle : public OpHandleBase {
public:
#ifdef PADDLE_WITH_CUDA
DataBalanceOpHandle(const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places,
const platform::NCCLContextMap *ctxs);
#else
DataBalanceOpHandle(const std::vector<Scope *> &local_scopes,
const std::vector<platform::Place> &places);
#endif
std::string Name() const override;
bool IsMultiDeviceTransfer() override { return false; };
protected:
void RunImpl() override;
private:
// std::vector<(src_dev_id, dst_dev_id, trans_size)>
std::vector<std::array<int, 3>> GetBalancePlan(
const std::vector<int> &batch_size_per_device);
const std::vector<Scope *> local_scopes_;
const std::vector<platform::Place> places_;
};
} // namespace details
} // namespace framework
} // namespace paddle
...@@ -67,8 +67,8 @@ void FetchOpHandle::RunImpl() { ...@@ -67,8 +67,8 @@ void FetchOpHandle::RunImpl() {
#endif #endif
} else { } else {
tensors_[i].ShareDataWith(t); tensors_[i].ShareDataWith(t);
tensors_[i].set_lod(t.lod());
} }
tensors_[i].set_lod(t.lod());
} }
this->WaitAndMergeCPUTensors(); this->WaitAndMergeCPUTensors();
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "paddle/fluid/framework/details/all_reduce_op_handle.h" #include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/broadcast_op_handle.h" #include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/rpc_op_handle.h" #include "paddle/fluid/framework/details/rpc_op_handle.h"
...@@ -215,7 +216,14 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build( ...@@ -215,7 +216,14 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
} else { } else {
// This op runs on all devices, and its output may have parameter's // This op runs on all devices, and its output may have parameter's
// gradients. // gradients.
if (op->Type() == "read" && strategy_.enable_data_balance_) {
op->SetAttr("throw_eof_exp", false);
CreateComputationalOps(&result, *op, places_.size()); CreateComputationalOps(&result, *op, places_.size());
const auto &data_var_names = op->Output("Out");
InsertDataBalanceOp(&result, data_var_names);
} else {
CreateComputationalOps(&result, *op, places_.size());
}
if (!is_forwarding && places_.size() > 1) { if (!is_forwarding && places_.size() > 1) {
// Currently, we assume that once gradient is generated, it can be // Currently, we assume that once gradient is generated, it can be
...@@ -360,6 +368,29 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result, ...@@ -360,6 +368,29 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result,
} }
} }
void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
SSAGraph *result, const std::vector<std::string> &datas) const {
#ifdef PADDLE_WITH_CUDA
result->ops_.emplace_back(
new DataBalanceOpHandle(local_scopes_, places_, nccl_ctxs_));
#else
result->ops_.emplace_back(new DataBalanceOpHandle(local_scopes_, places_));
#endif
auto *op_handle = result->ops_.back().get();
for (size_t i = 0; i < places_.size(); ++i) {
auto &p = places_[i];
SetCommunicationContext(op_handle, p);
for (const std::string &d_name : datas) {
auto &vars = result->vars_[i][d_name];
PADDLE_ENFORCE(!vars.empty());
op_handle->AddInput(vars.back().get());
auto var = new VarHandle(vars.size(), i, d_name, p);
vars.emplace_back(var);
op_handle->AddOutput(var);
}
}
}
bool MultiDevSSAGraphBuilder::IsParameterGradientOnce( bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
const std::string &og, const std::string &og,
std::unordered_set<std::string> *og_has_been_broadcast) const { std::unordered_set<std::string> *og_has_been_broadcast) const {
...@@ -512,7 +543,8 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result, ...@@ -512,7 +543,8 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result,
op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]); op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
// the variable name which contains .block means it was splited by // the variable name which contains .block means it was splited by
// split_byref op // split_byref op
// so that we can balance the variable blocks to all the pserver instances. // so that we can balance the variable blocks to all the pserver
// instances.
if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce && if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce &&
op.InputArgumentNames()[0].find(".block") == std::string::npos) { op.InputArgumentNames()[0].find(".block") == std::string::npos) {
op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames()); op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
......
...@@ -101,6 +101,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { ...@@ -101,6 +101,9 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
void InsertAllReduceOp(SSAGraph *result, const std::string &og) const; void InsertAllReduceOp(SSAGraph *result, const std::string &og) const;
void InsertDataBalanceOp(SSAGraph *result,
const std::vector<std::string> &datas) const;
void CreateBroadcastOp(SSAGraph *result, const std::string &p_name, void CreateBroadcastOp(SSAGraph *result, const std::string &p_name,
size_t src_dev_id) const; size_t src_dev_id) const;
......
...@@ -58,8 +58,10 @@ void OpHandleBase::Run(bool use_cuda) { ...@@ -58,8 +58,10 @@ void OpHandleBase::Run(bool use_cuda) {
void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) { void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_NOT_NULL(waited_ctx);
if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) { if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) {
for (auto &dev_ctx : dev_ctxes_) { for (auto &dev_ctx : dev_ctxes_) {
PADDLE_ENFORCE_NOT_NULL(dev_ctx.second);
dev_ctx.second->Wait(); dev_ctx.second->Wait();
} }
} else { } else {
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
...@@ -68,9 +69,9 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { ...@@ -68,9 +69,9 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
// only print first ten elements // only print first ten elements
int64_t size = t.numel() < 10 ? t.numel() : 10; int64_t size = t.numel() < 10 ? t.numel() : 10;
for (int64_t i = 0; i < size; ++i) { for (int64_t i = 0; i < size; ++i) {
if (t.type().hash_code() == typeid(float).hash_code()) { // NOLINT if (IsType<float>(t.type())) {
os << t.data<float>()[i] << " "; os << t.data<float>()[i] << " ";
} else if (t.type().hash_code() == typeid(int64_t).hash_code()) { } else if (IsType<int64_t>(t.type())) {
os << t.data<int64_t>()[i] << " "; os << t.data<int64_t>()[i] << " ";
} else { } else {
PADDLE_THROW("LoDTensor data type not in [float, int64_t]"); PADDLE_THROW("LoDTensor data type not in [float, int64_t]");
...@@ -89,6 +90,7 @@ std::string LoDToString(const LoD &lod) { ...@@ -89,6 +90,7 @@ std::string LoDToString(const LoD &lod) {
LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin, LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
size_t elem_end) { size_t elem_end) {
PADDLE_ENFORCE_LT(level, in.size()); PADDLE_ENFORCE_LT(level, in.size());
PADDLE_ENFORCE_LT(elem_begin, elem_end);
PADDLE_ENFORCE_LT(elem_end, in[level].size()); PADDLE_ENFORCE_LT(elem_end, in[level].size());
LoD res; LoD res;
...@@ -384,7 +386,7 @@ void LoDTensor::MergeLoDTensor( ...@@ -384,7 +386,7 @@ void LoDTensor::MergeLoDTensor(
LoD new_lod = lod_tensors[0]->lod(); LoD new_lod = lod_tensors[0]->lod();
for (size_t i = 1; i < lod_tensors.size(); ++i) { for (size_t i = 1; i < lod_tensors.size(); ++i) {
auto *t = lod_tensors[i]; auto *t = lod_tensors[i];
PADDLE_ENFORCE_EQ(new_type.hash_code(), t->type().hash_code()); PADDLE_ENFORCE_EQ(new_type, t->type());
PADDLE_ENFORCE_EQ(new_layout, t->layout()); PADDLE_ENFORCE_EQ(new_layout, t->layout());
PADDLE_ENFORCE_EQ(framework::product(new_dim) / new_dim[0], PADDLE_ENFORCE_EQ(framework::product(new_dim) / new_dim[0],
...@@ -392,6 +394,7 @@ void LoDTensor::MergeLoDTensor( ...@@ -392,6 +394,7 @@ void LoDTensor::MergeLoDTensor(
new_dim[0] += t->dims()[0]; new_dim[0] += t->dims()[0];
auto &lod = t->lod(); auto &lod = t->lod();
PADDLE_ENFORCE_EQ(new_lod.size(), lod.size());
for (size_t j = 0; j < lod.size(); ++j) { for (size_t j = 0; j < lod.size(); ++j) {
auto &sub_lod = new_lod[j]; auto &sub_lod = new_lod[j];
auto &offset = sub_lod.back(); auto &offset = sub_lod.back();
......
...@@ -76,13 +76,9 @@ class OpRegistry { ...@@ -76,13 +76,9 @@ class OpRegistry {
template <typename PlaceType, bool at_end, size_t I, typename... KernelType> template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
struct OpKernelRegistrarFunctor; struct OpKernelRegistrarFunctor;
template <typename PlaceType, size_t I, typename... KernelTypes> template <typename PlaceType, typename T, typename Func>
struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> { inline void RegisterKernelClass(const char* op_type, const char* library_type,
using KERNEL_TYPE = Func func) {
typename std::tuple_element<I, std::tuple<KernelTypes...>>::type;
void operator()(const char* op_type, const char* library_type) const {
using T = typename KERNEL_TYPE::ELEMENT_TYPE;
std::string library(library_type); std::string library(library_type);
std::string data_layout = "ANYLAYOUT"; std::string data_layout = "ANYLAYOUT";
if (library == "MKLDNN") { if (library == "MKLDNN") {
...@@ -91,8 +87,20 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> { ...@@ -91,8 +87,20 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(), OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(),
StringToDataLayout(data_layout), StringToDataLayout(data_layout),
StringToLibraryType(library_type)); StringToLibraryType(library_type));
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); OperatorWithKernel::AllOpKernels()[op_type][key] = func;
}
template <typename PlaceType, size_t I, typename... KernelTypes>
struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
using KERNEL_TYPE =
typename std::tuple_element<I, std::tuple<KernelTypes...>>::type;
void operator()(const char* op_type, const char* library_type) const {
using T = typename KERNEL_TYPE::ELEMENT_TYPE;
RegisterKernelClass<PlaceType, T>(
op_type, library_type, [](const framework::ExecutionContext& ctx) {
KERNEL_TYPE().Compute(ctx);
});
constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value; constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...> OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
func; func;
...@@ -116,6 +124,47 @@ class OpKernelRegistrar : public Registrar { ...@@ -116,6 +124,47 @@ class OpKernelRegistrar : public Registrar {
} }
}; };
template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
struct OpKernelRegistrarFunctorEx;
template <typename PlaceType, typename... DataTypeAndKernelType>
class OpKernelRegistrarEx : public Registrar {
public:
explicit OpKernelRegistrarEx(const char* op_type, const char* library_type) {
OpKernelRegistrarFunctorEx<PlaceType, false, 0, DataTypeAndKernelType...>
func;
func(op_type, library_type);
}
};
template <typename PlaceType, size_t I, typename... DataTypeAndKernelType>
struct OpKernelRegistrarFunctorEx<PlaceType, true, I,
DataTypeAndKernelType...> {
void operator()(const char* op_type, const char* library_type) const {}
};
template <typename PlaceType, size_t I, typename... DataTypeAndKernelType>
struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
DataTypeAndKernelType...> {
using Functor =
typename std::tuple_element<I + 1,
std::tuple<DataTypeAndKernelType...>>::type;
using T =
typename std::tuple_element<I,
std::tuple<DataTypeAndKernelType...>>::type;
void operator()(const char* op_type, const char* library_type) const {
RegisterKernelClass<PlaceType, T>(op_type, library_type, Functor());
constexpr auto size =
std::tuple_size<std::tuple<DataTypeAndKernelType...>>::value;
OpKernelRegistrarFunctorEx<PlaceType, I + 2 >= size, I + 2,
DataTypeAndKernelType...>
func;
func(op_type, library_type);
}
};
/** /**
* check if MACRO is used in GLOBAL NAMESPACE. * check if MACRO is used in GLOBAL NAMESPACE.
*/ */
...@@ -174,6 +223,25 @@ class OpKernelRegistrar : public Registrar { ...@@ -174,6 +223,25 @@ class OpKernelRegistrar : public Registrar {
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \ #define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
#define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##op_type##_##library_type##__, \
"REGISTER_OP_KERNEL_EX must be called in global namespace"); \
static ::paddle::framework::OpKernelRegistrarEx<place_class, __VA_ARGS__> \
__op_kernel_registrar_##op_type##_##library_type##__(#op_type, \
#library_type); \
int TouchOpKernelRegistrar_##op_type##_##library_type() { \
__op_kernel_registrar_##op_type##_##library_type##__.Touch(); \
return 0; \
}
#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX(op_type, CUDA, ::paddle::platform::CUDAPlace, \
__VA_ARGS__)
#define REGISTER_OP_CPU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
/** /**
* Macro to mark what Operator and Kernel * Macro to mark what Operator and Kernel
* we will use and tell the compiler to * we will use and tell the compiler to
......
...@@ -592,8 +592,7 @@ static void CheckTensorNANOrInf(const std::string& name, ...@@ -592,8 +592,7 @@ static void CheckTensorNANOrInf(const std::string& name,
if (tensor.memory_size() == 0) { if (tensor.memory_size() == 0) {
return; return;
} }
if (tensor.type().hash_code() != typeid(float).hash_code() && // NOLINT if (!IsType<float>(tensor.type()) && !IsType<double>(tensor.type())) {
tensor.type().hash_code() != typeid(double).hash_code()) { // NOLINT
return; return;
} }
PADDLE_ENFORCE(!framework::TensorContainsInf(tensor), PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
...@@ -652,7 +651,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -652,7 +651,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
dev_ctx = pool.Get(expected_kernel_key.place_); dev_ctx = pool.Get(expected_kernel_key.place_);
} }
kernel_iter->second->Compute(ExecutionContext(*this, exec_scope, *dev_ctx)); kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx));
if (!transfered_inplace_vars.empty()) { if (!transfered_inplace_vars.empty()) {
// there is inplace variable has been transfered. // there is inplace variable has been transfered.
......
...@@ -347,9 +347,9 @@ class OpKernel : public OpKernelBase { ...@@ -347,9 +347,9 @@ class OpKernel : public OpKernelBase {
class OperatorWithKernel : public OperatorBase { class OperatorWithKernel : public OperatorBase {
public: public:
using OpKernelFunc = std::function<void(const ExecutionContext&)>;
using OpKernelMap = using OpKernelMap =
std::unordered_map<OpKernelType, std::unique_ptr<OpKernelBase>, std::unordered_map<OpKernelType, OpKernelFunc, OpKernelType::Hash>;
OpKernelType::Hash>;
OperatorWithKernel(const std::string& type, const VariableNameMap& inputs, OperatorWithKernel(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap& attrs) const VariableNameMap& outputs, const AttributeMap& attrs)
......
...@@ -24,18 +24,24 @@ limitations under the License. */ ...@@ -24,18 +24,24 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
template <typename T>
bool IsType(const std::type_index& type_index) {
return type_index == std::type_index(typeid(T));
}
inline proto::VarType::Type ToVarType(std::type_index type) { inline proto::VarType::Type ToVarType(std::type_index type) {
if (type.hash_code() == typeid(LoDTensor).hash_code()) { if (IsType<LoDTensor>(type)) {
return proto::VarType_Type_LOD_TENSOR; return proto::VarType_Type_LOD_TENSOR;
} else if (type.hash_code() == typeid(LoDRankTable).hash_code()) { } else if (IsType<LoDRankTable>(type)) {
return proto::VarType_Type_LOD_RANK_TABLE; return proto::VarType_Type_LOD_RANK_TABLE;
} else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) { } else if (IsType<LoDTensorArray>(type)) {
return proto::VarType_Type_LOD_TENSOR_ARRAY; return proto::VarType_Type_LOD_TENSOR_ARRAY;
} else if (type.hash_code() == typeid(SelectedRows).hash_code()) { } else if (IsType<SelectedRows>(type)) {
return proto::VarType_Type_SELECTED_ROWS; return proto::VarType_Type_SELECTED_ROWS;
} else if (type.hash_code() == typeid(ReaderHolder).hash_code()) { } else if (IsType<ReaderHolder>(type)) {
return proto::VarType_Type_READER; return proto::VarType_Type_READER;
} else if (type.hash_code() == typeid(ChannelHolder).hash_code()) { } else if (IsType<ChannelHolder>(type)) {
return proto::VarType_Type_CHANNEL; return proto::VarType_Type_CHANNEL;
} else { } else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include <typeindex>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
...@@ -41,7 +42,7 @@ int AccuDims(Vec &&vec, int size) { ...@@ -41,7 +42,7 @@ int AccuDims(Vec &&vec, int size) {
return res; return res;
} }
#define SET_TYPE(type__) dic_[typeid(type__).hash_code()] = #type__; #define SET_TYPE(type__) dic_[std::type_index(typeid(type__))] = #type__;
/* /*
* Map typeid to representation. * Map typeid to representation.
*/ */
...@@ -53,14 +54,14 @@ struct DataTypeNamer { ...@@ -53,14 +54,14 @@ struct DataTypeNamer {
template <typename T> template <typename T>
const std::string &repr() const { const std::string &repr() const {
auto x = typeid(T).hash_code(); auto x = std::type_index(typeid(T));
PADDLE_ENFORCE(dic_.count(x), "unknown type for representation"); PADDLE_ENFORCE(dic_.count(x), "unknown type for representation");
return dic_.at(x); return dic_.at(x);
} }
const std::string &repr(size_t &hash) const { // NOLINT const std::string &repr(const std::type_index &type) const { // NOLINT
PADDLE_ENFORCE(dic_.count(hash), "unknown type for representation"); PADDLE_ENFORCE(dic_.count(type), "unknown type for representation");
return dic_.at(hash); return dic_.at(type);
} }
private: private:
...@@ -71,9 +72,7 @@ struct DataTypeNamer { ...@@ -71,9 +72,7 @@ struct DataTypeNamer {
SET_TYPE(void *); SET_TYPE(void *);
} }
std::unordered_map<decltype(typeid(int).hash_code()), // NOLINT std::unordered_map<std::type_index, std::string> dic_;
std::string>
dic_;
}; };
#undef SET_TYPE #undef SET_TYPE
......
...@@ -23,9 +23,9 @@ namespace analysis { ...@@ -23,9 +23,9 @@ namespace analysis {
template <> template <>
std::string &NodeAttr::As<std::string>() { std::string &NodeAttr::As<std::string>() {
if (data_.empty()) { if (data_.empty()) {
type_hash_ = typeid(std::string).hash_code(); type_index_ = std::type_index(typeid(std::string));
} }
PADDLE_ENFORCE_EQ(type_hash_, typeid(std::string).hash_code()); PADDLE_ENFORCE_EQ(type_index_, std::type_index(typeid(std::string)));
return data_; return data_;
} }
......
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/inference/analysis/device.h" #include "paddle/fluid/inference/analysis/device.h"
#include "paddle/fluid/inference/analysis/dot.h" #include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/analysis/helper.h"
...@@ -57,12 +58,12 @@ struct NodeAttr { ...@@ -57,12 +58,12 @@ struct NodeAttr {
// init storage in the first usage. // init storage in the first usage.
if (data_.empty()) { if (data_.empty()) {
VLOG(4) << "resize data to " << sizeof(T); VLOG(4) << "resize data to " << sizeof(T);
type_hash_ = typeid(T).hash_code(); type_index_ = std::type_index(typeid(T));
data_.resize(sizeof(T)); data_.resize(sizeof(T));
} }
PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), PADDLE_ENFORCE(framework::IsType<T>(type_index_),
"type not matched, origin is %s, want %s", "type not matched, origin is %s, want %s",
DataTypeNamer::Global().repr(type_hash_), DataTypeNamer::Global().repr(type_index_),
DataTypeNamer::Global().repr<T>()); DataTypeNamer::Global().repr<T>());
PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error");
return *reinterpret_cast<T *>(&data_[0]); return *reinterpret_cast<T *>(&data_[0]);
...@@ -70,7 +71,7 @@ struct NodeAttr { ...@@ -70,7 +71,7 @@ struct NodeAttr {
private: private:
std::string data_; std::string data_;
size_t type_hash_{std::numeric_limits<size_t>::max()}; std::type_index type_index_{typeid(NodeAttr)};
}; };
/* /*
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include <algorithm> #include <algorithm>
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -47,7 +48,7 @@ class ConditionalOp : public framework::OperatorBase { ...@@ -47,7 +48,7 @@ class ConditionalOp : public framework::OperatorBase {
if (!(ips.size() == 1UL && ips[0]->IsInitialized())) { if (!(ips.size() == 1UL && ips[0]->IsInitialized())) {
PADDLE_THROW("should have one initialized input as condition"); PADDLE_THROW("should have one initialized input as condition");
} }
if (!(ips[0]->type().hash_code() == typeid(bool).hash_code() && // NOLINT if (!(framework::IsType<bool>(ips[0]->type()) && // NOLINT
ips[0]->numel() == 1)) { ips[0]->numel() == 1)) {
PADDLE_THROW( PADDLE_THROW(
"condition input's data type should be bool, " "condition input's data type should be bool, "
......
...@@ -115,6 +115,7 @@ class MKLDNNMemory { ...@@ -115,6 +115,7 @@ class MKLDNNMemory {
template <typename T> template <typename T>
class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> { class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override { void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace."); "It must use CPUPlace.");
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <ctime> #include <ctime>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable.h"
namespace paddle { namespace paddle {
...@@ -62,7 +63,7 @@ struct Formater { ...@@ -62,7 +63,7 @@ struct Formater {
} }
} }
void PrintDtype() { void PrintDtype() {
if (dtype.hash_code() != typeid(const char).hash_code()) { if (!framework::IsType<const char>(dtype)) {
CLOG << "\tdtype: " << dtype.name() << std::endl; CLOG << "\tdtype: " << dtype.name() << std::endl;
} }
} }
...@@ -83,15 +84,15 @@ struct Formater { ...@@ -83,15 +84,15 @@ struct Formater {
void PrintData(size_t size) { void PrintData(size_t size) {
PADDLE_ENFORCE_NOT_NULL(data); PADDLE_ENFORCE_NOT_NULL(data);
// print float // print float
if (dtype.hash_code() == typeid(const float).hash_code()) { if (framework::IsType<const float>(dtype)) {
Display<float>(size); Display<float>(size);
} else if (dtype.hash_code() == typeid(const double).hash_code()) { } else if (framework::IsType<const double>(dtype)) {
Display<double>(size); Display<double>(size);
} else if (dtype.hash_code() == typeid(const int).hash_code()) { } else if (framework::IsType<const int>(dtype)) {
Display<int>(size); Display<int>(size);
} else if (dtype.hash_code() == typeid(const int64_t).hash_code()) { } else if (framework::IsType<const int64_t>(dtype)) {
Display<int64_t>(size); Display<int64_t>(size);
} else if (dtype.hash_code() == typeid(const bool).hash_code()) { } else if (framework::IsType<const bool>(dtype)) {
Display<bool>(size); Display<bool>(size);
} else { } else {
CLOG << "\tdata: unprintable type: " << dtype.name() << std::endl; CLOG << "\tdata: unprintable type: " << dtype.name() << std::endl;
......
...@@ -66,9 +66,19 @@ class ReadOp : public framework::OperatorBase { ...@@ -66,9 +66,19 @@ class ReadOp : public framework::OperatorBase {
std::vector<std::string> out_arg_names = Outputs("Out"); std::vector<std::string> out_arg_names = Outputs("Out");
std::vector<framework::LoDTensor> ins; std::vector<framework::LoDTensor> ins;
reader->ReadNext(&ins); reader->ReadNext(&ins);
PADDLE_ENFORCE(!ins.empty(), "There is no next data."); if (ins.empty()) {
if (Attr<bool>("throw_eof_exp")) {
PADDLE_THROW("There is no next data.");
} else {
ins.resize(out_arg_names.size());
for (auto& tensor : ins) {
// data type is not important for subsequent DataBalanceOpHandle
tensor.mutable_data<float>(framework::make_ddim({0}), dev_place);
}
}
}
PADDLE_ENFORCE_EQ(ins.size(), out_arg_names.size()); PADDLE_ENFORCE_EQ(ins.size(), out_arg_names.size());
for (size_t i = 0; i < ins.size(); ++i) { for (size_t i = 0; i < out_arg_names.size(); ++i) {
auto* out = auto* out =
scope.FindVar(out_arg_names[i])->GetMutable<framework::LoDTensor>(); scope.FindVar(out_arg_names[i])->GetMutable<framework::LoDTensor>();
out->ShareDataWith(ins[i]); out->ShareDataWith(ins[i]);
...@@ -82,6 +92,10 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -82,6 +92,10 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
void Make() override { void Make() override {
AddInput("Reader", "(ReaderHolder) The executed reader."); AddInput("Reader", "(ReaderHolder) The executed reader.");
AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable(); AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable();
AddAttr<bool>("throw_eof_exp",
"If set true, an exception will be thrown when the Reader "
"yields empty (which means there is no next data).")
.SetDefault(true);
AddComment(R"DOC( AddComment(R"DOC(
Read Operator Read Operator
......
...@@ -12,14 +12,108 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,14 +12,108 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/reshape_op.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class ReshapeOp : public framework::OperatorWithKernel {
public:
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ReshapeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of ReshapeOp should not be null.");
const std::vector<int> &shape = ctx->Attrs().Get<std::vector<int>>("shape");
PADDLE_ENFORCE(!shape.empty(),
"The shape information must be set by Attr(shape).");
if (ctx->HasInput("Shape") && ctx->IsRuntime()) {
// If true, set the shape of Output(Out) according to Input(Shape) in
// ReshapeKernel with ExecutionContext. Also check LoD in ReshapeKernel.
ctx->ShareLoD("X", /*->*/ "Out");
return;
}
auto x_dims = ctx->GetInputDim("X");
auto out_dims = ValidateShape(shape, x_dims);
ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) {
// Only pass LoD when the first dimension of output and Input(X)
// are the same.
ctx->ShareLoD("X", /*->*/ "Out");
}
}
static framework::DDim ValidateShape(const std::vector<int> shape,
const framework::DDim &in_dims) {
const int64_t in_size = framework::product(in_dims);
// only one dimension can be set to -1, whose size will be automatically
// infered.
const int64_t unk_dim_val = -1;
const int64_t copy_dim_val = 0;
std::vector<int64_t> output_shape(shape.size(), 0);
int64_t capacity = 1;
int unk_dim_idx = -1;
for (size_t i = 0; i < shape.size(); ++i) {
if (shape[i] == unk_dim_val) {
PADDLE_ENFORCE(
unk_dim_idx == -1,
"Only one input dimension of Attr(shape) can be unknown.");
unk_dim_idx = i;
} else if (shape[i] == copy_dim_val) {
PADDLE_ENFORCE(
static_cast<int>(i) < in_dims.size(),
"The index of dimension to copy from input shape must be less "
"than the size of input shape.");
} else {
PADDLE_ENFORCE(
shape[i] > 0,
"Each input dimension of Attr(shape) must not be negtive except "
"one unknown dimension.");
}
capacity *= (shape[i] ? shape[i] : in_dims[i]);
output_shape[i] =
(shape[i] ? static_cast<int64_t>(shape[i]) : in_dims[i]);
}
if (unk_dim_idx != -1) {
if (in_size > 0) {
// in_size < 0 and is un-determinate in compile time, skip the check,
// for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8],
// capacity = -24, in_size = -8, output_shape[0] = 0
// the following check will fail.
output_shape[unk_dim_idx] = -in_size / capacity;
PADDLE_ENFORCE_EQ(output_shape[unk_dim_idx] * capacity, -in_size,
"Invalid shape is given.");
} else {
output_shape[unk_dim_idx] = -1;
}
} else {
PADDLE_ENFORCE_EQ(capacity, in_size, "Invalid shape is given.");
}
return framework::make_ddim(output_shape);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker { class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
...@@ -107,19 +201,93 @@ class ReshapeGradOp : public framework::OperatorWithKernel { ...@@ -107,19 +201,93 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
} }
}; };
class ReshapeKernel {
public:
void operator()(const framework::ExecutionContext &ctx) const {
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("X");
auto *shape_tensor = ctx.HasInput("Shape")
? ctx.Input<framework::LoDTensor>("Shape")
: nullptr;
framework::DDim out_dims = out->dims();
if (shape_tensor) {
auto *shape_data = shape_tensor->data<int>();
framework::Tensor cpu_shape_tensor;
if (platform::is_gpu_place(ctx.GetPlace())) {
TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor);
shape_data = cpu_shape_tensor.data<int>();
}
auto shape =
std::vector<int>(shape_data, shape_data + shape_tensor->numel());
out_dims = ReshapeOp::ValidateShape(shape, in->dims());
}
if (!in->lod().empty()) {
PADDLE_ENFORCE_EQ(
out_dims[0], in->dims()[0],
"Reshape operator cannot reshape an input sequence batch "
"into an output sequence batch that has a different "
"number of time steps. Please consider using "
"sequence_reshape op.");
}
bool inplace = ctx.Attr<bool>("inplace");
out->Resize(out_dims);
if (!inplace) {
out->mutable_data(ctx.GetPlace(), in->type());
framework::TensorCopySync(*in, ctx.GetPlace(), out);
out->Resize(out_dims);
} else {
out->ShareDataWith(*in);
out->Resize(out_dims);
}
}
};
class ReshapeGradKernel {
public:
void operator()(const framework::ExecutionContext &ctx) const {
auto *d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data(ctx.GetPlace(), d_out->type());
bool inplace = ctx.Attr<bool>("inplace");
auto in_dims = d_x->dims();
if (!inplace) {
framework::TensorCopy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
ctx.device_context().Wait();
d_x->Resize(in_dims);
} else {
d_x->ShareDataWith(*d_out);
d_x->Resize(in_dims);
}
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>); paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp); REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp);
REGISTER_OP_CPU_KERNEL(reshape, ops::ReshapeKernel<CPU, float>, REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
ops::ReshapeKernel<CPU, double>, ops::ReshapeKernel, int, ops::ReshapeKernel,
ops::ReshapeKernel<CPU, int>, int64_t, ops::ReshapeKernel);
ops::ReshapeKernel<CPU, int64_t>); REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
REGISTER_OP_CPU_KERNEL(reshape_grad, ops::ReshapeGradKernel<CPU, float>, double, ops::ReshapeGradKernel, int,
ops::ReshapeGradKernel<CPU, double>, ops::ReshapeGradKernel, int64_t,
ops::ReshapeGradKernel<CPU, int>, ops::ReshapeGradKernel);
ops::ReshapeGradKernel<CPU, int64_t>);
#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
ops::ReshapeKernel, int, ops::ReshapeKernel,
int64_t, ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
double, ops::ReshapeGradKernel, int,
ops::ReshapeGradKernel, int64_t,
ops::ReshapeGradKernel);
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/reshape_op.h"
using CUDA = paddle::platform::CUDADeviceContext;
REGISTER_OP_CUDA_KERNEL(reshape, paddle::operators::ReshapeKernel<CUDA, float>,
paddle::operators::ReshapeKernel<CUDA, double>,
paddle::operators::ReshapeKernel<CUDA, int>,
paddle::operators::ReshapeKernel<CUDA, int64_t>);
REGISTER_OP_CUDA_KERNEL(reshape_grad,
paddle::operators::ReshapeGradKernel<CUDA, float>,
paddle::operators::ReshapeGradKernel<CUDA, double>,
paddle::operators::ReshapeGradKernel<CUDA, int>,
paddle::operators::ReshapeGradKernel<CUDA, int64_t>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
class ReshapeOp : public framework::OperatorWithKernel {
public:
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ReshapeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of ReshapeOp should not be null.");
const std::vector<int> &shape = ctx->Attrs().Get<std::vector<int>>("shape");
PADDLE_ENFORCE(!shape.empty(),
"The shape information must be set by Attr(shape).");
if (ctx->HasInput("Shape") && ctx->IsRuntime()) {
// If true, set the shape of Output(Out) according to Input(Shape) in
// ReshapeKernel with ExecutionContext. Also check LoD in ReshapeKernel.
ctx->ShareLoD("X", /*->*/ "Out");
return;
}
auto x_dims = ctx->GetInputDim("X");
auto out_dims = ValidateShape(shape, x_dims);
ctx->SetOutputDim("Out", out_dims);
if (x_dims[0] == out_dims[0]) {
// Only pass LoD when the first dimension of output and Input(X)
// are the same.
ctx->ShareLoD("X", /*->*/ "Out");
}
}
static framework::DDim ValidateShape(const std::vector<int> shape,
const framework::DDim &in_dims) {
const int64_t in_size = framework::product(in_dims);
// only one dimension can be set to -1, whose size will be automatically
// infered.
const int64_t unk_dim_val = -1;
const int64_t copy_dim_val = 0;
std::vector<int64_t> output_shape(shape.size(), 0);
int64_t capacity = 1;
int unk_dim_idx = -1;
for (size_t i = 0; i < shape.size(); ++i) {
if (shape[i] == unk_dim_val) {
PADDLE_ENFORCE(
unk_dim_idx == -1,
"Only one input dimension of Attr(shape) can be unknown.");
unk_dim_idx = i;
} else if (shape[i] == copy_dim_val) {
PADDLE_ENFORCE(
static_cast<int>(i) < in_dims.size(),
"The index of dimension to copy from input shape must be less "
"than the size of input shape.");
} else {
PADDLE_ENFORCE(
shape[i] > 0,
"Each input dimension of Attr(shape) must not be negtive except "
"one unknown dimension.");
}
capacity *= (shape[i] ? shape[i] : in_dims[i]);
output_shape[i] =
(shape[i] ? static_cast<int64_t>(shape[i]) : in_dims[i]);
}
if (unk_dim_idx != -1) {
if (in_size > 0) {
// in_size < 0 and is un-determinate in compile time, skip the check,
// for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8],
// capacity = -24, in_size = -8, output_shape[0] = 0
// the following check will fail.
output_shape[unk_dim_idx] = -in_size / capacity;
PADDLE_ENFORCE_EQ(output_shape[unk_dim_idx] * capacity, -in_size,
"Invalid shape is given.");
} else {
output_shape[unk_dim_idx] = -1;
}
} else {
PADDLE_ENFORCE_EQ(capacity, in_size, "Invalid shape is given.");
}
return framework::make_ddim(output_shape);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
template <typename DeviceContext, typename T>
class ReshapeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const {
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("X");
auto *shape_tensor = ctx.HasInput("Shape")
? ctx.Input<framework::LoDTensor>("Shape")
: nullptr;
framework::DDim out_dims = out->dims();
if (shape_tensor) {
auto *shape_data = shape_tensor->data<int>();
framework::Tensor cpu_shape_tensor;
if (platform::is_gpu_place(ctx.GetPlace())) {
TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor);
shape_data = cpu_shape_tensor.data<int>();
}
auto shape =
std::vector<int>(shape_data, shape_data + shape_tensor->numel());
out_dims = ReshapeOp::ValidateShape(shape, in->dims());
}
if (!in->lod().empty()) {
PADDLE_ENFORCE_EQ(
out_dims[0], in->dims()[0],
"Reshape operator cannot reshape an input sequence batch "
"into an output sequence batch that has a different "
"number of time steps. Please consider using "
"sequence_reshape op.");
}
bool inplace = ctx.Attr<bool>("inplace");
out->Resize(out_dims);
if (!inplace) {
out->mutable_data<T>(ctx.GetPlace());
framework::TensorCopySync(*in, ctx.GetPlace(), out);
out->Resize(out_dims);
} else {
out->ShareDataWith(*in);
out->Resize(out_dims);
}
}
};
template <typename DeviceContext, typename T>
class ReshapeGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const {
auto *d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data<T>(ctx.GetPlace());
bool inplace = ctx.Attr<bool>("inplace");
auto in_dims = d_x->dims();
if (!inplace) {
framework::TensorCopy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
ctx.device_context().Wait();
d_x->Resize(in_dims);
} else {
d_x->ShareDataWith(*d_out);
d_x->Resize(in_dims);
}
}
};
} // namespace operators
} // namespace paddle
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/detail/safe_ref.h" #include "paddle/fluid/operators/detail/safe_ref.h"
namespace paddle { namespace paddle {
...@@ -135,15 +136,14 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -135,15 +136,14 @@ class WhileGradOp : public framework::OperatorBase {
auto &og_inside = auto &og_inside =
detail::Ref(cur_scope.Var(inside_og_name), detail::Ref(cur_scope.Var(inside_og_name),
"Cannot find inside gradient %s", inside_og_name); "Cannot find inside gradient %s", inside_og_name);
if (og_outside.Type().hash_code() == if (framework::IsType<framework::LoDTensor>(og_outside.Type())) {
typeid(framework::LoDTensor).hash_code()) {
auto &outside_tensor = og_outside.Get<framework::LoDTensor>(); auto &outside_tensor = og_outside.Get<framework::LoDTensor>();
auto &inside_tensor = auto &inside_tensor =
detail::Ref(og_inside.GetMutable<framework::LoDTensor>()); detail::Ref(og_inside.GetMutable<framework::LoDTensor>());
inside_tensor.set_lod(outside_tensor.lod()); inside_tensor.set_lod(outside_tensor.lod());
inside_tensor.ShareDataWith(outside_tensor); inside_tensor.ShareDataWith(outside_tensor);
} else if (og_outside.Type().hash_code() == } else if (framework::IsType<framework::LoDTensorArray>(
typeid(framework::LoDTensorArray).hash_code()) { og_outside.Type())) {
auto &outside_array = og_outside.Get<framework::LoDTensorArray>(); auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
auto &inside_array = auto &inside_array =
detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>()); detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
......
...@@ -643,7 +643,11 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -643,7 +643,11 @@ All parameter, weight, gradient are variables in Paddle.
[](const BuildStrategy &self) { return self.debug_graphviz_path_; }, [](const BuildStrategy &self) { return self.debug_graphviz_path_; },
[](BuildStrategy &self, const std::string &path) { [](BuildStrategy &self, const std::string &path) {
self.debug_graphviz_path_ = path; self.debug_graphviz_path_ = path;
}); })
.def_property(
"enable_data_balance",
[](const BuildStrategy &self) { return self.enable_data_balance_; },
[](BuildStrategy &self, bool b) { self.enable_data_balance_ = b; });
pe.def(py::init<const std::vector<platform::Place> &, pe.def(py::init<const std::vector<platform::Place> &,
const std::unordered_set<std::string> &, const std::unordered_set<std::string> &,
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
%include "std_string.i" %include "std_string.i"
%{ %{
#define SWIG_FILE_WITH_INIT #define SWIG_FILE_WITH_INIT
#include "api/PaddleAPI.h" #include "legacy/api/PaddleAPI.h"
%} %}
%include "exception.i" %include "exception.i"
...@@ -199,4 +199,4 @@ namespace std { ...@@ -199,4 +199,4 @@ namespace std {
%ignore OptimizationConfigPrivate; %ignore OptimizationConfigPrivate;
%ignore ParameterTraverseCallbackPrivate; %ignore ParameterTraverseCallbackPrivate;
%include "utils/GlobalConstants.h" %include "utils/GlobalConstants.h"
%include "api/PaddleAPI.h" %include "legacy/api/PaddleAPI.h"
...@@ -106,7 +106,7 @@ function cmake_gen() { ...@@ -106,7 +106,7 @@ function cmake_gen() {
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
-DWITH_ANAKIN=${WITH_ANAKIN:-ON} -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
======================================== ========================================
EOF EOF
...@@ -135,7 +135,7 @@ EOF ...@@ -135,7 +135,7 @@ EOF
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \ -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_ANAKIN=${WITH_ANAKIN:-ON} \ -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
} }
......
...@@ -18,7 +18,7 @@ import time ...@@ -18,7 +18,7 @@ import time
import shutil import shutil
from paddle.fluid.evaluator import Evaluator from paddle.fluid.evaluator import Evaluator
from paddle.fluid.framework import Program, Parameter, default_main_program, Variable from paddle.fluid.framework import Program, Parameter, default_main_program, default_startup_program, Variable
from . import core from . import core
__all__ = [ __all__ = [
...@@ -1374,3 +1374,101 @@ def get_latest_checkpoint_serial(checkpoint_dir): ...@@ -1374,3 +1374,101 @@ def get_latest_checkpoint_serial(checkpoint_dir):
if success_num > current_dir: if success_num > current_dir:
current_dir = success_num current_dir = success_num
return current_dir return current_dir
def get_test_program(filelist, program=None, startup_program=None):
"""
Transpile current train program to a program to read test dataset
if the program is using reader ops like "open_files_op".
"""
def _copy_reader_var_(block, var, new_name=None):
if new_name == None:
new_name = var.name
new_var = block.create_var(
name=str(new_name), type=core.VarDesc.VarType.READER)
new_var.desc.set_shapes(var.desc.shapes())
new_var.desc.set_dtypes(var.desc.dtypes())
new_var.persistable = True
return new_var
def _get_test_reader_name(train_reader_name):
return train_reader_name + "_test"
def _is_reader_op(op):
block = op.block
if "Out" in op.output_names:
reader_out = block.vars[op.output("Out")[0]]
if reader_out.type == core.VarDesc.VarType.READER:
return True
return False
if program == None:
program = default_main_program()
if startup_program == None:
startup_program = default_startup_program()
startup_block = startup_program.global_block()
# 1. find out the orignal reader var name
startup_reader_op_list = []
for op in startup_block.ops:
if _is_reader_op(op):
startup_reader_op_list.append(op)
if len(startup_reader_op_list) == 0:
return program
root_reader_op = startup_reader_op_list[0]
train_test_reader_map = {}
# 2. add operators to startup to read open and read test data files
for op in startup_reader_op_list:
assert (len(op.output("Out")) == 1)
train_reader_name = op.output("Out")[0]
train_reader = startup_block.vars[train_reader_name]
test_reader = _copy_reader_var_(
startup_block,
train_reader,
new_name=_get_test_reader_name(train_reader_name))
train_test_reader_map[train_reader.name] = test_reader
test_op_inputs = {}
for name in op.input_names:
train_arg_names = op.input(name)
test_arg_vars = []
for arg_name in train_arg_names:
arg_var = train_test_reader_map[
arg_name] if name == "UnderlyingReader" else startup_block.vars[
arg_name]
test_arg_vars.append(arg_var)
test_op_inputs[name] = test_arg_vars
test_op = startup_block.append_op(
type=op.type,
inputs=test_op_inputs,
outputs={'Out': [test_reader]},
attrs=op.attrs)
# root reader op's filelist attr for read test files
if op.type == root_reader_op.type:
test_op.set_attr("file_names", filelist)
if op.type == "create_multi_pass_reader":
test_op.set_attr("pass_num", 1)
# 3. rename reader vars in inference program to different name
# to avoid read from train data.
main_block = program.global_block()
for var in main_block.vars.values():
if var.type == core.VarDesc.VarType.READER:
main_block.rename_var(
str(var.name), str(_get_test_reader_name(var.name)))
for op in main_block.ops:
if op.type == root_reader_op.type:
test_op.set_attr("file_names", filelist)
if op.type == "create_multi_pass_reader":
test_op.set_attr("pass_num", 1)
startup_program.sync_with_cpp()
program.sync_with_cpp()
return program
...@@ -4,3 +4,5 @@ mnist_1.recordio ...@@ -4,3 +4,5 @@ mnist_1.recordio
mnist_2.recordio mnist_2.recordio
flowers.recordio flowers.recordio
wmt16.recordio wmt16.recordio
data_balance_test.recordio
data_balance_with_lod_test.recordio
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import paddle.v2 as paddle
import numpy as np
class TestDataBalance(unittest.TestCase):
def prepare_data(self):
def fake_data_generator():
for n in xrange(self.total_ins_num):
yield np.ones((3, 4)) * n, n
# Prepare data
with fluid.program_guard(fluid.Program(), fluid.Program()):
reader = paddle.batch(
fake_data_generator, batch_size=self.batch_size)
feeder = fluid.DataFeeder(
feed_list=[
fluid.layers.data(
name='image', shape=[3, 4], dtype='float32'),
fluid.layers.data(
name='label', shape=[1], dtype='int64'),
],
place=fluid.CPUPlace())
self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file(
self.data_file_name, reader, feeder)
def prepare_lod_data(self):
def fake_data_generator():
for n in xrange(1, self.total_ins_num + 1):
d1 = (np.ones((n, 3)) * n).astype('float32')
d2 = (np.array(n).reshape((1, 1))).astype('int32')
yield d1, d2
# Prepare lod data
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.recordio_writer.create_recordio_writer(
filename=self.lod_data_file_name) as writer:
eof = False
generator = fake_data_generator()
while (not eof):
data_batch = [
np.array([]).reshape((0, 3)), np.array([]).reshape(
(0, 1))
]
lod = [0]
for _ in xrange(self.batch_size):
try:
ins = generator.next()
except StopIteration:
eof = True
break
for i, d in enumerate(ins):
data_batch[i] = np.concatenate(
(data_batch[i], d), axis=0)
lod.append(lod[-1] + ins[0].shape[0])
if data_batch[0].shape[0] > 0:
for i, d in enumerate(data_batch):
t = fluid.LoDTensor()
t.set(data_batch[i], fluid.CPUPlace())
if i == 0:
t.set_lod([lod])
writer.append_tensor(t)
writer.complete_append_tensor()
def setUp(self):
self.use_cuda = fluid.core.is_compiled_with_cuda()
self.data_file_name = './data_balance_test.recordio'
self.lod_data_file_name = './data_balance_with_lod_test.recordio'
self.total_ins_num = 50
self.batch_size = 10
self.prepare_data()
self.prepare_lod_data()
def main(self):
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
data_reader = fluid.layers.io.open_files(
filenames=[self.data_file_name],
shapes=[[-1, 3, 4], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
if self.use_cuda:
data_reader = fluid.layers.double_buffer(data_reader)
image, label = fluid.layers.read_file(data_reader)
place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
parallel_exe = fluid.ParallelExecutor(
use_cuda=self.use_cuda, main_program=main_prog)
if (parallel_exe.device_count > self.batch_size):
print("WARNING: Unittest TestDataBalance skipped. \
For the result is not correct when device count \
is larger than batch size.")
exit(0)
fetch_list = [image.name, label.name]
data_appeared = [False] * self.total_ins_num
while (True):
try:
image_val, label_val = parallel_exe.run(fetch_list,
return_numpy=True)
except fluid.core.EnforceNotMet as ex:
self.assertIn("There is no next data.", ex.message)
break
ins_num = image_val.shape[0]
broadcasted_label = np.ones(
(ins_num, 3, 4)) * label_val.reshape((ins_num, 1, 1))
self.assertEqual(image_val.all(), broadcasted_label.all())
for l in label_val:
self.assertFalse(data_appeared[l[0]])
data_appeared[l[0]] = True
for i in data_appeared:
self.assertTrue(i)
def main_lod(self):
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
data_reader = fluid.layers.io.open_files(
filenames=[self.lod_data_file_name],
shapes=[[-1, 3], [-1, 1]],
lod_levels=[1, 0],
dtypes=['float32', 'int32'],
thread_num=1)
ins, label = fluid.layers.read_file(data_reader)
place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
parallel_exe = fluid.ParallelExecutor(
use_cuda=self.use_cuda, main_program=main_prog)
if (parallel_exe.device_count > self.batch_size):
print("WARNING: Unittest TestDataBalance skipped. \
For the result is not correct when device count \
is larger than batch size.")
exit(0)
fetch_list = [ins.name, label.name]
data_appeared = [False] * self.total_ins_num
while (True):
try:
ins_tensor, label_tensor = parallel_exe.run(
fetch_list, return_numpy=False)
except fluid.core.EnforceNotMet as ex:
self.assertIn("There is no next data.", ex.message)
break
ins_val = np.array(ins_tensor)
label_val = np.array(label_tensor)
ins_lod = ins_tensor.lod()[0]
self.assertEqual(ins_val.shape[1], 3)
self.assertEqual(label_val.shape[1], 1)
self.assertEqual(len(ins_lod) - 1, label_val.shape[0])
for i in range(0, len(ins_lod) - 1):
ins_elem = ins_val[ins_lod[i]:ins_lod[i + 1]][:]
label_elem = label_val[i][0]
self.assertEqual(ins_elem.all(), label_elem.all())
self.assertFalse(data_appeared[int(label_elem - 1)])
data_appeared[int(label_elem - 1)] = True
for i in data_appeared:
self.assertTrue(i)
def test_all(self):
self.main()
self.main_lod()
...@@ -15,51 +15,248 @@ ...@@ -15,51 +15,248 @@
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.transpiler.distribute_transpiler import delete_ops from paddle.fluid.transpiler.distribute_transpiler import delete_ops
import traceback
from transpiler_test import TranspilerTest
class TranspilerTest(unittest.TestCase):
class TestDistTranspiler(TranspilerTest):
def setUp(self): def setUp(self):
self.current_pserver_ep = "127.0.0.1:6174" self.trainer_id = 0
self.trainers = 2
self.pservers = 2
# NOTE: we do not actually bind this port
self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"
self.pserver1_ep = "127.0.0.1:6174"
self.pserver2_ep = "127.0.0.1:6175"
self.slice_var_up = True
self.sync_mode = True
self.transpiler = None
def net_conf(self):
x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
y_predict = fluid.layers.fc(input=x,
size=1000,
act=None,
param_attr=fluid.ParamAttr(name='fc_w'),
bias_attr=fluid.ParamAttr(name='fc_b'))
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost)
return
def get_main_program(self):
main = fluid.Program()
with fluid.program_guard(main):
self.net_conf()
self.origin_prog = main.clone()
return main
def get_trainer(self):
t = self._transpiler_instance()
return t.get_trainer_program()
def get_pserver(self, ep):
t = self._transpiler_instance()
pserver = t.get_pserver_program(ep)
startup = t.get_startup_program(ep, pserver)
return pserver, startup
def _transpiler_instance(self):
if not self.transpiler:
main = self.get_main_program()
self.transpiler = fluid.DistributeTranspiler()
self.transpiler.transpile(
self.trainer_id,
program=main,
pservers=self.pserver_eps,
trainers=self.trainers,
slice_var_up=self.slice_var_up,
sync_mode=self.sync_mode)
return self.transpiler
class TestBasicModel(TranspilerTest):
def test_transpiler(self): def test_transpiler(self):
pserver, startup = self.get_pserver(self.pserver1_ep)
pserver2, startup2 = self.get_pserver(self.pserver2_ep)
trainer = self.get_trainer() trainer = self.get_trainer()
pserver, startup = self.get_pserver(self.current_pserver_ep)
self.assertEqual([op.type for op in trainer.global_block().ops], self.assertEqual([op.type for op in trainer.global_block().ops], [
self.get_expect_trainer_ops()) 'mul', 'elementwise_add', 'elementwise_sub', 'square', 'mean',
'fill_constant', 'mean_grad', 'square_grad', 'elementwise_sub_grad',
'elementwise_add_grad', 'send', 'mul_grad', 'split_byref', 'send',
'send_barrier', 'recv', 'recv', 'fetch_barrier', 'concat'
])
self.assertEqual(len(pserver.blocks), 3) self.assertEqual(len(pserver.blocks), 3)
# block0: listen_and_serv # block0: listen_and_serv
self.assertEqual([op.type for op in pserver.blocks[0].ops], self.assertEqual([op.type for op in pserver.blocks[0].ops],
["listen_and_serv"]) ["listen_and_serv"])
# block2: optimize pass # block1~2: optimize pass
self.assertEqual([op.type for op in pserver.blocks[1].ops], self.assertEqual([op.type for op in pserver.blocks[1].ops],
["sum", "scale", "sgd"]) ["sum", "scale", "sgd"])
# confirm startup program # confirm startup program
self.assertEqual([op.type for op in startup.global_block().ops],
self.assertEqual([op.type for op in startup.global_block().ops], [ ["fill_constant", "fill_constant", "uniform_random"])
"fill_constant", "fill_constant", "uniform_random", "uniform_random"
])
# the variable #fc_w will be split into two blocks # the variable #fc_w will be split into two blocks
fc_w_var = startup.global_block().var("fc_w.block1") fc_w_var = startup.global_block().var("fc_w.block1")
self.assertEqual(fc_w_var.shape, (500, 1000)) self.assertEqual(fc_w_var.shape, (500, 1000))
# all parameters should be optimized on pserver
pserver_params = []
for prog in [pserver, pserver2]:
for blk in prog.blocks:
for op in blk.ops:
if "Param" in op.input_names:
param_name = op.input("Param")[0]
is_block_idx = param_name.find(".block")
if is_block_idx != -1:
origin_param_name = param_name[:is_block_idx]
else:
origin_param_name = param_name
pserver_params.append(origin_param_name)
trainer_params = []
for op in self.origin_prog.global_block().ops:
if "Param" in op.input_names:
trainer_params.append(op.input("Param")[0])
self.assertEqual(set(pserver_params), set(trainer_params))
class TestNoSliceVar(TranspilerTest):
def setUp(self):
super(TestNoSliceVar, self).setUp()
self.slice_var_up = False
def test_transpiler(self):
_, startup = self.get_pserver(self.pserver1_ep)
_, startup2 = self.get_pserver(self.pserver2_ep)
if startup.global_block().vars.has_key("fc_w"):
fc_w_var = startup.global_block().vars["fc_w"]
elif startup2.global_block().vars.has_key("fc_w"):
fc_w_var = startup2.global_block().vars["fc_w"]
self.assertEqual(fc_w_var.shape, (1000, 1000))
def get_expect_trainer_ops(self):
trainer = fluid.Program()
with fluid.program_guard(trainer): class TestLRDecay(TranspilerTest):
optimize_ops, params_grads = self.net_conf() def net_conf(self):
x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
y_predict = fluid.layers.fc(input=x,
size=1000,
act=None,
param_attr=fluid.ParamAttr(name='fc_w'),
bias_attr=fluid.ParamAttr(name='fc_b'))
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=1.0,
decay_steps=2100,
decay_rate=0.1,
staircase=True))
sgd_optimizer.minimize(avg_cost)
return
def test_transpiler(self):
pserver, startup = self.get_pserver(self.pserver1_ep)
trainer = self.get_trainer()
self.assertEqual(len(pserver.blocks), 4)
lr_decay_ops = [op.type for op in pserver.blocks[1].ops]
self.assertEqual(lr_decay_ops, [
"increment", "cast", "fill_constant", "elementwise_div", "floor",
"fill_constant", "elementwise_pow", "fill_constant",
"elementwise_mul"
])
class TestLRDecayConditional(TranspilerTest):
def net_conf(self):
x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
y_predict = fluid.layers.fc(input=x,
size=1000,
act=None,
param_attr=fluid.ParamAttr(name='fc_w'),
bias_attr=fluid.ParamAttr(name='fc_b'))
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.piecewise_decay([10000, 20000],
[1.0, 0.5, 1.0]))
sgd_optimizer.minimize(avg_cost)
return
def test_transpiler(self):
pserver, startup = self.get_pserver(self.pserver1_ep)
trainer = self.get_trainer()
serv_op = pserver.blocks[0].ops[0]
sub_blocks = []
optimize_blocks = []
for b in serv_op.attrs["optimize_blocks"]:
optimize_blocks.append(b.idx)
for b in pserver.blocks:
if b.idx not in optimize_blocks:
sub_blocks.append(b.idx)
self.assertEqual(len(pserver.blocks), 7)
lr_decay_ops = [op.type for op in pserver.blocks[1].ops]
self.assertEqual(lr_decay_ops, [
"increment", "cast", "fill_constant", "fill_constant", "less_than",
"logical_not", "conditional_block", "fill_constant",
"fill_constant", "less_than", "logical_not", "logical_and",
"logical_and", "conditional_block", "fill_constant",
"conditional_block"
])
# test the condition blocks
for b in sub_blocks:
if b == 0:
continue
block = pserver.blocks[b]
self.assertEqual([op.type for op in block.ops], ["assign"])
class TestL2Decay(TranspilerTest):
def net_conf(self):
x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
y_predict = fluid.layers.fc(
input=x,
size=1000,
act=None,
param_attr=fluid.ParamAttr(
name='fc_w',
regularizer=fluid.regularizer.L2Decay(),
gradient_clip=fluid.clip.GradientClipByValue(0.1)),
bias_attr=fluid.ParamAttr(name='fc_b'))
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost)
return
def test_transpiler(self):
pserver, startup = self.get_pserver(self.pserver1_ep)
trainer = self.get_trainer()
self.assertEqual(len(pserver.blocks), 3)
self.assertEqual([op.type for op in pserver.blocks[1].ops],
["sum", "scale", "clip", "sgd"])
self.assertEqual(
[op.type for op in pserver.blocks[2].ops],
["sum", "scale", "clip", "scale", "elementwise_add", "sgd"])
# TODO(typhoonzero): test clipping and L2Decay ops are removed from trainer
delete_ops(trainer.global_block(), optimize_ops) # FIXME(typhoonzero): need to add test for async case:
ops = [op.type for op in trainer.global_block().ops] + [ # see https://github.com/PaddlePaddle/Paddle/issues/11691
"split_byref", "send", "send_barrier", "recv", "recv", class TestAsyncSGD(TranspilerTest):
"fetch_barrier", "concat" pass
]
ops.insert(ops.index("elementwise_add_grad") + 1, "send")
return ops
if __name__ == "__main__": if __name__ == "__main__":
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.transpiler.distribute_transpiler import delete_ops
from transpiler_test import TranspilerTest
class TestSimpleDistTranspiler(TranspilerTest):
def setUp(self):
self.current_pserver_ep = "127.0.0.1:6175"
def test_simple_transpiler(self):
np.random.seed(1)
trainer = self.get_trainer()
pserver, startup = self.get_pserver(self.current_pserver_ep)
self.assertEqual([op.type for op in trainer.global_block().ops],
self.get_expect_trainer_ops())
self.assertEqual(len(pserver.blocks), 2)
# block0: listen_and_serv
self.assertEqual([op.type for op in pserver.blocks[0].ops],
["listen_and_serv"])
# block1: optimize pass
self.assertEqual([op.type for op in pserver.blocks[1].ops],
["sum", "scale", "sgd"])
# confirm startup program
self.assertEqual([op.type for op in startup.global_block().ops],
["fill_constant", "uniform_random", "uniform_random"])
# the variable #fc_w will NOT be splited
fc_w_var = startup.global_block().var("fc_w@GRAD")
self.assertEqual(fc_w_var.shape, (1000, 1000))
fc_w_var = startup.global_block().var("fc_w@GRAD.trainer_0")
self.assertEqual(fc_w_var.shape, (1000, 1000))
def get_expect_trainer_ops(self):
trainer = fluid.Program()
with fluid.program_guard(trainer):
optimize_ops, params_grads = self.net_conf()
delete_ops(trainer.global_block(), optimize_ops)
ops = [op.type for op in trainer.global_block().ops] + [
"send", "send_barrier", "recv", "recv", "fetch_barrier"
]
ops.insert(ops.index("elementwise_add_grad") + 1, "send")
return ops
def _transpiler_instance(self):
main = self.get_main_program()
t = fluid.DistributeTranspiler()
t.transpile(
self.trainer_id,
program=main,
pservers=self.pserver_eps,
trainers=self.trainers,
slice_var_up=False)
return t
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
class TranspilerTest(unittest.TestCase):
@classmethod
def setUpClass(self):
self.trainer_id = 0
self.trainers = 2
self.pservers = 2
self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"
def net_conf(self):
x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
y_predict = fluid.layers.fc(input=x,
size=1000,
act=None,
param_attr=fluid.ParamAttr(name='fc_w'))
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
return optimize_ops, params_grads
def get_main_program(self):
main = fluid.Program()
with fluid.program_guard(main):
self.net_conf()
return main
def get_trainer(self):
return self._transpiler_instance().get_trainer_program()
def get_pserver(self, ep):
t = self._transpiler_instance()
pserver = t.get_pserver_program(ep)
startup = t.get_startup_program(ep, pserver)
return pserver, startup
def _transpiler_instance(self):
main = self.get_main_program()
t = fluid.DistributeTranspiler()
t.transpile(
self.trainer_id,
program=main,
pservers=self.pserver_eps,
trainers=self.trainers)
return t
...@@ -455,6 +455,8 @@ class DistributeTranspiler(object): ...@@ -455,6 +455,8 @@ class DistributeTranspiler(object):
__append_optimize_op__(op, per_opt_block, grad_to_block_id, __append_optimize_op__(op, per_opt_block, grad_to_block_id,
merged_var, lr_ops) merged_var, lr_ops)
# dedup grad to ids list
grad_to_block_id = list(set(grad_to_block_id))
# append global ops # append global ops
if global_ops: if global_ops:
opt_state_block = pserver_program.create_block( opt_state_block = pserver_program.create_block(
...@@ -960,8 +962,6 @@ class DistributeTranspiler(object): ...@@ -960,8 +962,6 @@ class DistributeTranspiler(object):
if not block_map.has_key(varname): if not block_map.has_key(varname):
block_map[varname] = [] block_map[varname] = []
block_map[varname].append((long(offset), long(size))) block_map[varname].append((long(offset), long(size)))
# Do not remove this important debug message:
print("block map: %s" % block_map)
for varname, splited in block_map.iteritems(): for varname, splited in block_map.iteritems():
orig_var = program.global_block().var(varname) orig_var = program.global_block().var(varname)
...@@ -1401,6 +1401,16 @@ class DistributeTranspiler(object): ...@@ -1401,6 +1401,16 @@ class DistributeTranspiler(object):
break break
return lr_ops return lr_ops
def _is_opt_role_op(self, op):
# NOTE: depend on oprole to find out whether this op is for
# optimize
op_maker = core.op_proto_and_checker_maker
optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize
if op_maker.kOpRoleAttrName() in op.attrs and \
int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role):
return True
return False
def _get_optimize_pass(self): def _get_optimize_pass(self):
""" """
Get optimizer operators, paramters and gradients from origin_program Get optimizer operators, paramters and gradients from origin_program
...@@ -1413,10 +1423,7 @@ class DistributeTranspiler(object): ...@@ -1413,10 +1423,7 @@ class DistributeTranspiler(object):
params_grads = [] params_grads = []
origin_var_dict = self.origin_program.global_block().vars origin_var_dict = self.origin_program.global_block().vars
for op in block.ops: for op in block.ops:
# NOTE(Yancey1989): we can not use op role to distinguish an optimizer op if self._is_opt_role_op(op):
# or not, because all ops in optimizer sub-graph would
# sign the optimizer op role
if self._is_optimizer_op(op):
opt_ops.append(op) opt_ops.append(op)
# HACK(wuyi): if we find grad vars from input of optimize # HACK(wuyi): if we find grad vars from input of optimize
# ops, we may get the output of clip op. Use syntax "@GRAD" # ops, we may get the output of clip op. Use syntax "@GRAD"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册