未验证 提交 a5060b55 编写于 作者: W wanghuancoder 提交者: GitHub

add fetch, test=develop (#35019)

* add fetch, test=develop

* fix fetch2op, test=develop

* fix fetch2op, test=develop

* refine, test=develop

* fix fetch ctx, test=develop

* add wait, test=develop

* rename fetch2 to fetch_v2, test=develop

* merge, test=develop
上级 b09f4d7f
...@@ -21,11 +21,29 @@ InterpreterCore::InterpreterCore(const platform::Place& place, ...@@ -21,11 +21,29 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
VariableScope* global_scope, VariableScope* global_scope,
const std::vector<std::string>& feed_names, const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names) const std::vector<std::string>& fetch_names)
: place_(place), main_program_(main_prog), global_scope_(global_scope) { : place_(place),
main_program_(main_prog),
global_scope_(global_scope),
fetch_context_pool_({place}) {
is_build_ = false; is_build_ = false;
feed_names_ = feed_names; feed_names_ = feed_names;
fetch_names_ = fetch_names;
// add feedop and fetchop to main_program // Step1: add feedop and fetchop to main_program
auto* fetch_holder = main_program_.MutableBlock(0)->Var("fetch_vars");
fetch_holder->SetType(proto::VarType::FETCH_LIST);
fetch_holder->SetPersistable(true);
int i = 0;
for (auto& fetch_name : fetch_names) {
// append fetch op
auto* op = main_program_.MutableBlock(0)->AppendOp();
op->SetType("fetch_v2");
op->SetInput("X", {fetch_name});
op->SetOutput("Out", {"fetch_vars"});
op->SetAttr("col", {static_cast<int>(i)});
op->CheckAttrs();
i++;
}
// prune // prune
...@@ -34,8 +52,8 @@ InterpreterCore::InterpreterCore(const platform::Place& place, ...@@ -34,8 +52,8 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
// convert to run graph // convert to run graph
} }
void InterpreterCore::Run(const std::vector<framework::Tensor>& feed_tensors, paddle::framework::FetchList InterpreterCore::Run(
std::vector<framework::Tensor>* fetch_tensors) { const std::vector<framework::Tensor>& feed_tensors) {
if (is_build_ == false) { if (is_build_ == false) {
BuildVariableScope(main_program_, global_scope_); BuildVariableScope(main_program_, global_scope_);
} }
...@@ -58,32 +76,8 @@ void InterpreterCore::Run(const std::vector<framework::Tensor>& feed_tensors, ...@@ -58,32 +76,8 @@ void InterpreterCore::Run(const std::vector<framework::Tensor>& feed_tensors,
ExecuteInstructionList(vec_instruction_, *global_scope_, place_); ExecuteInstructionList(vec_instruction_, *global_scope_, place_);
} }
for (size_t i = 0; i < fetch_names_.size(); ++i) { return *(global_scope_->var_list[global_scope_->name2id["fetch_vars"]]
auto it = global_scope_->name2id.find(fetch_names_[i]); ->GetMutable<framework::FetchList>());
assert(it != global_scope_->name2id.end());
PADDLE_ENFORCE_NE(
it, global_scope_->name2id.end(),
platform::errors::NotFound(
"Can't find (%d) the fetch var (%s) in scope", i, fetch_names_[i]));
auto fetch_tensor =
global_scope_->var_list[it->second]->GetMutable<framework::LoDTensor>();
if (platform::is_gpu_place(fetch_tensor->place())) {
Tensor out;
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place_);
dev_ctx->Wait();
TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
dev_ctx->Wait();
fetch_tensors->push_back(out);
} else {
Tensor out;
TensorCopySync(*fetch_tensor, platform::CPUPlace(), &out);
fetch_tensors->push_back(out);
}
}
} }
void InterpreterCore::Convert() { void InterpreterCore::Convert() {
...@@ -195,6 +189,9 @@ void InterpreterCore::BuildInstructionCtx(Instruction* instr_node, ...@@ -195,6 +189,9 @@ void InterpreterCore::BuildInstructionCtx(Instruction* instr_node,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place); auto* dev_ctx = pool.Get(place);
if (instr_node->kernel_func_.operator_base_->Type() == "fetch_v2") {
dev_ctx = fetch_context_pool_.Get(place);
}
Scope scope; Scope scope;
instr_node->execution_ctx_.reset(new ExecutionContext( instr_node->execution_ctx_.reset(new ExecutionContext(
...@@ -206,6 +203,12 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) { ...@@ -206,6 +203,12 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {
instr_node.kernel_func_.operator_base_) instr_node.kernel_func_.operator_base_)
->InferShape(instr_node.infershape_ctx_.get()); ->InferShape(instr_node.infershape_ctx_.get());
if (instr_node.kernel_func_.operator_base_->Type() == "fetch_v2") {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place_);
dev_ctx->Wait(); // TODO(wanghuancoder)
}
instr_node.kernel_func_.compute_func_(*instr_node.execution_ctx_.get()); instr_node.kernel_func_.compute_func_(*instr_node.execution_ctx_.get());
} }
...@@ -247,6 +250,8 @@ void InterpreterCore::ExecuteInstructionList( ...@@ -247,6 +250,8 @@ void InterpreterCore::ExecuteInstructionList(
} }
} }
fetch_context_pool_.Get(place)->Wait();
for (size_t i = 0; i < working_var_ref.size(); ++i) { for (size_t i = 0; i < working_var_ref.size(); ++i) {
if (working_var_ref[i].var_ref_count_ != 0) { if (working_var_ref[i].var_ref_count_ != 0) {
std::cerr << " var ref is not zero " << i << std::endl; std::cerr << " var ref is not zero " << i << std::endl;
......
...@@ -35,8 +35,8 @@ class InterpreterCore { ...@@ -35,8 +35,8 @@ class InterpreterCore {
const std::vector<std::string>& feed_names, const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names); const std::vector<std::string>& fetch_names);
void Run(const std::vector<framework::Tensor>& feed_tensors, paddle::framework::FetchList Run(
std::vector<framework::Tensor>* fetch_tensors); const std::vector<framework::Tensor>& feed_tensors);
static void BuildOpFuncList(const platform::Place& place, static void BuildOpFuncList(const platform::Place& place,
const framework::ProgramDesc& pdesc, const framework::ProgramDesc& pdesc,
...@@ -64,7 +64,7 @@ class InterpreterCore { ...@@ -64,7 +64,7 @@ class InterpreterCore {
VariableScope* var_scope); VariableScope* var_scope);
const platform::Place& place_; const platform::Place& place_;
const ProgramDesc& main_program_; ProgramDesc main_program_;
VariableScope* global_scope_; VariableScope* global_scope_;
std::vector<VariableMetaInfo> vec_meta_info_; std::vector<VariableMetaInfo> vec_meta_info_;
...@@ -80,7 +80,8 @@ class InterpreterCore { ...@@ -80,7 +80,8 @@ class InterpreterCore {
bool is_build_; bool is_build_;
std::vector<std::string> feed_names_; std::vector<std::string> feed_names_;
std::vector<std::string> fetch_names_;
platform::DeviceContextPool fetch_context_pool_;
}; };
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -47,15 +47,13 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place, ...@@ -47,15 +47,13 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
&vec_func_list, &global_scope_); &vec_func_list, &global_scope_);
} }
int StandaloneExecutor::Run(const std::vector<std::string>& feed_names, paddle::framework::FetchList StandaloneExecutor::Run(
const std::vector<framework::Tensor>& feed_tensors, const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names, const std::vector<framework::Tensor>& feed_tensors,
std::vector<framework::Tensor>* fetch_tensors) { const std::vector<std::string>& fetch_names) {
auto core = GetInterpreterCore(feed_names, fetch_names); auto core = GetInterpreterCore(feed_names, fetch_names);
core->Run(feed_tensors, fetch_tensors); return core->Run(feed_tensors);
return 0;
} }
void StandaloneExecutor::BuildVariableOuterScope( void StandaloneExecutor::BuildVariableOuterScope(
......
...@@ -26,10 +26,10 @@ namespace framework { ...@@ -26,10 +26,10 @@ namespace framework {
class ExecutorBase { class ExecutorBase {
public: public:
virtual ~ExecutorBase() {} virtual ~ExecutorBase() {}
virtual int Run(const std::vector<std::string>& feed_names, virtual paddle::framework::FetchList Run(
const std::vector<framework::Tensor>& feed_tensors, const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names, const std::vector<framework::Tensor>& feed_tensors,
std::vector<framework::Tensor>* fetch_tensors) = 0; const std::vector<std::string>& fetch_names) = 0;
}; };
class StandaloneExecutor : public ExecutorBase { class StandaloneExecutor : public ExecutorBase {
...@@ -40,10 +40,10 @@ class StandaloneExecutor : public ExecutorBase { ...@@ -40,10 +40,10 @@ class StandaloneExecutor : public ExecutorBase {
~StandaloneExecutor() {} ~StandaloneExecutor() {}
virtual int Run(const std::vector<std::string>& feed_names, virtual paddle::framework::FetchList Run(
const std::vector<framework::Tensor>& feed_tensors, const std::vector<std::string>& feed_names,
const std::vector<std::string>& fetch_names, const std::vector<framework::Tensor>& feed_tensors,
std::vector<framework::Tensor>* fetch_tensors); const std::vector<std::string>& fetch_names);
private: private:
void BuildVariableOuterScope(const framework::ProgramDesc& pdesc, void BuildVariableOuterScope(const framework::ProgramDesc& pdesc,
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace framework {
class OpDesc;
class InferShapeContext;
template <typename T>
class EmptyGradOpMaker;
} // namespace framework
namespace imperative {
class OpBase;
} // namespace imperative
namespace platform {
struct CPUPlace;
struct CUDAPlace;
struct float16;
} // namespace platform
} // namespace paddle
namespace paddle {
namespace operators {
static void DataCopy(const framework::LoDTensor &src_item,
const std::string &fetch_var_name,
framework::LoDTensor *dst_item,
const platform::DeviceContext &dev_ctx) {
if (src_item.IsInitialized() && src_item.numel() > 0) {
#ifdef PADDLE_WITH_MKLDNN
// Conversion from MKL-DNN to Paddle
if (src_item.layout() == framework::DataLayout::kMKLDNN) {
framework::Tensor out;
// Convert to desired Paddle layout, apart from grads of filter
// as params are not a subject to paddle's data_format
framework::innerTransDataLayoutFromMKLDNN(
src_item.layout(), fetch_var_name == framework::GradVarName("Filter")
? framework::DataLayout::kNCHW
: paddle::platform::MKLDNNDeviceContext::tls()
.get_cur_paddle_data_layout(),
src_item, &out, platform::CPUPlace());
TensorCopy(src_item, platform::CPUPlace(), dev_ctx, dst_item);
} else {
if (platform::is_gpu_place(src_item.place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TensorCopy(src_item, platform::CUDAPinnedPlace(), dev_ctx, dst_item);
#endif
} else {
TensorCopy(src_item, platform::CPUPlace(), dst_item);
}
}
#else
if (platform::is_gpu_place(src_item.place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TensorCopy(src_item, platform::CUDAPinnedPlace(), dev_ctx, dst_item);
#endif
} else {
TensorCopy(src_item, platform::CPUPlace(), dst_item);
}
#endif
} else {
// Not copy, if the src tensor is empty.
dst_item->clear();
dst_item->Resize({0});
}
dst_item->set_lod(src_item.lod());
}
class FetchV2Op : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {}
protected:
framework::OpKernelType GetKernelTypeForVar(
const std::string &var_name, const framework::Tensor &tensor,
const framework::OpKernelType &expected_kernel_type) const override {
return framework::OpKernelType(expected_kernel_type.data_type_,
expected_kernel_type.place_,
tensor.layout());
}
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"),
ctx.device_context());
}
};
class FetchV2InferVarType : public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext *ctx) const override {
ctx->SyncTypeAndDataType("X", "Out");
}
};
class FetchV2Kernel {
public:
void operator()(const framework::ExecutionContext &ctx) const {
auto fetch_var_name = ctx.InputName("X");
auto *fetch_var = ctx.InputVar("X");
if (fetch_var == nullptr) {
return;
}
PADDLE_ENFORCE_EQ(ctx.HasOutput("Out"), true,
platform::errors::NotFound(
"Output(Out) of memcpy_d2h_op is not found."));
auto *out_var = ctx.OutputVar("Out");
// Get dev_ctx from ExecutionContext, it's D2H stream
auto &dev_ctx = ctx.device_context();
int col = ctx.Attr<int>("col");
PADDLE_ENFORCE_GE(
col, 0, platform::errors::InvalidArgument(
"Expected the column index (the attribute 'col' of "
"operator 'Fetch') of current fetching variable to be "
"no less than 0. But received column index = %d.",
col));
auto *fetch_list = out_var->GetMutable<framework::FetchList>();
if (static_cast<size_t>(col) >= fetch_list->size()) {
fetch_list->resize(col + 1);
}
if (fetch_var->IsType<framework::LoDTensor>()) {
auto &src_item = fetch_var->Get<framework::LoDTensor>();
auto *dst_item = &(BOOST_GET(framework::LoDTensor, fetch_list->at(col)));
DataCopy(src_item, fetch_var_name, dst_item, dev_ctx);
} else {
auto &src_item = fetch_var->Get<framework::LoDTensorArray>();
framework::LoDTensorArray tmp(src_item.size());
fetch_list->at(col) = tmp;
auto &dst_item =
BOOST_GET(framework::LoDTensorArray, fetch_list->at(col));
for (size_t i = 0; i < src_item.size(); ++i) {
DataCopy(src_item[i], fetch_var_name, &dst_item[i], dev_ctx);
}
}
}
};
class FetchV2OpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"(LoDTensor) The resulted LoDTensor which is expected to return "
"to users.");
AddOutput("Out",
"(vector<LoDTensor>) A fetching list of LoDTensor which may have "
"different dimension, shape and data type.");
AddAttr<int>("col", "(int) The column index of fetching object.");
AddComment(R"DOC(
FetchV2 Operator.
It should not be configured by users directly.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR(
fetch_v2, ops::FetchV2Op, ops::FetchV2OpProtoMaker,
ops::FetchV2InferVarType,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL_FUNCTOR(fetch_v2, float, ops::FetchV2Kernel, double,
ops::FetchV2Kernel, int, ops::FetchV2Kernel,
int64_t, ops::FetchV2Kernel, bool,
ops::FetchV2Kernel, plat::float16,
ops::FetchV2Kernel);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(fetch_v2, float, ops::FetchV2Kernel, double,
ops::FetchV2Kernel, int, ops::FetchV2Kernel,
int64_t, ops::FetchV2Kernel, bool,
ops::FetchV2Kernel, plat::float16,
ops::FetchV2Kernel);
#endif
#ifdef PADDLE_WITH_ASCEND_CL
REGISTER_OP_NPU_KERNEL_FUNCTOR(fetch_v2, float, ops::FetchV2Kernel, double,
ops::FetchV2Kernel, int, ops::FetchV2Kernel,
int64_t, ops::FetchV2Kernel, bool,
ops::FetchV2Kernel, plat::float16,
ops::FetchV2Kernel);
#endif
...@@ -10,6 +10,7 @@ register_unity_group(cc ...@@ -10,6 +10,7 @@ register_unity_group(cc
conditional_block_infer_op.cc conditional_block_infer_op.cc
feed_op.cc feed_op.cc
fetch_op.cc fetch_op.cc
fetch_v2_op.cc
get_places_op.cc get_places_op.cc
logical_op.cc logical_op.cc
bitwise_op.cc bitwise_op.cc
......
...@@ -1952,7 +1952,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1952,7 +1952,6 @@ All parameter, weight, gradient are variables in Paddle.
[](StandaloneExecutor &self, [](StandaloneExecutor &self,
const std::unordered_map<std::string, py::array> &input_dict, const std::unordered_map<std::string, py::array> &input_dict,
std::vector<std::string> fetch_names) { std::vector<std::string> fetch_names) {
pybind11::gil_scoped_release release;
std::vector<framework::Tensor> feed_tensors; std::vector<framework::Tensor> feed_tensors;
std::vector<std::string> feed_names; std::vector<std::string> feed_names;
...@@ -1964,13 +1963,13 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -1964,13 +1963,13 @@ All parameter, weight, gradient are variables in Paddle.
feed_tensors.push_back(t); feed_tensors.push_back(t);
} }
std::vector<framework::Tensor> fetch_tensors; paddle::framework::FetchList ret;
self.Run(feed_names, feed_tensors, fetch_names, &fetch_tensors); {
std::vector<py::array> vec_ret; pybind11::gil_scoped_release release;
for (size_t i = 0; i < fetch_tensors.size(); ++i) { ret = self.Run(feed_names, feed_tensors, fetch_names);
vec_ret.push_back(TensorToPyArray(fetch_tensors[i], true));
} }
return vec_ret;
return py::cast(std::move(ret));
}); });
m.def("init_gflags", framework::InitGflags); m.def("init_gflags", framework::InitGflags);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册