提交 c3bf3326 编写于 作者: Y Yang Yang(Tony) 提交者: GitHub

Merge pull request #4537 from QiJune/executor_impl

Executor interface design and implementation
...@@ -42,5 +42,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD ...@@ -42,5 +42,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward ${GLOB_OP_LIB})
if(WITH_GPU)
nv_test(executor_test SRCS executor_test.cc DEPS executor)
else()
cc_test(executor_test SRCS executor_test.cc DEPS executor)
endif()
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/executor.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <set>
#include <vector>
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
#include <boost/range/adaptor/reversed.hpp>
namespace paddle {
namespace framework {
const std::string kFeedOpType = "feed";
const std::string kFetchOpType = "fetch";
Executor::Executor(const std::vector<platform::Place>& places) {
PADDLE_ENFORCE_GT(places.size(), 0);
device_contexts_.resize(places.size());
for (size_t i = 0; i < places.size(); i++) {
if (platform::is_cpu_place(places[i])) {
device_contexts_[i] = new platform::CPUDeviceContext(
boost::get<platform::CPUPlace>(places[i]));
} else if (platform::is_gpu_place(places[i])) {
#ifdef PADDLE_WITH_CUDA
device_contexts_[i] = new platform::CUDADeviceContext(
boost::get<platform::GPUPlace>(places[i]));
#else
PADDLE_THROW(
"'GPUPlace' is not supported, Please re-compile with WITH_GPU "
"option");
#endif
}
}
}
Executor::~Executor() {
for (auto& device_context : device_contexts_) {
delete device_context;
}
}
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
// TODO(tonyyang-svail):
// - only runs on the first device (i.e. no interdevice communication)
// - will change to use multiple blocks for RNN op and Cond Op
PADDLE_ENFORCE_GT(pdesc.blocks_size(), block_id);
auto& block = pdesc.blocks(block_id);
auto& device = device_contexts_[0];
// Instantiate all the vars in the global scope
for (auto& var : block.vars()) {
scope->NewVar(var.name());
}
Scope& local_scope = scope->NewScope();
std::vector<bool> should_run = Prune(pdesc, block_id);
PADDLE_ENFORCE_EQ(should_run.size(), static_cast<size_t>(block.ops_size()));
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
for (auto& var : block.ops(i).outputs()) {
for (auto& argu : var.arguments()) {
if (local_scope.FindVar(argu) == nullptr) {
local_scope.NewVar(argu);
}
}
}
auto op = paddle::framework::OpRegistry::CreateOp(block.ops(i));
op->Run(local_scope, *device);
}
}
// TODO(tonyyang-svail):
// - Destroy local_scope
}
std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
auto& block = pdesc.blocks(block_id);
auto& ops = block.ops();
bool expect_feed = true;
for (auto& op_desc : ops) {
PADDLE_ENFORCE(op_desc.type() != kFeedOpType || expect_feed,
"All FeedOps are at the beginning of the ProgramDesc");
expect_feed = (op_desc.type() == kFeedOpType);
}
bool expect_fetch = true;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
PADDLE_ENFORCE(op_desc.type() != kFetchOpType || expect_fetch,
"All FetchOps must at the end of the ProgramDesc");
expect_fetch = (op_desc.type() == kFetchOpType);
}
std::set<std::string> dependent_vars;
std::vector<bool> should_run;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
bool found_dependent_vars = false;
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
if (dependent_vars.count(argu) != 0) {
found_dependent_vars = true;
}
}
}
if (op_desc.type() == kFetchOpType || found_dependent_vars) {
// erase its output to the dependency graph
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.erase(argu);
}
}
// insert its input to the dependency graph
for (auto& var : op_desc.inputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.insert(argu);
}
}
should_run.push_back(true);
} else {
should_run.push_back(false);
}
}
// TODO(tonyyang-svail):
// - check this after integration of Init
// PADDLE_ENFORCE(dependent_vars.empty());
// since we are traversing the ProgramDesc in reverse order
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());
return should_run;
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/op_info.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace framework {
class Executor {
public:
explicit Executor(const std::vector<platform::Place>& places);
~Executor();
/* @Brief
* Runtime evaluation of the given ProgramDesc under certain Scope
*
* @param
* ProgramDesc
* Scope
*/
void Run(const ProgramDesc&, Scope*, int);
private:
std::vector<platform::DeviceContext*> device_contexts_;
};
/* @Brief
* Pruning the graph
*
* @param
* ProgramDesc
*
* @return
* vector<bool> Same size as ops. Indicates whether an op should be run.
*/
std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id);
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/executor.h"
#include <memory>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/framework/attribute.h"
#include "paddle/framework/backward.h"
#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
USE_OP(elementwise_add);
USE_OP(gaussian_random);
USE_OP(feed);
USE_OP(fetch);
USE_OP(mul);
USE_OP(sum);
USE_OP(squared_l2_distance);
USE_OP(fill_constant);
USE_OP(sgd);
using namespace paddle::platform;
using namespace paddle::framework;
void AddOp(const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, AttributeMap attrs,
paddle::framework::BlockDescBind* block) {
// insert output
for (auto kv : outputs) {
for (auto v : kv.second) {
auto var = block->NewVar(v);
var->SetDataType(paddle::framework::DataType::FP32);
}
}
// insert op
auto op = block->AppendOp();
op->SetType(type);
for (auto& kv : inputs) {
op->SetInput(kv.first, kv.second);
}
for (auto& kv : outputs) {
op->SetOutput(kv.first, kv.second);
}
op->SetAttrMap(attrs);
}
// Tensors in feed value variable will only be in CPUPlace
// So we can memcpy the data from vector<T> to feed_value
template <typename T>
void SetFeedVariable(const std::vector<std::vector<T>>& inputs,
const std::vector<std::vector<int64_t>>& dims) {
Variable* g_feed_value = GetGlobalScope().FindVar("feed_value");
auto& feed_inputs =
*(g_feed_value->GetMutable<std::vector<paddle::framework::Tensor>>());
size_t size = inputs.size();
feed_inputs.resize(size);
for (size_t i = 0; i < size; i++) {
T* dst = feed_inputs[i].mutable_data<T>(make_ddim(dims[i]), CPUPlace());
memcpy(dst, inputs[i].data(), inputs[i].size() * sizeof(T));
}
}
// Tensors in fetch value variable will only be in CPUPlace
// So we can memcpy the data from fetch_value to vector<T>
template <typename T>
std::vector<std::vector<T>> GetFetchVariable() {
Variable* g_fetch_value = GetGlobalScope().FindVar("fetch_value");
auto& fetch_outputs =
*(g_fetch_value->GetMutable<std::vector<paddle::framework::Tensor>>());
size_t size = fetch_outputs.size();
std::vector<std::vector<T>> result;
result.reserve(size);
for (size_t i = 0; i < size; i++) {
std::vector<T> tmp;
tmp.resize(fetch_outputs[i].numel());
memcpy(tmp.data(), fetch_outputs[i].data<T>(),
fetch_outputs[i].numel() * sizeof(T));
result.push_back(tmp);
}
return result;
}
class ExecutorTesterRandom : public ::testing::Test {
public:
virtual void SetUp() override {
int input_dim = 3, batch_size = 2, embed_dim = 5;
auto temp_init_root_block = init_pdesc_.add_blocks();
temp_init_root_block->set_idx(0);
temp_init_root_block->set_parent_idx(-1);
paddle::framework::ProgramDescBind& init_program =
paddle::framework::ProgramDescBind::Instance(&init_pdesc_);
paddle::framework::BlockDescBind* init_root_block = init_program.Block(0);
AddOp("gaussian_random", {}, {{"Out", {"w1"}}},
{{"dims", std::vector<int>{input_dim, embed_dim}}}, init_root_block);
AddOp("gaussian_random", {}, {{"Out", {"w2"}}},
{{"dims", std::vector<int>{embed_dim, input_dim}}}, init_root_block);
AddOp("fetch", {{"Input", {"w1"}}}, {}, {{"col", 0}}, init_root_block);
AddOp("fetch", {{"Input", {"w2"}}}, {}, {{"col", 1}}, init_root_block);
// flush
init_program.Proto();
// run block
auto temp_root_block = pdesc_.add_blocks();
temp_root_block->set_idx(0);
temp_root_block->set_parent_idx(-1);
paddle::framework::ProgramDescBind& program =
paddle::framework::ProgramDescBind::Instance(&pdesc_);
paddle::framework::BlockDescBind* root_block = program.Block(0);
// feed data
inputs_.push_back({1.0, 1.0, 1.0, 1.0, 1.0, 1.0});
dims_.push_back({batch_size, input_dim});
AddOp("feed", {}, {{"Out", {"a"}}},
{{"dims", std::vector<int>{batch_size, input_dim}}, {"col", 0}},
root_block);
// forward
AddOp("mul", {{"X", {"a"}}, {"Y", {"w1"}}}, {{"Out", {"b"}}}, {},
root_block);
AddOp("mul", {{"X", {"b"}}, {"Y", {"w2"}}}, {{"Out", {"a_out"}}}, {},
root_block);
AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}},
{{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {},
root_block);
// backward
AddOp("fill_constant", {}, {{"Out", {"l2_distance@GRAD"}}},
{{"shape", std::vector<int>{batch_size, 1}}, {"value", float(1.0)}},
root_block);
AppendBackward(program, {});
// update
AddOp("fill_constant", {}, {{"Out", {"learning_rate"}}},
{{"shape", std::vector<int>{1}}, {"value", float(0.001)}},
root_block);
AddOp("sgd", {{"Param", {"w1"}},
{"LearningRate", {"learning_rate"}},
{"Grad", {"w1@GRAD"}}},
{{"ParamOut", {"w1"}}}, {}, root_block);
AddOp("sgd", {{"Param", {"w2"}},
{"LearningRate", {"learning_rate"}},
{"Grad", {"w2@GRAD"}}},
{{"ParamOut", {"w2"}}}, {}, root_block);
AddOp("fetch", {{"Input", {"w1"}}}, {}, {{"col", 0}}, root_block);
AddOp("fetch", {{"Input", {"w2"}}}, {}, {{"col", 1}}, root_block);
AddOp("fetch", {{"Input", {"l2_distance"}}}, {}, {{"col", 0}}, root_block);
// flush
program.Proto();
}
protected:
ProgramDesc init_pdesc_;
ProgramDesc pdesc_;
std::vector<std::vector<float>> inputs_;
std::vector<std::vector<int64_t>> dims_;
};
class ExecutorTesterFeedAndFetch : public ::testing::Test {
public:
virtual void SetUp() override {
auto temp_root_block = pdesc_.add_blocks();
temp_root_block->set_idx(0);
temp_root_block->set_parent_idx(-1);
// wrap to BlockDescBind
paddle::framework::ProgramDescBind& program =
paddle::framework::ProgramDescBind::Instance(&pdesc_);
paddle::framework::BlockDescBind* root_block = program.Block(0);
std::vector<int> dim{6};
AddOp("feed", {}, {{"Out", {"a"}}}, {{"dims", dim}, {"col", 0}},
root_block);
AddOp("feed", {}, {{"Out", {"b"}}}, {{"dims", dim}, {"col", 1}},
root_block);
AddOp("fetch", {{"Input", {"a"}}}, {}, {{"col", 0}}, root_block);
AddOp("fetch", {{"Input", {"b"}}}, {}, {{"col", 1}}, root_block);
// flush
program.Proto();
std::vector<float> vec1 = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
std::vector<float> vec2 = {4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
inputs_.push_back(vec1);
inputs_.push_back(vec2);
dims_.push_back({static_cast<int64_t>(vec1.size())});
dims_.push_back({static_cast<int64_t>(vec2.size())});
}
protected:
ProgramDesc pdesc_;
std::vector<std::vector<float>> inputs_;
std::vector<std::vector<int64_t>> dims_;
};
#ifndef PADDLE_WITH_CUDA
TEST_F(ExecutorTesterRandom, CPU) {
std::vector<Place> places;
CPUPlace cpu_place;
places.push_back(cpu_place);
// We have a global Scope and BuddyAllocator, and we must ensure
// global BuddyAllocator is initialized before global Scope. Thus,
// global Scope will deconstruct before BuddyAllocator. Otherwise,
// "pointer being freed was not allocated" error will appear.
paddle::memory::Used(cpu_place);
std::unique_ptr<Executor> executor(new Executor(places));
executor->Run(init_pdesc_, &GetGlobalScope(), 0);
SetFeedVariable<float>(inputs_, dims_);
executor->Run(pdesc_, &GetGlobalScope(), 0);
std::vector<std::vector<float>> result = GetFetchVariable<float>();
}
TEST_F(ExecutorTesterFeedAndFetch, CPU) {
std::vector<Place> places;
CPUPlace cpu_place;
places.push_back(cpu_place);
// We have a global Scope and BuddyAllocator, and we must ensure
// global BuddyAllocator is initialized before global Scope. Thus,
// global Scope will deconstruct before BuddyAllocator. Otherwise,
// "pointer being freed was not allocated" error will appear.
paddle::memory::Used(cpu_place);
std::unique_ptr<Executor> executor(new Executor(places));
for (int batch_id = 0; batch_id < 3; batch_id++) {
SetFeedVariable<float>(inputs_, dims_);
executor->Run(pdesc_, &GetGlobalScope(), 0);
std::vector<std::vector<float>> result = GetFetchVariable<float>();
PADDLE_ENFORCE_EQ(result.size(), inputs_.size());
for (size_t i = 0; i < result.size(); ++i) {
PADDLE_ENFORCE_EQ(result[i].size(), inputs_[i].size());
for (size_t j = 0; j < result[i].size(); ++j) {
PADDLE_ENFORCE_EQ(result[i][j], inputs_[i][j]);
}
}
}
}
#else
TEST_F(ExecutorTesterRandom, GPU) {
std::vector<Place> places;
GPUPlace gpu_place(0);
places.push_back(gpu_place);
// We have a global Scope and BuddyAllocator, and we must ensure
// global BuddyAllocator is initialized before global Scope. Thus,
// global Scope will deconstruct before BuddyAllocator. Otherwise,
// "pointer being freed was not allocated" error will appear.
// If paddle is compiled with GPU, both CPU and GPU BuddyAllocator
// need to be used at first.
paddle::memory::Used(CPUPlace());
paddle::memory::Used(gpu_place);
std::unique_ptr<Executor> executor(new Executor(places));
executor->Run(init_pdesc_, &GetGlobalScope(), 0);
for (int batch_id = 0; batch_id < 3; batch_id++) {
SetFeedVariable<float>(inputs_, dims_);
executor->Run(pdesc_, &GetGlobalScope(), 0);
}
}
TEST_F(ExecutorTesterFeedAndFetch, GPU) {
std::vector<Place> places;
GPUPlace gpu_place(0);
places.push_back(gpu_place);
// We have a global Scope and BuddyAllocator, and we must ensure
// global BuddyAllocator is initialized before global Scope. Thus,
// global Scope will deconstruct before BuddyAllocator. Otherwise,
// "pointer being freed was not allocated" error will appear.
// If paddle is compiled with GPU, both CPU and GPU BuddyAllocator
// need to be used at first.
paddle::memory::Used(CPUPlace());
paddle::memory::Used(gpu_place);
std::unique_ptr<Executor> executor(new Executor(places));
for (int batch_id = 0; batch_id < 3; batch_id++) {
SetFeedVariable<float>(inputs_, dims_);
executor->Run(pdesc_, &GetGlobalScope(), 0);
std::vector<std::vector<float>> result = GetFetchVariable<float>();
PADDLE_ENFORCE_EQ(result.size(), inputs_.size());
for (size_t i = 0; i < result.size(); ++i) {
PADDLE_ENFORCE_EQ(result[i].size(), inputs_[i].size());
for (size_t j = 0; j < result[i].size(); ++j) {
PADDLE_ENFORCE_EQ(result[i][j], inputs_[i][j]);
}
}
}
}
#endif
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
#include <memory> // for unique_ptr
#include <mutex> // for call_once
#include "paddle/string/printf.h" #include "paddle/string/printf.h"
namespace paddle { namespace paddle {
...@@ -62,5 +65,17 @@ void Scope::DropKids() { ...@@ -62,5 +65,17 @@ void Scope::DropKids() {
kids_.clear(); kids_.clear();
} }
std::once_flag feed_variable_flag;
framework::Scope& GetGlobalScope() {
static std::unique_ptr<framework::Scope> g_scope{nullptr};
std::call_once(feed_variable_flag, [&]() {
g_scope.reset(new framework::Scope());
g_scope->NewVar("feed_value");
g_scope->NewVar("fetch_value");
});
return *(g_scope.get());
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -73,5 +73,7 @@ class Scope { ...@@ -73,5 +73,7 @@ class Scope {
DISABLE_COPY_AND_ASSIGN(Scope); DISABLE_COPY_AND_ASSIGN(Scope);
}; };
framework::Scope& GetGlobalScope();
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/feed_op.h"
namespace paddle {
namespace operators {
class FeedOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output should be not null.");
auto& shape = ctx->Attrs().Get<std::vector<int>>("dims");
std::vector<int64_t> shape_int64(shape.size(), 0);
std::transform(shape.begin(), shape.end(), shape_int64.begin(),
[](int a) { return static_cast<int64_t>(a); });
ctx->SetOutputDim("Out", framework::make_ddim(shape_int64));
// TODO(qijun): need to handle LodTensor later
}
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return static_cast<framework::DataType>(Attr<int>("dataType"));
}
};
class FeedOpMaker : public framework::OpProtoAndCheckerMaker {
public:
FeedOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<int>("dataType", "output data type")
.SetDefault(framework::DataType::FP32);
AddAttr<int>("col", "The col in global feed variable").SetDefault(0);
AddAttr<std::vector<int>>("dims", "The dimension of feed tensor.");
AddOutput("Out", "The output of feed op.");
AddComment(R"DOC(Feed data from global feed variable)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(feed, ops::FeedOp, ops::FeedOpMaker);
REGISTER_OP_CPU_KERNEL(feed, ops::FeedKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/feed_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(feed, ops::FeedKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename T>
class FeedKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
framework::Tensor* out = ctx.Output<framework::Tensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
framework::Variable* g_feed_variable =
framework::GetGlobalScope().FindVar("feed_value");
const auto& tensors =
g_feed_variable->Get<std::vector<framework::Tensor>>();
int col = ctx.template Attr<int>("col");
PADDLE_ENFORCE_GT(tensors.size(), static_cast<size_t>(col));
// TODO(qijun):
// check tensors[col].dims() with attribute,
// except the first dimenson.
out->CopyFrom<T>(tensors[col], ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/fetch_op.h"
namespace paddle {
namespace operators {
class FetchOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"), "Input should be not null.");
}
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return static_cast<framework::DataType>(Attr<int>("dataType"));
}
};
class FetchOpMaker : public framework::OpProtoAndCheckerMaker {
public:
FetchOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<int>("dataType", "output data type")
.SetDefault(framework::DataType::FP32);
AddAttr<int>("col", "The col in global fetch variable").SetDefault(0);
AddInput("Input", "The output of fetch op.");
AddComment(R"DOC(Fetch data to global fetch variable)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(fetch, ops::FetchOp, ops::FetchOpMaker);
REGISTER_OP_CPU_KERNEL(fetch, ops::FetchKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/fetch_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(fetch, ops::FetchKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename T>
class FetchKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const framework::Tensor* input = ctx.Input<framework::Tensor>("Input");
framework::Variable* g_fetch_variable =
framework::GetGlobalScope().FindVar("fetch_value");
auto* tensors =
g_fetch_variable->GetMutable<std::vector<framework::Tensor>>();
int col = ctx.template Attr<int>("col");
if (tensors->size() < static_cast<size_t>(col + 1)) {
tensors->resize(col + 1);
}
PADDLE_ENFORCE_GT(tensors->size(), static_cast<size_t>(col));
(*tensors)[col].Resize(input->dims());
(*tensors)[col].mutable_data<T>(platform::CPUPlace());
(*tensors)[col].CopyFrom<T>(*input, platform::CPUPlace());
// TODO(qijun): need to handle LodTensor later
}
};
} // namespace operators
} // namespace paddle
...@@ -43,6 +43,8 @@ int GetCurrentDeviceId() { ...@@ -43,6 +43,8 @@ int GetCurrentDeviceId() {
} }
void SetDeviceId(int id) { void SetDeviceId(int id) {
// TODO(qijun): find a better way to cache the cuda device count
PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
PADDLE_ENFORCE(cudaSetDevice(id), PADDLE_ENFORCE(cudaSetDevice(id),
"cudaSetDevice failed in paddle::platform::SetDeviceId"); "cudaSetDevice failed in paddle::platform::SetDeviceId");
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册