Commit eeb70edd authored by flame, committed by ceci3

add anakin fc op converter (#15965)

Parent ab5a6484
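For orientation, a minimal sketch of how the registered converter is looked up, assuming only the OpRegister usage visible in the test further down (the converter's full operator() parameter list is truncated in this diff, so the invocation itself is not reproduced):

// Sketch only; mirrors the registry lookup in the test code below.
auto fc_converter = OpRegister::instance()->Get("fc");  // registered as "fc"
ASSERT_TRUE(fc_converter != nullptr);
// Invoking the converter's operator() emits a "Dense" op (with its
// "weight_1" attribute) into the bound AnakinEngine; its signature is
// cut off in this diff, so the call is not shown here.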
@@ -13,6 +13,16 @@
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
@@ -23,15 +33,39 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op,
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Out").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
PADDLE_ENFORCE(x_name.size() > 0);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto *y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(y_v);
auto *y_t = y_v->GetMutable<framework::LoDTensor>();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
auto weight_shape = framework::vectorize2int(y_t->dims());
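// Map the fc op onto Anakin's "Dense" layer; this converter does not wire up a bias input.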
engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "bias_term", false);
engine_->AddOpAttr(op_name, "axis", 1);
int out_dim = weight_shape[1];
engine_->AddOpAttr(op_name, "out_dim", out_dim);
weight_shape.push_back(1);
weight_shape.push_back(1);
Shape anakin_shape(weight_shape);
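// Stage the weights on the CPU first; the parameter tensor in the scope may live on the GPU.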
framework::LoDTensor weight_tensor;
weight_tensor.Resize(y_t->dims());
TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
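// Fill the block's host tensor with the weights, then sync them into the device tensor below.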
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
} // namespace anakin
......
@@ -22,14 +22,16 @@ namespace inference {
namespace anakin {
TEST(fc_op, test) {
auto fc_converter = OpRegister::instance()->Get("fc");
ASSERT_TRUE(fc_converter != nullptr);
// Registrar<FcOpConverter> register_fc("fc");
// auto fc = std::make_shared<FcOpConverter>();
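// "mul_y" is the fc weight; listing it as a parameter lets the converter fetch it from the scope.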
std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("mul_x", {1, 1, 1, 1});
validator.DeclParamVar("mul_y", {1, 1, 1, 2});
validator.DeclParamVar("mul_y", {1, 2});
validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
// Prepare Op description
......
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
@@ -127,6 +128,7 @@ class AnakinConvertValidation {
engine_->SetInputShape(input, t_shape);
}
engine_->Optimize();
engine_->InitGraph();
}
// We use the set 'neglected_output' here, because some Ops like batch norm,
@@ -138,16 +140,47 @@ class AnakinConvertValidation {
platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_);
// std::vector<framework::LoDTensor> input_vector;
// std::vector<framework::LoDTensor> output_vector;
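// Collect the op's runtime inputs; parameters were already baked into the engine at convert time.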
std::map<std::string, framework::LoDTensor*> inputs;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto* var = scope_.FindVar(input);
auto tensor = var->GetMutable<framework::LoDTensor>();
inputs.insert({input, tensor});
}
std::map<std::string, framework::LoDTensor*> outputs;
std::vector<std::vector<float>> fluid_outputs;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> fluid_out;
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out);
fluid_outputs.push_back(fluid_out);
// size_t fluid_out_size = fluid_out.size();
/*for (size_t i = 0; i < fluid_out_size; i++) {
std::cout << fluid_out[i] << std::endl;
}*/
outputs.insert({output, tensor});
}
engine_->Execute(inputs, outputs);
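// Read back Anakin's outputs and compare them element-wise with the Fluid results above.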
int i_output = 0;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> anakin_out;
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &anakin_out);
size_t anakin_out_size = anakin_out.size();
auto fluid_out = fluid_outputs[i_output++];
for (size_t i = 0; i < anakin_out_size; i++) {
LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
<< "fluid[" << fluid_out[i] << "]";
}
}
}
......
@@ -46,16 +46,19 @@ class TestAnakinEngine : public ::testing::Test {
void TestAnakinEngine::SetUp() {
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
TEST_F(TestAnakinEngine, Execute) {
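// Hand-build a one-layer graph: a bias-free Dense op mapping a 1-element input to 2 outputs.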
engine_->AddOp("op1", "Dense", {"x"}, {"y"});
engine_->AddOpAttr("op1", "out_dim", 2);
engine_->AddOpAttr("op1", "bias_term", false);
engine_->AddOpAttr("op1", "axis", 1);
std::vector<int> shape = {1, 1, 1, 2};
Shape tmp_shape(shape);
// PBlock<NV> weight1(tmp_shape);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
// auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
cpu_data[0] = 2.;
@@ -64,6 +67,8 @@ void TestAnakinEngine::SetUp() {
engine_->AddOpAttr("op1", "weight_1", *weight1);
engine_->Freeze();
// PTuple<int> input_shape = {1};
// engine_->AddOpAttr("x", "input_shape", input_shape);
engine_->SetInputShape("x", {1, 1, 1, 1});
engine_->Optimize();
engine_->InitGraph();
@@ -82,11 +87,10 @@ void TestAnakinEngine::SetUp() {
engine_->Execute(inputs, outputs);
auto *y_data_gpu = y_data;
float y_data_cpu[2];
cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost);
LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];
}
} // namespace anakin
} // namespace inference
} // namespace paddle