diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc index 8b00b7e791f023468a4913a0946424aa38e6e39f..33a5aff1de2851ad55c2df83cc48ba86f8ded754 100644 --- a/paddle/fluid/inference/anakin/convert/fc.cc +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -13,6 +13,16 @@ // limitations under the License. #include "paddle/fluid/inference/anakin/convert/fc.h" +#include <algorithm> + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; namespace paddle { namespace inference { @@ -23,15 +33,39 @@ void FcOpConverter::operator()(const framework::proto::OpDesc &op, framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Out").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); auto x_name = op_desc.Input("X").front(); - PADDLE_ENFORCE(x_name.size() > 0); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); auto *y_v = scope.FindVar(op_desc.Input("Y").front()); PADDLE_ENFORCE_NOT_NULL(y_v); auto *y_t = y_v->GetMutable<framework::LoDTensor>(); - auto shape = framework::vectorize2int(y_t->dims()); + auto input_name = op_desc.Input("X").front(); + auto output_name = op_desc.Output("Out").front(); + + auto weight_shape = framework::vectorize2int(y_t->dims()); + engine_->AddOp(op_name, "Dense", {input_name}, {output_name}); + engine_->AddOpAttr(op_name, "bias_term", false); + engine_->AddOpAttr(op_name, "axis", 1); + int out_dim = weight_shape[1]; + engine_->AddOpAttr(op_name, "out_dim", out_dim); + + weight_shape.push_back(1); + weight_shape.push_back(1); + Shape anakin_shape(weight_shape); + + framework::LoDTensor weight_tensor; + weight_tensor.Resize(y_t->dims()); + TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor); + + auto *weight1 = +
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape); + float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data()); + std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data); + weight1->d_tensor().set_shape(anakin_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr(op_name, "weight_1", *weight1); } } // namespace anakin diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc index a10b1423547e854f18e9c1f49c57aaf31eb1981c..7b8ceefe28873f0ffb9cedbb04b832ba029b7de4 100644 --- a/paddle/fluid/inference/anakin/convert/test_fc_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc @@ -22,14 +22,16 @@ namespace inference { namespace anakin { TEST(fc_op, test) { - auto it = OpRegister::instance()->Get("fc"); - ASSERT_TRUE(it != nullptr); + auto fc_converter = OpRegister::instance()->Get("fc"); + ASSERT_TRUE(fc_converter != nullptr); + // Registrar<FcOpConverter> register_fc("fc"); + // auto fc = std::make_shared<FcOpConverter>(); std::unordered_set<std::string> parameters({"mul_y"}); framework::Scope scope; AnakinConvertValidation validator(parameters, scope); validator.DeclInputVar("mul_x", {1, 1, 1, 1}); - validator.DeclParamVar("mul_y", {1, 1, 1, 2}); + validator.DeclParamVar("mul_y", {1, 2}); validator.DeclOutputVar("mul_out", {1, 1, 1, 2}); // Prepare Op description diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index d4acce3d26fd3208a02bad5b42d02958faf0d3f2..38d8e596a738ac98c9f9870473f72dcc72b0e7aa 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -14,6 +14,7 @@ limitations under the License.
*/ #pragma once +#include <map> #include <string> #include <unordered_map> #include <vector> @@ -127,6 +128,7 @@ class AnakinConvertValidation { engine_->SetInputShape(input, t_shape); } engine_->Optimize(); + engine_->InitGraph(); } // We use the set 'neglected_output' here, because some Ops like batch norm, @@ -138,16 +140,47 @@ class AnakinConvertValidation { platform::CUDADeviceContext ctx(place_); op_->Run(scope_, place_); + // std::vector<framework::LoDTensor> input_vector; + // std::vector<framework::LoDTensor> output_vector; + std::map<std::string, framework::LoDTensor *> inputs; + for (const auto& input : op_desc_->InputArgumentNames()) { + if (parameters_.count(input)) continue; + auto* var = scope_.FindVar(input); + auto tensor = var->GetMutable<framework::LoDTensor>(); + inputs.insert({input, tensor}); + } + + std::map<std::string, framework::LoDTensor *> outputs; + std::vector<std::vector<float>> fluid_outputs; for (const auto& output : op_desc_->OutputArgumentNames()) { if (neglected_output.count(output)) continue; std::vector<float> fluid_out; auto* var = scope_.FindVar(output); - auto* tensor = var->GetMutable<framework::LoDTensor>(); + auto tensor = var->GetMutable<framework::LoDTensor>(); framework::TensorToVector(*tensor, ctx, &fluid_out); + fluid_outputs.push_back(fluid_out); - size_t fluid_out_size = fluid_out.size(); - for (size_t i = 0; i < fluid_out_size; i++) { + // size_t fluid_out_size = fluid_out.size(); /*for (size_t i = 0; i < fluid_out_size; i++) { std::cout << fluid_out[i] << std::endl; + }*/ + outputs.insert({output, tensor}); + } + + engine_->Execute(inputs, outputs); + int i_output = 0; + for (const auto& output : op_desc_->OutputArgumentNames()) { + if (neglected_output.count(output)) continue; + std::vector<float> anakin_out; + auto* var = scope_.FindVar(output); + auto tensor = var->GetMutable<framework::LoDTensor>(); + framework::TensorToVector(*tensor, ctx, &anakin_out); + + size_t anakin_out_size = anakin_out.size(); + auto fluid_out = fluid_outputs[i_output++]; + for (size_t i = 0; i < anakin_out_size; i++) { + LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], " + << "fluid[" << fluid_out[i] << "]"; } } } diff --git a/paddle/fluid/inference/anakin/test_anakin_engine.cc
b/paddle/fluid/inference/anakin/test_anakin_engine.cc index 8451a333bb85224451f5e559bd7bddf4134bce99..571294d3e22fb9489686bfcb2f3a64198099f970 100644 --- a/paddle/fluid/inference/anakin/test_anakin_engine.cc +++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc @@ -46,47 +46,51 @@ class TestAnakinEngine : public ::testing::Test { void TestAnakinEngine::SetUp() { engine_.reset(new AnakinEngine<NV, Precision::FP32>(true)); +} + +TEST_F(TestAnakinEngine, Execute) { + engine_->AddOp("op1", "Dense", {"x"}, {"y"}); + engine_->AddOpAttr("op1", "out_dim", 2); + engine_->AddOpAttr("op1", "bias_term", false); + engine_->AddOpAttr("op1", "axis", 1); + std::vector<int> shape = {1, 1, 1, 2}; + Shape tmp_shape(shape); + // PBlock<NV> weight1(tmp_shape); + auto *weight1 = + GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape); + // auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT); + + float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data()); + cpu_data[0] = 2.; + weight1->d_tensor().set_shape(tmp_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr("op1", "weight_1", *weight1); - TEST_F(TestAnakinEngine, Execute) { - engine_->AddOp("op1", "Dense", {"x"}, {"y"}); - engine_->AddOpAttr("op1", "out_dim", 2); - engine_->AddOpAttr("op1", "bias_term", false); - engine_->AddOpAttr("op1", "axis", 1); - std::vector<int> shape = {1, 1, 1, 2}; - Shape tmp_shape(shape); - auto *weight1 = - GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape); - - float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data()); - cpu_data[0] = 2.; - weight1->d_tensor().set_shape(tmp_shape); - weight1->d_tensor().copy_from(weight1->h_tensor()); - engine_->AddOpAttr("op1", "weight_1", *weight1); - - engine_->Freeze(); - engine_->SetInputShape("x", {1, 1, 1, 1}); - engine_->Optimize(); - engine_->InitGraph(); - framework::LoDTensor x; - framework::LoDTensor y; - x.Resize({1, 1, 1, 1}); - y.Resize({1, 1, 1, 2}); - auto *x_data = x.mutable_data<float>(platform::CUDAPlace()); - float x_data_cpu[] = {1.}; -
cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice); - - std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}}; - auto *y_data = y.mutable_data<float>(platform::CUDAPlace()); - std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}}; - - engine_->Execute(inputs, outputs); - auto *y_data_gpu = y_data; - float y_data_cpu[2]; - cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, - cudaMemcpyDeviceToHost); - LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1]; - } + engine_->Freeze(); + // PTuple<int> input_shape = {1}; + // engine_->AddOpAttr("x", "input_shape", input_shape); + engine_->SetInputShape("x", {1, 1, 1, 1}); + engine_->Optimize(); + engine_->InitGraph(); + framework::LoDTensor x; + framework::LoDTensor y; + x.Resize({1, 1, 1, 1}); + y.Resize({1, 1, 1, 2}); + auto *x_data = x.mutable_data<float>(platform::CUDAPlace()); + float x_data_cpu[] = {1.}; + cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice); + + std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}}; + auto *y_data = y.mutable_data<float>(platform::CUDAPlace()); + std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}}; + + engine_->Execute(inputs, outputs); + auto *y_data_gpu = y_data; + float y_data_cpu[2]; + cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost); + LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1]; } + } // namespace anakin } // namespace inference } // namespace paddle