Commit b0f4eae6 authored by jackzhang235

Merge branch 'Add_GatherOp' into develop

......@@ -14,7 +14,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: modify build.sh
run: sed -i 's/DLITE_WITH_PYTHON=ON/DLITE_WITH_PYTHON=OFF/' lite/tools/build_mlu.sh && sed -i 's/WITH_TESTING=OFF/WITH_TESTING=ON/' lite/tools/build_mlu.sh && sed -i 's/PRINT_HW_TIME false/PRINT_HW_TIME true/' lite/kernels/mlu/bridges/graph.h
run: sed -i 's/DLITE_WITH_PYTHON=ON/DLITE_WITH_PYTHON=OFF/' lite/tools/build_mlu.sh && sed -i 's/WITH_TESTING=OFF/WITH_TESTING=ON/' lite/tools/build_mlu.sh && sed -i 's/PRINT_HW_TIME false/PRINT_HW_TIME true/' lite/kernels/mlu/bridges/graph.h && sed -i 's/BUILD_EXTRA=OFF/BUILD_EXTRA=ON/' lite/tools/build_mlu.sh
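# Note: flipping BUILD_EXTRA=ON is what pulls the new gather (and lrn) bridges into the build, since they are guarded by LITE_BUILD_EXTRA in the CMake changes below.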
- name: build
run: ./lite/tools/build_mlu.sh build
- name: test_act_converter_mlu
......@@ -47,6 +47,10 @@ jobs:
run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_argmax_converter_mlu
- name: test_split_converter_mlu
run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_split_converter_mlu
- name: test_lrn_converter_mlu
run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_lrn_converter_mlu
- name: test_gather_converter_mlu
run: ./build.lite.mlu/lite/kernels/mlu/bridges/test_gather_converter_mlu
- name: test_classification
run: |
cd ..
......
......@@ -50,6 +50,8 @@ set(mlu_subgraph_bridges
if (LITE_BUILD_EXTRA)
lite_cc_library(subgraph_bridge_lrn_op_mlu SRCS lrn_op.cc DEPS ${subgraph_bridge_deps_mlu})
list(APPEND mlu_subgraph_bridges subgraph_bridge_lrn_op_mlu)
lite_cc_library(subgraph_bridge_gather_op_mlu SRCS gather_op.cc DEPS ${subgraph_bridge_deps_mlu})
list(APPEND mlu_subgraph_bridges subgraph_bridge_gather_op_mlu)
endif()
lite_cc_library(subgraph_test_helper_mlu SRCS test_helper.cc DEPS ${mlu_subgraph_bridges})
......@@ -71,5 +73,6 @@ lite_cc_test(test_argmax_converter_mlu SRCS argmax_op_test.cc DEPS scope optimiz
lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
if (LITE_BUILD_EXTRA)
lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test(test_gather_converter_mlu SRCS gather_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
endif()
message(STATUS "+++++ mlu_subgraph_bridges: ${mlu_subgraph_bridges}")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/mlu/bridges/graph.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[MLU] Converting " + op_type + "...";
auto x_var_name = op_info->Input("X").front();
auto index_var_name = op_info->Input("Index").front();
auto out_var_name = op_info->Output("Out").front();
auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
auto output_dims = output->dims().Vectorize();
CHECK(graph->HasNode(x_var_name));
auto x_tensor = graph->GetNode(x_var_name);
auto index_tensor = graph->GetNode(index_var_name);
auto output_tensor = graph->AddNode(
out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
cnmlBaseOp_t gather_op;
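// Gather along the first (batch) dimension: Paddle's gather indexes axis 0,
// which this bridge maps to CNML_DIM_N.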
CNML_CALL(cnmlCreateGatherV2Op(&gather_op,
x_tensor->mlu_tensor(),
index_tensor->mlu_tensor(),
output_tensor->mlu_tensor(),
CNML_DIM_N));
graph->FuseOp(gather_op);
CNML_CALL(cnmlDestroyBaseOp(&gather_op));
return SUCCESS;
}
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(gather,
kMLU,
paddle::lite::subgraph::mlu::GatherConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/gather_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/mlu/bridges/test_helper.h"
#include "lite/kernels/mlu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {
template <typename dtype>
void gather_ref(const std::shared_ptr<operators::GatherOp> op) {
Scope* scope = op->scope();
const OpInfo* op_info = op->op_info();
auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
auto index =
scope->FindVar(op_info->Input("Index").front())->GetMutable<Tensor>();
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
auto x_dims = x->dims();
auto index_dims = index->dims();
CHECK(index_dims.size() == 1 ||
(index_dims.size() == 2 && index_dims[1] == 1));
int batch_size = index_dims[0];
DDim out_dims = x_dims;
out_dims[0] = batch_size;
out->Resize(out_dims);
auto x_data = x->data<float>();
auto index_data = index->data<int>();
auto out_data = out->mutable_data<float>();
auto slice_num = x_dims[0];
auto slice_size = x_dims.Slice(1, x_dims.size()).production();
for (int i = 0; i < batch_size; i++) {
auto index_value = index_data[i];
CHECK_LT(index_value, slice_num) << "index < slice_num";
CHECK_GE(index_value, 0) << "index >= 0";
memcpy(out_data + i * slice_size,
x_data + index_value * slice_size,
slice_size * sizeof(float));
}
}
void test_gather() {
// prepare input&output variables
std::string x_var_name = "x";
std::string out_var_name = "out";
std::string out_ref_var_name = "out_ref";
std::string index_var_name = "index";
Scope scope;
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
auto* index = scope.Var(index_var_name)->GetMutable<Tensor>();
x->Resize({5, 4, 3, 2});
index->Resize({2});
// initialize input&output data
FillTensor<float>(x);
FillTensor<int>(index, 1, 3);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("gather");
opdesc.SetInput("X", {x_var_name});
opdesc.SetInput("Index", {index_var_name});
opdesc.SetOutput("Out", {out_var_name});
auto op = CreateOp<operators::GatherOp>(opdesc, &scope);
gather_ref<float>(op);
out_ref->CopyDataFrom(*out);
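// The MLU side computes in NHWC layout, so transpose the NCHW input to NHWC
// before launching the op (the result is transposed back below for comparison).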
Tensor input;
input.Resize({5, 4, 3, 2});
transpose<float*>(x->mutable_data<float>(),
input.mutable_data<float>(),
{5, 4, 3, 2},
{0, 2, 3, 1});
x->CopyDataFrom(input);
LaunchOp(op, {x_var_name, index_var_name}, {out_var_name});
// compare results
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
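// Transpose the NHWC output produced on the MLU back to NCHW before comparing
// against the CPU reference.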
Tensor output;
output.Resize(out->dims());
transpose<float*>(out_data,
output.mutable_data<float>(),
{static_cast<int>(out->dims()[0]),
static_cast<int>(out->dims()[2]),
static_cast<int>(out->dims()[3]),
static_cast<int>(out->dims()[1])},
{0, 3, 1, 2});
out_data = output.mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
VLOG(5) << i;
EXPECT_NEAR(out_data[i], out_ref_data[i], 5e-4);
}
}
TEST(MLUBridges, gather) { test_gather(); }
} // namespace mlu
} // namespace subgraph
} // namespace lite
} // namespace paddle
USE_SUBGRAPH_BRIDGE(gather, kMLU);
......@@ -38,5 +38,6 @@ USE_SUBGRAPH_BRIDGE(slice, kMLU);
USE_SUBGRAPH_BRIDGE(squeeze, kMLU);
USE_SUBGRAPH_BRIDGE(squeeze2, kMLU);
#ifdef LITE_BUILD_EXTRA
USE_SUBGRAPH_BRIDGE(gather, kMLU);
USE_SUBGRAPH_BRIDGE(lrn, kMLU);
#endif
......@@ -50,23 +50,54 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
// Convert input data var and add it into the MLU IR graph
for (auto& input_name : input_var_names) {
auto input_tensor = scope->FindMutableTensor(input_name);
auto data_type = input_tensor->precision();
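// Map the tensor's PrecisionType to the matching CNML data type;
// unsupported precisions abort via CHECK(0).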
cnmlDataType_t fp_type;
switch (data_type) {
case paddle::lite_api::PrecisionType::kFP16:
fp_type = CNML_DATA_FLOAT16;
break;
case paddle::lite_api::PrecisionType::kFloat:
fp_type = CNML_DATA_FLOAT32;
break;
case paddle::lite_api::PrecisionType::kInt32:
fp_type = CNML_DATA_INT32;
break;
default:
CHECK(0);
}
CHECK(input_tensor);
Tensor temp_input;
temp_input.Resize(input_tensor->dims().Vectorize());
temp_input.CopyDataFrom(*input_tensor);
auto input_node =
graph.AddNode(input_name,
input_tensor->dims().Vectorize(),
CNML_TENSOR,
CNML_NCHW,
graph.FPType(),
reinterpret_cast<void*>(
input_tensor->mutable_data<float>(TARGET(kMLU))));
CHECK(input_node);
CNRT_CHECK(cnrtMemcpy(input_tensor->mutable_data<float>(),
temp_input.mutable_data<float>(),
sizeof(float) * input_tensor->dims().production(),
CNRT_MEM_TRANS_DIR_HOST2DEV));
if (fp_type == CNML_DATA_INT32) {
auto input_node =
graph.AddNode(input_name,
input_tensor->dims().Vectorize(),
CNML_TENSOR,
CNML_NCHW,
fp_type,
reinterpret_cast<void*>(
input_tensor->mutable_data<int>(TARGET(kMLU))));
CHECK(input_node);
CNRT_CHECK(cnrtMemcpy(input_tensor->mutable_data<int>(),
temp_input.mutable_data<int>(),
sizeof(int) * input_tensor->dims().production(),
CNRT_MEM_TRANS_DIR_HOST2DEV));
} else {
auto input_node =
graph.AddNode(input_name,
input_tensor->dims().Vectorize(),
CNML_TENSOR,
CNML_NCHW,
fp_type,
reinterpret_cast<void*>(
input_tensor->mutable_data<float>(TARGET(kMLU))));
CHECK(input_node);
CNRT_CHECK(cnrtMemcpy(input_tensor->mutable_data<float>(),
temp_input.mutable_data<float>(),
sizeof(float) * input_tensor->dims().production(),
CNRT_MEM_TRANS_DIR_HOST2DEV));
}
}
op->CheckShape();
op->InferShape();
......
......@@ -100,6 +100,21 @@ class SubgraphEngine : public subgraph::Engine {
return true;
}
inline cnmlDataType_t PrecisionToDatatype(PrecisionType data_type) {
switch (data_type) {
case paddle::lite_api::PrecisionType::kFP16:
return CNML_DATA_FLOAT16;
case paddle::lite_api::PrecisionType::kFloat:
return CNML_DATA_FLOAT32;
case paddle::lite_api::PrecisionType::kInt32:
return CNML_DATA_INT32;
case paddle::lite_api::PrecisionType::kInt8:
return CNML_DATA_INT8;
default:
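// Unknown precision: fall back to the engine's default
// (fp_type_ is assumed to hold a PrecisionType).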
return PrecisionToDatatype(fp_type_);
}
}
protected:
int BuildDeviceProgram() override {
int status = 0;
......@@ -113,6 +128,8 @@ class SubgraphEngine : public subgraph::Engine {
status |= subgraph::REBUILD_WHEN_SHAPE_CHANGED;
for (auto& input_name : input_names_) {
auto input_tensor = scope_->FindMutableTensor(input_name);
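// Use each input's own precision rather than the graph-wide FP type,
// so e.g. gather's int32 Index tensor keeps its integer type.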
auto data_type = input_tensor->precision();
cnmlDataType_t fp_type = PrecisionToDatatype(data_type);
origin_itensors_.push_back(input_tensor);
if (GetBoolFromEnv("BATCH_SIZE_CHANGEABLE")) {
auto iv = input_tensor->dims().Vectorize();
......@@ -127,7 +144,7 @@ class SubgraphEngine : public subgraph::Engine {
input_tensor->dims().Vectorize(),
CNML_TENSOR,
CNML_NCHW,
graph->FPType());
fp_type);
CHECK(input_node);
// MLU doesn't support dynamic dimensions/shapes, so need to rebuild
// the program when the shape of any input tensor is changed.
......