未验证 提交 3c9a1a1d 编写于 作者: H hong19860320 提交者: GitHub

[cherry-pick] Fix build error caused by flatbuffer if the target is...

[cherry-pick] Fix build error caused by flatbuffer if the target is tiny_publish, and fix the missing of the attr name of the output scale (#4356)

* [LITE][XPU] 1. Add sequence_unpad kernel for XPU; 2. Bugfix in sequence_unpad kernel for x86, as InferShapeImpl() is now empty in lite/operators/sequence_unpad_op.cc; 3. Refine TargetWrapperXPU; (#4237)

* [Core] Fix the missing of the attr name of the output scale (#4334)

* [NPU] Fix build error caused by flatbuffer if the target is tiny_publish (#4340)
test=develop
Co-authored-by: NCwndmiao <miaotianxiang@baidu.com>
上级 63e44464
......@@ -18,6 +18,27 @@
namespace paddle {
namespace lite {
// Ensures the scratch pad can hold at least |new_size| bytes.
//
// A request that already fits in the current |size_| is a no-op. Otherwise
// the existing buffer is released and a fresh one of |new_size| bytes is
// allocated; the old contents are not copied over. Growing is only
// implemented for pads that are not flagged as L3 -- an L3 pad that needs to
// grow trips the CHECK below.
void XPUScratchPad::Reserve(size_t new_size) {
  const bool needs_growth = new_size > size_;
  if (!needs_growth) {
    return;
  }
  if (is_l3_) {
    CHECK(false) << "Not supported if is_l3_ == true";
    return;
  }
  // Release first, then allocate the larger replacement buffer.
  TargetWrapperXPU::Free(addr_);
  addr_ = TargetWrapperXPU::Malloc(new_size);
  size_ = new_size;
}
// Deleter used by XPUScratchPadGuard.
//
// Passes the pad's buffer to TargetWrapperXPU::Free unless the pad is flagged
// as L3, then destroys the XPUScratchPad object itself.
void XPUScratchPadDeleter::operator()(XPUScratchPad* sp) const {
  const bool free_needed = !sp->is_l3_;
  if (free_needed) {
    TargetWrapperXPU::Free(sp->addr_);
  }
  delete sp;
}
void* TargetWrapperXPU::Malloc(size_t size) {
void* ptr{nullptr};
XPU_CALL(xpu_malloc(&ptr, size));
......@@ -51,7 +72,7 @@ XPUScratchPadGuard TargetWrapperXPU::MallocScratchPad(size_t size,
ptr = TargetWrapperXPU::Malloc(size);
}
CHECK(ptr != nullptr) << "size = " << size << ", use_l3 = " << use_l3;
return XPUScratchPadGuard(new XPUScratchPad(ptr, use_l3));
return XPUScratchPadGuard(new XPUScratchPad(ptr, size, use_l3));
}
std::string TargetWrapperXPU::multi_encoder_precision; // NOLINT
......
......@@ -37,19 +37,19 @@ const int XPU_MAX_LOD_SEQ_LEN = 512;
using TargetWrapperXPU = TargetWrapper<TARGET(kXPU)>;
// Bookkeeping record for one XPU scratch buffer: the buffer address, its size
// in bytes, and whether it is flagged as L3.
// NOTE(review): this listing appears to show both the pre-change (two
// argument) and post-change (three argument) constructor -- confirm which one
// is present in the real header.
struct XPUScratchPad {
// Leaves |size_| at its default of 0.
XPUScratchPad(void* addr, bool is_l3) : addr_(addr), is_l3_(is_l3) {}
// Records the buffer size alongside the address and the L3 flag.
XPUScratchPad(void* addr, size_t size, bool is_l3)
: addr_(addr), size_(size), is_l3_(is_l3) {}
// XXX(miaotianxiang): |size_| increases monotonically
void Reserve(size_t new_size);
// Address of the buffer.
void* addr_{nullptr};
// Size of the buffer in bytes.
size_t size_{0};
// True when the pad is flagged as L3.
bool is_l3_{false};
};
// Custom deleter for XPUScratchPadGuard (a std::unique_ptr<XPUScratchPad>).
// NOTE(review): this listing appears to show both the pre-change inline body
// and the post-change out-of-line declaration of operator() -- confirm which
// one is present in the real header.
struct XPUScratchPadDeleter {
// Inline form: frees the buffer with xpu_free unless the pad is flagged as
// L3, then deletes the pad object.
void operator()(XPUScratchPad* sp) const {
if (!sp->is_l3_) {
XPU_CALL(xpu_free(sp->addr_));
}
delete sp;
}
// Declaration-only form.
void operator()(XPUScratchPad* sp) const;
};
using XPUScratchPadGuard = std::unique_ptr<XPUScratchPad, XPUScratchPadDeleter>;
......
......@@ -322,6 +322,7 @@ std::vector<float> OpInfo::GetOutputScale(const std::string &name,
int index;
CHECK(GetOutputArgname(name, &argname));
CHECK(GetOutputIndex(name, &index));
scale_name = argname + to_string(index) + "_scale";
}
return GetAttr<std::vector<float>>(scale_name);
}
......
......@@ -60,9 +60,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(output_dims[0], bs);
CHECK_EQ(output_dims[1], oc);
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
bool with_act =
op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
std::string act_type =
......
......@@ -45,7 +45,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
auto global_pooling = op_info->GetAttr<bool>("global_pooling");
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
// pool mode
if ((pooling_type == "max") || (pooling_type == "avg")) {
......
......@@ -53,9 +53,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(output_dims[0], bs);
CHECK_EQ(output_dims[1], oc);
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
bool with_act =
op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
std::string act_type =
......
......@@ -59,8 +59,8 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output_size = op_info->GetAttr<std::vector<int>>("output_size");
}
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
CHECK_EQ(dilations.size(), 2L);
std::string padding_algorithm =
op_info->HasAttr("padding_algorithm")
......
......@@ -35,7 +35,7 @@ int Pad2dConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
auto padding = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> padding = op_info->GetAttr<std::vector<int>>("paddings");
CHECK_EQ(padding.size(), 4);
// X node
......
......@@ -39,7 +39,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
auto global_pooling = op_info->GetAttr<bool>("global_pooling");
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
// X node
std::shared_ptr<Node> x_node = nullptr;
......
......@@ -36,7 +36,7 @@ int ReduceMeanConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_dims = x->dims();
auto out_name = op_info->Input("Out").front();
auto keep_dim = op_info->GetAttr<bool>("keep_dim");
auto dim = op_info->GetAttr<std::vector<int>>("dim");
std::vector<int> dim = op_info->GetAttr<std::vector<int>>("dim");
CHECK(!dim.empty()) << "[NPU] \"dim\" of reduce_mean should not be empty.";
for (size_t i = 0; i < dim.size(); i++) {
if (dim[i] < 0) {
......
......@@ -51,9 +51,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(output_dims[0], bs);
CHECK_EQ(output_dims[1], oc);
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
CHECK_EQ(strides.size(), 2L);
CHECK_EQ(dilations.size(), 2L);
......
......@@ -42,7 +42,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
auto global_pooling = op_info->GetAttr<bool>("global_pooling");
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
// for quantization
bool enable_int8 = false;
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <vector>
#include "lite/backends/x86/math/sequence_padding.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -34,6 +35,30 @@ class SequenceUnpadCompute
auto& param = this->template Param<param_t>();
auto& ctx = this->ctx_->template As<X86Context>();
auto x_dims = param.X->dims();
auto len_dims = param.Length->dims();
auto* seq_len_ptr = param.Length->template data<int64_t>();
int64_t batch_size = len_dims[0];
std::vector<uint64_t> out_lod0(batch_size + 1, 0);
for (int64_t i = 0; i < batch_size; ++i) {
out_lod0[i + 1] = out_lod0[i] + seq_len_ptr[i];
}
paddle::lite::LoD out_lod;
out_lod.push_back(out_lod0);
int64_t out_dim0 = out_lod0.back();
std::vector<int64_t> out_dims{out_dim0};
if (x_dims.size() == 2) {
out_dims.push_back(1);
} else {
for (size_t i = 2; i < x_dims.size(); ++i) {
out_dims.push_back(x_dims[i]);
}
}
param.Out->Resize(out_dims);
param.Out->set_lod(out_lod);
param.Out->template mutable_data<T>();
int64_t padded_length = param.X->dims()[1];
math::UnpaddingLoDTensorFunctor<lite::TargetType::kX86, T>()(
......
......@@ -38,6 +38,7 @@ else()
add_kernel(match_matrix_tensor_compute_xpu XPU extra SRCS match_matrix_tensor_compute.cc DEPS ${lite_kernel_deps})
add_kernel(var_conv_2d_compute_xpu XPU extra SRCS var_conv_2d_compute.cc DEPS ${lite_kernel_deps})
add_kernel(search_grnn_compute_xpu XPU extra SRCS search_grnn_compute.cc DEPS ${lite_kernel_deps})
add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
# extra(fused kernel)
add_kernel(__xpu__resnet50_compute_xpu XPU extra SRCS __xpu__resnet50_compute.cc DEPS ${lite_kernel_deps})
......
......@@ -44,9 +44,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK_EQ(input_dims.size(), 4);
CHECK_EQ(filter_dims.size(), 4);
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
std::vector<int> dilations = op_info->GetAttr<std::vector<int>>("dilations");
auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
CHECK_EQ(strides.size(), 2L);
CHECK_EQ(dilations.size(), 2L);
......
......@@ -37,7 +37,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto out_name = op_info->Output("Out").front();
auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto global_pooling = op_info->GetAttr<bool>("global_pooling");
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
auto strides = op_info->GetAttr<std::vector<int>>("strides");
......
......@@ -42,6 +42,8 @@ void XPUSequencePoolCompute::Run() {
xdnn::Pooling_t pool_type = xdnn::Pooling_t::MAX_WITHOUT_INDEX;
if (pool_type_str == "MAX") {
} else if (pool_type_str == "SUM") {
pool_type = xdnn::Pooling_t::SUM;
} else if (pool_type_str == "LAST") {
pool_type = xdnn::Pooling_t::LAST;
} else {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/sequence_unpad_compute.h"
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// One-time setup: allocates the non-L3 device scratch pad that receives the
// LoD offsets (room for XPU_MAX_LOD_SIZE ints) and pre-reserves the host-side
// offset vector to the same element count.
void SequenceUnpadCompute::PrepareForRun() {
  const size_t lod_bytes = XPU_MAX_LOD_SIZE * sizeof(int);
  lod_xpu_guard_ =
      TargetWrapperXPU::MallocScratchPad(lod_bytes, false /* use_l3 */);
  lod_cpu_.reserve(XPU_MAX_LOD_SIZE);
}
// Removes the padding from |X| using the per-sequence lengths in |Length| and
// writes the packed result, together with its level-0 LoD, to |Out|.
//
// Steps:
//   1. Build the cumulative sequence offsets from |Length| (host tensor) in a
//      single pass, both as uint64_t for the output LoD and as int for the
//      device-side offset table.
//   2. Shape |Out| as {total_len, 1} when |X| is 2-D, otherwise as
//      {total_len, x_dims[2], ...}.
//   3. Upload the int offsets to the device scratch pad and invoke
//      xdnn::sequence_unpad.
//
// The per-row element count |dim| is the product of the trailing dims rather
// than numel() / total_len, so an empty batch or all-zero lengths no longer
// divide by zero. In that case the output is still resized and allocated, but
// the device copy and kernel launch are skipped because there is nothing to
// unpad.
void SequenceUnpadCompute::Run() {
  auto& param = this->template Param<param_t>();
  auto& ctx = this->ctx_->template As<XPUContext>();
  auto x_dims = param.X->dims();
  auto len_dims = param.Length->dims();

  // XXX(miaotianxiang): Target of tensor |Length| is |kHost|.
  auto* seq_len_ptr = param.Length->template data<int64_t>();
  int64_t batch_size = len_dims[0];

  // Prefix sums of the sequence lengths: one table for the output LoD, one
  // (int) table for the device kernel.
  std::vector<uint64_t> out_lod0(batch_size + 1, 0);
  lod_cpu_ = {0};
  for (int64_t i = 0; i < batch_size; ++i) {
    out_lod0[i + 1] = out_lod0[i] + seq_len_ptr[i];
    lod_cpu_.push_back(lod_cpu_.back() + static_cast<int>(seq_len_ptr[i]));
  }
  paddle::lite::LoD out_lod;
  out_lod.push_back(out_lod0);

  // Output shape and the number of elements per unpadded row.
  int64_t out_dim0 = out_lod0.back();
  std::vector<int64_t> out_dims{out_dim0};
  int64_t dim = 1;
  if (x_dims.size() == 2) {
    out_dims.push_back(1);
  } else {
    for (size_t i = 2; i < x_dims.size(); ++i) {
      out_dims.push_back(x_dims[i]);
      dim *= x_dims[i];
    }
  }
  param.Out->Resize(out_dims);
  param.Out->set_lod(out_lod);
  auto* out_data = param.Out->mutable_data<float>(TARGET(kXPU));

  // Nothing to unpad: every sequence is empty or the batch itself is empty.
  if (out_dim0 == 0) {
    return;
  }

  // Grow the device scratch pad if this batch needs more offsets than it
  // currently holds, then upload the offsets.
  const size_t lod_bytes = (batch_size + 1) * sizeof(int);
  lod_xpu_guard_->Reserve(lod_bytes);
  TargetWrapperXPU::MemcpySync(
      lod_xpu_guard_->addr_, lod_cpu_.data(), lod_bytes, IoDirection::HtoD);

  int r = xdnn::sequence_unpad(
      ctx.GetRawContext(),                           /* ctx */
      param.X->data<float>(),                        /* pad_data */
      out_data,                                      /* seq_data */
      reinterpret_cast<int*>(lod_xpu_guard_->addr_), /* sequence */
      param.X->dims()[1],                            /* pad_seq_len */
      batch_size,                                    /* batch_size */
      static_cast<int>(dim) /* dim */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers SequenceUnpadCompute as the "def" kernel of the sequence_unpad op
// for target kXPU, precision kFloat and layout kNCHW.
// Bindings: "X" and "Out" are XPU tensors; "Length" is an int64 host tensor
// (Run() reads it directly through a host pointer).
REGISTER_LITE_KERNEL(sequence_unpad,
kXPU,
kFloat,
kNCHW,
paddle::lite::kernels::xpu::SequenceUnpadCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindInput("Length",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <vector>
#include "lite/backends/xpu/target_wrapper.h" // XPUScratchPadGuard
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// XPU float kernel for the sequence_unpad op, parameterized by
// operators::SequenceUnpadParam.
class SequenceUnpadCompute
: public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
public:
using param_t = operators::SequenceUnpadParam;
// Allocates the device scratch pad and reserves the host offset vector.
void PrepareForRun() override;
// Computes the output LoD/shape and launches the unpad kernel.
void Run() override;
private:
// Device-side scratch pad that receives the int sequence offsets.
XPUScratchPadGuard lod_xpu_guard_;
// Host-side staging copy of the int sequence offsets, rebuilt on each Run().
std::vector<int> lod_cpu_;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册