Commit 73bac0f2 authored by chenjiaoAngel

fix conflict, test=develop

Parent 40482334
......@@ -583,7 +583,7 @@ void pooling_basic(const float* din,
"vld2.f32 {d0-d3}, [%[dr0]]! @ load \n" \
"vld2.f32 {d4-d7}, [%[dr1]]! @ load \n" \
"vadd.f32 q5, q9, q8 @ max reduce\n" \
"subs %[cnt_num], %[cnt_num], #1 @ subs cnt_num \n" \
"subs %[cnt_num], #1 @ subs cnt_num \n" \
"vmul.f32 q4, q5, %q[vcoef_left] @ mul coef \n" \
"vst1.f32 {d8-d9}, [%[dr_out]]! @ store 4 out \n" \
"ble 2f @ bne\n"
......@@ -1345,19 +1345,16 @@ void pooling2x2s2p1_max(const float* din,
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vzero] "w" (vzero)
: [vzero] "w"(vzero)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v8");
#else
-  // cnt_num -= 1;
asm volatile(
-      P2x2S2_INIT
-      P2x2S2P1_MAX
-      P2x2S2P0_MAX "2: \n" /* end */
+      P2x2S2_INIT P2x2S2P1_MAX P2x2S2P0_MAX "2: \n" /* end */
: [dr0] "+r"(dr0),
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vzero] "w" (vzero)
: [vzero] "w"(vzero)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q8", "q9");
#endif
dr0 -= 8;
......@@ -1455,7 +1452,8 @@ void pooling2x2s2p1_avg(const float* din,
}
float coef_left_most = exclusive ? coef_h : coef_h / 2;
float32x4_t vcoef = vdupq_n_f32(coef_h / 2);
-  float coef_left[4] = {coef_left_most, coef_h / 2, coef_h / 2, coef_h / 2};
+  float coef_left[4] =
+      {coef_left_most, coef_h / 2, coef_h / 2, coef_h / 2};
float32x4_t vcoef_left = vld1q_f32(coef_left);
int cnt_num = w_unroll_size;
if (w_unroll_size > 0) {
......@@ -1466,7 +1464,9 @@ void pooling2x2s2p1_avg(const float* din,
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vcoef] "w"(vcoef), [vzero] "w"(vzero), [vcoef_left] "w"(vcoef_left)
: [vcoef] "w"(vcoef),
[vzero] "w"(vzero),
[vcoef_left] "w"(vcoef_left)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v8");
#else
asm volatile(
......@@ -1475,7 +1475,9 @@ void pooling2x2s2p1_avg(const float* din,
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vcoef] "w"(vcoef), [vzero] "w"(vzero), [vcoef_left] "w"(vcoef_left)
: [vcoef] "w"(vcoef),
[vzero] "w"(vzero),
[vcoef_left] "w"(vcoef_left)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q8", "q9");
#endif
dr0 -= 8;
......@@ -2571,7 +2573,6 @@ void pooling3x3s2p0_max(const float* din,
[dr1] "+r"(dr1),
[dr2] "+r"(dr2),
[dr_out] "+r"(dr_out),
[remain] "+r" (cnt_remain),
[cnt_num] "+r"(cnt_num)
:
: "cc",
......@@ -2633,7 +2634,7 @@ void pooling3x3s2p0_max(const float* din,
[dr1] "+r"(dr1),
[dr2] "+r"(dr2),
[dr_out] "+r"(dr_out),
[remain] "+r" (cnt_remain),
[remain] "+r"(cnt_remain),
[cnt_num] "+r"(cnt_num)
:
: "cc",
......@@ -2650,20 +2651,17 @@ void pooling3x3s2p0_max(const float* din,
"q9",
"q10",
"q11");
#endif
-      // dr0 -= 8;
-      // dr1 -= 8;
-      // dr2 -= 8;
-      if (right){
+      if (right) {
int wstart = (w_unroll_size * 4 + remain) * S;
int wend = std::min(wstart + K, win);
-        float tmp = dr0[wstart];//std::numeric_limits<float>::min();
-        for(int i = wstart; i < wend; i++){
-          tmp = std::max(tmp,std::max(dr0[i],dr1[i]));
-          tmp = std::max(tmp,dr2[i]);
+        float tmp = dr0[wstart]; //std::numeric_limits<float>::min();
+        for (int i = wstart; i < wend; i++) {
+          tmp = std::max(tmp, std::max(dr0[i], dr1[i]));
+          tmp = std::max(tmp, dr2[i]);
}
*(dr_out++) = tmp;
}
#endif
}
r0 = r2;
......
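For reference, a minimal scalar sketch of what the vcoef / vcoef_left setup in the pooling2x2s2p1_avg hunks above encodes: with pad_left = 1 the first output column covers only one input column, so in exclusive mode it is scaled by coef_h instead of coef_h / 2. This is an illustrative sketch under those assumptions (hypothetical helper name), not the kernel's actual code:

#include <algorithm>

// Scalar model of one output row of 2x2, stride-2 average pooling with
// pad_left = 1. coef_h already carries the vertical 1/window factor, so
// only the horizontal scaling is applied here.
void avg_pool_row_2x2s2p1(const float* r0, const float* r1, float* dout,
                          int win, int wout, float coef_h, bool exclusive) {
  for (int ow = 0; ow < wout; ++ow) {
    int ws = std::max(ow * 2 - 1, 0);    // window start (pad_left = 1)
    int we = std::min(ow * 2 + 1, win);  // window end, clipped to the input
    float sum = 0.f;
    for (int i = ws; i < we; ++i) sum += r0[i] + r1[i];
    // exclusive: divide by the clipped width (1 at the left border);
    // inclusive: always divide by the full kernel width of 2.
    float coef = exclusive ? coef_h / (we - ws) : coef_h / 2.f;
    dout[ow] = sum * coef;
  }
}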
......@@ -70,8 +70,7 @@ class PoolingPE : public PE {
param_.poolingArgs = args;
// use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1
// &&
// (k_width > 7 || k_height > 7);
// && (k_width > 7 || k_height > 7);
use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 &&
(k_width > 255 || k_height > 255);
// use_cpu_ = param_.type == AVERAGE;
......
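The use_cpu_ heuristic above routes poolings with a 1x1 output (i.e. global pooling) and very large windows to the host CPU. As a point of reference, a minimal sketch of the global average case such a fallback has to cover, assuming NCHW layout (hypothetical helper, not PoolingPE's actual CPU path):

// Global average pooling: one output value per (batch, channel) plane.
void global_avg_pool_nchw(const float* in, float* out,
                          int n, int c, int h, int w) {
  const int hw = h * w;
  for (int i = 0; i < n * c; ++i) {
    float sum = 0.f;
    for (int j = 0; j < hw; ++j) sum += in[i * hw + j];
    out[i] = sum / hw;
  }
}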
......@@ -64,20 +64,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
return FAILED;
}
auto padding = op_info->GetAttr<std::vector<int>>("paddings");
-  bool pads_equal = (padding[0] == padding[1]) && (padding[2] == padding[3]);
-  if (!pads_equal) {
-    LOG(FATAL)
-        << "padding requires pad_left == pad_right, pad_top == pad_bottom";
-  }
-  auto npu_pad =
-      ge::AttrValue::LIST_INT{padding[0], padding[1], padding[2], padding[3]};
-  auto strides = op_info->GetAttr<std::vector<int>>("strides");
-  auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end());
-  int npu_ceil_mode = 0;
-  if (op_info->HasAttr("ceil_mode")) {
-    npu_ceil_mode = op_info->GetAttr<bool>("ceil_mode") ? 1 : 0;
-  }
// pad mode
int pad_mode = 0;
std::string padding_algorithm("");
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/pool_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/xpu/bridges/registry.h"
#include "lite/kernels/xpu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
namespace bridges {
void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
Scope* scope = op->scope();
const OpInfo* op_info = op->op_info();
auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
auto& in_dims = x->dims();
auto& out_dims = out->dims();
const float* src_ptr = x->data<const float>();
float* dst_ptr = out->mutable_data<float>();
std::vector<int> ksize = op_info->GetAttr<std::vector<int>>("ksize");
std::vector<int> strides = op_info->GetAttr<std::vector<int>>("strides");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
bool exclusive = op_info->GetAttr<bool>("exclusive");
std::string pooling_type = op_info->GetAttr<std::string>("pooling_type");
bool global_pooling = op_info->GetAttr<bool>("global_pooling");
int in_n = in_dims[0];
int in_c = in_dims[1];
int in_h = in_dims[2];
int in_w = in_dims[3];
int size_in_n = in_c * in_h * in_w;
int size_in_c = in_h * in_w;
int out_h = out_dims[2];
int out_w = out_dims[3];
int size_out_n = in_c * out_h * out_w;
int size_out_c = out_h * out_w;
int window_h = ksize[0];
int window_w = ksize[1];
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[2];
if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) {
for (int c = 0; c < in_c; ++c) {
const float* src = src_ptr + n * size_in_n + c * size_in_c;
float res = src[0];
if (pooling_type == "max") {
for (int i = 1; i < size_in_c; ++i) {
float cur_val = src[i];
res = cur_val > res ? cur_val : res;
}
} else if (pooling_type == "avg") {
for (int i = 1; i < size_in_c; ++i) {
float cur_val = src[i];
res += cur_val;
}
res /= size_in_c;
}
dst_ptr[n * size_out_n + c] = res;
}
}
} else {
for (int n = 0; n < in_n; ++n) {
for (int c = 0; c < in_c; ++c) {
for (int h = 0; h < out_h; ++h) {
int sh = h * stride_h;
int eh = sh + window_h;
sh = (sh - pad_h) < 0 ? 0 : sh - pad_h;
eh = (eh - pad_h) > in_h ? in_h : eh - pad_h;
for (int w = 0; w < out_w; ++w) {
int sw = w * stride_w;
int ew = sw + window_w;
sw = (sw - pad_w) < 0 ? 0 : sw - pad_w;
ew = (ew - pad_w) > in_w ? in_w : ew - pad_w;
int pooling_size = (ew - sw) * (eh - sh);
if (pooling_size == 0) continue;
float res = 0.f;
for (int kh = sh; kh < eh; ++kh) {
for (int kw = sw; kw < ew; ++kw) {
int src_idx = n * size_in_n + c * size_in_c + kh * in_w + kw;
if (kh == sh && kw == sw) {
res = src_ptr[src_idx];
} else {
if (pooling_type == "max") {
res = res >= src_ptr[src_idx] ? res : src_ptr[src_idx];
}
if (pooling_type == "avg") {
res += src_ptr[src_idx];
}
}
}
}
if (pooling_type == "avg") {
if (exclusive) {
res /= pooling_size;
} else {
res /= window_h * window_w;
}
}
dst_ptr[n * size_out_n + c * size_out_c + h * out_w + w] = res;
}
}
}
}
}
}
void test_pool(int bs,
int ic,
int ih,
int iw,
std::string pooling_type,
bool ceil_mode,
bool global_pooling,
bool exclusive,
int ksize,
int stride,
int padding) {
// prepare input&output variables
Scope scope;
std::string x_var_name = "x";
std::string out_var_name = "out";
std::string out_ref_var_name = "out_ref";
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
x->Resize({bs, ic, ih, iw});
// initialize input&output data
FillTensor<float>(x);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("pool2d");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("pooling_type", pooling_type);
opdesc.SetAttr("ksize", std::vector<int>({ksize, ksize}));
opdesc.SetAttr("global_pooling", global_pooling);
opdesc.SetAttr("exclusive", exclusive);
opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
opdesc.SetAttr("paddings",
std::vector<int>({padding, padding, padding, padding}));
opdesc.SetAttr("ceil_mode", ceil_mode);
// create and convert op to XPU model, then run it on XPU
auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope);
LauchOp(op, {x_var_name}, {out_var_name});
out_ref->CopyDataFrom(*out);
// execute reference implementation and save to output tensor
pool_ref(op);
// compare results
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
}
}
TEST(XPUBridges, pool) {
for (auto pooling_type : {"max", "avg"}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
for (auto ih : {3}) {
for (auto iw : {4}) {
test_pool(bs, ic, ih, iw, pooling_type, true, true, true, 0, 1, 0);
}
}
}
}
}
for (auto pooling_type : {"max"}) {
for (auto ceil_mode : {true, false}) {
for (auto ksize : {2, 3}) {
for (auto stride : {1, 2}) {
for (auto padding : {0, 1}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
for (auto ih : {3}) {
for (auto iw : {4}) {
test_pool(bs,
ic,
ih,
iw,
pooling_type,
ceil_mode,
false,
true,
ksize,
stride,
padding);
}
}
}
}
}
}
}
}
}
for (auto pooling_type : {"avg"}) {
for (auto ceil_mode : {true, false}) {
for (auto exclusive : {true, false}) {
for (auto ksize : {2, 3}) {
for (auto stride : {1, 2}) {
for (auto padding : {0, 1}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
for (auto ih : {3}) {
for (auto iw : {4}) {
test_pool(bs,
ic,
ih,
iw,
pooling_type,
ceil_mode,
false,
exclusive,
ksize,
stride,
padding);
}
}
}
}
}
}
}
}
}
}
}
} // namespace bridges
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(pool2d);
USE_XPU_BRIDGE(pool2d);
......@@ -41,39 +41,6 @@ bool PoolOpLite::CheckShape() const {
return true;
}
-inline void UpdatePadding(std::vector<int>* paddings,
-                          const bool global_pooling,
-                          const bool adaptive,
-                          const std::string padding_algorithm,
-                          const lite::DDim data_dims,
-                          const std::vector<int>& strides,
-                          const std::vector<int>& ksize) {
-  // when padding_algorithm is "VALID" or "SAME"
-  if (padding_algorithm == "SAME") {
-    for (int i = 0; i < strides.size(); ++i) {
-      int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
-      int pad_sum =
-          std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
-                   (int64_t)0);
-      int pad_0 = pad_sum / 2;
-      int pad_1 = pad_sum - pad_0;
-      *(paddings->begin() + i * 2) = pad_0;
-      *(paddings->begin() + i * 2 + 1) = pad_1;
-    }
-  } else if (padding_algorithm == "VALID") {
-    for (auto it = paddings->begin(); it != paddings->end(); it++) {
-      *it = 0;
-    }
-  }
-  // if global_pooling == true or adaptive == true, padding will be ignore
-  if (global_pooling || adaptive) {
-    for (auto it = paddings->begin(); it != paddings->end(); it++) {
-      *it = 0;
-    }
-  }
-}
int PoolOutputSize(int input_size,
int filter_size,
int pad_left,
......
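For reference, the SAME branch of the UpdatePadding helper removed above follows the standard rule: out = ceil(in / stride) and pad_sum = max((out - 1) * stride + ksize - in, 0), split as evenly as possible with the extra pixel on the end side. A standalone sketch with a worked example (hypothetical name, not a Paddle-Lite API):

#include <algorithm>
#include <utility>

// SAME padding for one spatial dimension: returns {pad_begin, pad_end}.
std::pair<int, int> same_padding(int in, int stride, int ksize) {
  int out = (in + stride - 1) / stride;  // ceil(in / stride)
  int pad_sum = std::max((out - 1) * stride + ksize - in, 0);
  int pad_0 = pad_sum / 2;               // odd remainder goes to pad_end
  return {pad_0, pad_sum - pad_0};
}
// e.g. same_padding(5, 2, 3): out = 3, pad_sum = 2 -> pads {1, 1}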