提交 ad5087c9 作者: hjchen2

Optimize int8 depthwise conv

上级 de37013f
...@@ -88,8 +88,8 @@ template <> ...@@ -88,8 +88,8 @@ template <>
inline int8_t Round<ROUND_NEAREST_TO_EVEN>(const float &x) { inline int8_t Round<ROUND_NEAREST_TO_EVEN>(const float &x) {
float v = std::round(x); float v = std::round(x);
int32_t q = static_cast<int32_t>(v); int32_t q = static_cast<int32_t>(v);
if (abs(abs(q - v) - 0.5) <= 0) { if (std::abs(std::abs(q - v) - 0.5) <= 0) {
if (abs(q) % 2 != 0) { if (std::abs(q) % 2 != 0) {
q = q + ((q > 0) ? -1 : 1); q = q + ((q > 0) ? -1 : 1);
} }
} }
......
...@@ -180,10 +180,10 @@ inline void DepthwiseConv3x3(const ConvParam<CPU> &param) { ...@@ -180,10 +180,10 @@ inline void DepthwiseConv3x3(const ConvParam<CPU> &param) {
Tensor in_batch = input->Slice(i, i + 1); Tensor in_batch = input->Slice(i, i + 1);
Tensor out_batch = output->Slice(i, i + 1); Tensor out_batch = output->Slice(i, i + 1);
if (strides[0] == 1) { if (strides[0] == 1) {
math::DepthwiseConv3x3s1<Itype, Otype>(in_batch, *filter, paddings, math::DepthwiseConv3x3S1<Itype, Otype>(in_batch, *filter, paddings,
&out_batch); &out_batch);
} else if (strides[0] == 2) { } else if (strides[0] == 2) {
math::DepthwiseConv3x3s2<Itype, Otype>(in_batch, *filter, paddings, math::DepthwiseConv3x3S2<Itype, Otype>(in_batch, *filter, paddings,
&out_batch); &out_batch);
} else { } else {
// math::DepthwiseConv3x3<Itype, Otype>(input_pad, *filter, // math::DepthwiseConv3x3<Itype, Otype>(input_pad, *filter,
......
...@@ -74,13 +74,13 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input, ...@@ -74,13 +74,13 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
// framework::Tensor *output); // framework::Tensor *output);
template <typename Itype, typename Otype> template <typename Itype, typename Otype>
void DepthwiseConv3x3s1(const framework::Tensor &input, void DepthwiseConv3x3S1(const framework::Tensor &input,
const framework::Tensor &filter, const framework::Tensor &filter,
const std::vector<int> &paddings, const std::vector<int> &paddings,
framework::Tensor *output); framework::Tensor *output);
template <typename Itype, typename Otype> template <typename Itype, typename Otype>
void DepthwiseConv3x3s2(const framework::Tensor &input, void DepthwiseConv3x3S2(const framework::Tensor &input,
const framework::Tensor &filter, const framework::Tensor &filter,
const std::vector<int> &paddings, const std::vector<int> &paddings,
framework::Tensor *output); framework::Tensor *output);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册