// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include "lite/core/context.h" #include "lite/core/target_wrapper.h" #include "lite/operators/op_params.h" namespace paddle { namespace lite { namespace arm { namespace math { /// conv 3x3s1 size_t conv3x3s1_direct_workspace_size(const operators::ConvParam& param, ARMContext* ctx); void conv_3x3s1_direct_fp32(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx); template void conv_3x3s1_direct_int8(const int8_t* din, Dtype* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const int8_t* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); /// conv3x3s2 size_t conv3x3s2_direct_workspace_size(const operators::ConvParam& param, ARMContext* ctx); void conv_3x3s2_direct_fp32(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx); int conv_3x3s2_direct_int8_c_num(); template void conv_3x3s2_direct_int8(const int8_t* din, Dtype* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const int8_t* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_1x5s1_direct(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, int group, int kernel_w, int kernel_h, int stride_w, int stride_h, int dila_w, int dila_h, int pad_w, int pad_h, bool flag_bias, bool flag_relu, ARMContext& ctx); // NOLINT void conv_5x1s1_direct(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, int group, int kernel_w, int kernel_h, int stride_w, int stride_h, int dila_w, int dila_h, int pad_w, int pad_h, bool flag_bias, bool flag_relu, ARMContext& ctx); // NOLINT void conv1x1s1_gemm(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx); template void conv1x1s1_gemm_int8(const int8_t* din, Dtype* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const int8_t* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_im2col_gemm(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx); template void conv_im2col_gemm_int8(const int8_t* din, Dtype* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const int8_t* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); /// depthwise conv void conv_depthwise_3x3_fp32(const void* din, void* dout, int num, int ch_out, int h_out, int w_out, int ch_in, int h_in, int w_in, const void* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_depthwise_3x3_int8_fp32(const void* din, void* dout, int num, int ch_out, int h_out, int w_out, int ch_in, int h_in, int w_in, const void* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_depthwise_3x3_int8_int8(const void* din, void* dout, int num, int ch_out, int h_out, int w_out, int ch_in, int h_in, int w_in, const void* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_depthwise_5x5_fp32(const void* din, void* dout, int num, int ch_out, int h_out, int w_out, int ch_in, int h_in, int w_in, const void* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_depthwise_5x5_int8_fp32(const void* din, void* dout, int num, int ch_out, int h_out, int w_out, int ch_in, int h_in, int w_in, const void* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); void conv_depthwise_5x5_int8_int8(const void* din, void* dout, int num, int ch_out, int h_out, int w_out, int ch_in, int h_in, int w_in, const void* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx, const float* scale); /// winograd conv, only support 3x3s1 void conv_winograd3x3(const float* din, float* dout, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weights, const float* bias, const operators::ConvParam& param, ARMContext* ctx); void winograd_transform_weights( void* dout, const void* din, int ch_out, int ch_in, void* work_space); void fill_bias(float* tensor, const float* bias, int channel, int channel_size); void fill_bias_int8(int* tensor, const int* bias, int channel, int channel_size); // new winograd void weight_trans_c4_8x8( float* dest, const float* src, int ic, int oc, void* workspace); void weight_trans_c4_4x4( float* dest, const float* src, int ic, int oc, void* workspace); void conv_compute_6x6_3x3(const float* input, float* output, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weight, const float* bias, const operators::ConvParam& param, ARMContext* ctx); void conv_compute_2x2_3x3(const float* input, float* output, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weight, const float* bias, const operators::ConvParam& param, ARMContext* ctx); void conv_compute_2x2_3x3_small(const float* input, float* output, int num, int chout, int hout, int wout, int chin, int hin, int win, const float* weight, const float* bias, const operators::ConvParam& param, ARMContext* ctx); void input_trans_c8_4x4_int8(const int8_t* src, int src_stride, int src_h_stride, int16_t* dest, int dest_stride, int dest_h_stride); void output_trans_c8_post_2x4_int8(const int32_t* src, int src_stride, int src_h_stride, int32_t* dest, int dest_stride, int dest_h_stride); void weight_trans_c8_4x4_int8( int16_t* dest, const int8_t* src, int ic, int oc, void* workspace); template void conv_compute_2x2_3x3_int8(const int8_t* input, Dtype* output, int num, int chout, int hout, int wout, int chin, int hin, int win, const int16_t* weight, const float* bias, const float* scale, const operators::ConvParam& param, ARMContext* ctx); template void im2col(const Dtype* data_im, int channels, int height, int width, int kernel_h, int kernel_w, int pad_top, int pad_bottom, int pad_left, int pad_right, int stride_h, int stride_w, int dilation_h, int dilation_w, Dtype* data_col); } // namespace math } // namespace arm } // namespace lite } // namespace paddle