提交 69564072 编写于 作者: Z ZhenWang

update the usage of matmul

上级 4b9cace6
develop _release/v2.6.2 gh-pages github/fork/AnBaolei1984/baolei/bitmain github/fork/AnBaolei1984/bitmain github/fork/Cambricon/develop github/fork/DannyIsFunny/Android5 github/fork/DannyIsFunny/Update_MemOpt github/fork/DannyIsFunny/fix_pow github/fork/DannyIsFunny/fix_v26_windows github/fork/GaoWei8/reduce_sum github/fork/GaoWei8/reduce_sum_test_con github/fork/LDOUBLEV/ocr github/fork/Leonardo-Ding/dwh_dev github/fork/MaxwellDing/develop github/fork/MyPandaShaoxiang/fpga_patch github/fork/MyPandaShaoxiang/int8 github/fork/MyPandaShaoxiang/nlp_correct github/fork/MyPandaShaoxiang/opencl_valid github/fork/MyPandaShaoxiang/release/v2.3 github/fork/NHZlX/more_jeston_support github/fork/PaddleLite-EB/merge1.4 github/fork/PaddleLite-EB/new_dev github/fork/Wangzheee/matrix_nms_op github/fork/Xreki/step_rnn/opt_ddim_lite github/fork/cathwong/patch-1 github/fork/cclauss/patch-1 github/fork/chenjiaoAngel/cherry_pic github/fork/chenjiaoAngel/conv_dw_5x5 github/fork/chenjiaoAngel/conv_dw_5x5s2 github/fork/edimetia3d/arm_update_elementwise_op github/fork/edimetia3d/host_deformable_conv github/fork/edimetia3d/matrix_nms_host github/fork/edimetia3d/update_pow_op github/fork/edimetia3d/update_yolo_box github/fork/haozech/develop github/fork/haozech/infershape_chz github/fork/haozech/parl-develop github/fork/jackzhang235/develop github/fork/jameswu2014/develop github/fork/jiansowa/jiansowa/img_nna github/fork/jiweibo/stream_manage github/fork/juncaipeng/add_cast github/fork/lijianshe02/lite-x86 github/fork/qili93/update_sup_model_v26 github/fork/qjing666/develop github/fork/qnqinan/develop github/fork/qnqinan/track-develop github/fork/sangoly/python_compa github/fork/smilejames/develop github/fork/sunsetlh/sunsetlh/xpu_multi_test github/fork/wangqunbaidu/develop github/fork/weihaoji/whj_27 github/fork/weihaoji/xpu_res2net_fusion github/fork/weihaoji/xpu_weihaoji_dev github/fork/xiebaiyuan/fix_leak_opencl github/fork/xiebaiyuan/opencl_depthwised1 
github/fork/xiebaiyuan/opencl_softmax github/fork/yanghongtian/yanghongtian/add_ascend310_target_place github/fork/yiicy/computelib github/fork/yongqiangma/bm_card github/fork/yongqiangma/calib github/fork/yongqiangma/copytocpu github/fork/yongqiangma/gpu github/fork/yongqiangma/pass github/fork/yongqiangma/pool github/fork/yongqiangma/priorbox github/fork/yongqiangma/shape github/fork/yongqiangma/split_c github/fork/yongqiangma/trans github/fork/yongqiangma/trans2 github/fork/yongqiangma/workspace github/fork/ysh329/add-cl-kernel-member-for-opencl github/fork/ysh329/add-get-output github/fork/ysh329/cherry-pick-precision-profiler-enhance github/fork/ysh329/fix-opencl-concat github/fork/ysh329/support-int64-copy-from-to-cpu github/fork/zhaoyang-star/enable_prifile_in_tiny_publish github/fork/zhaoyang-star/fix_openc_demo github/fork/zhaoyang-star/patch-1 github/fork/zhupengyang/opt release/v2.0.0 release/v2.0.0-beta1 release/v2.0.0-beta2 release/v2.0.0-rc release/v2.1.0 release/v2.2.0 release/v2.3 release/v2.6 release/v2.6.0 release/v2.7 revert-4368-hongming/test_v26 2.0.0-beta 1.5.0 v2.7-beta v2.6.3-beta2 v2.6.3-beta1 v2.6.2 v2.6.1 v2.6.0 v2.3.0 v2.2.0 v2.1.0 v2.0.0 v2.0.0-rc v2.0.0-beta1 v2.0.0-beta1-prerel release/1.4
5 合并请求: !3489 pull code, !3210 [Opencl] fix opencl bug, !3154 [arm] resize nnv12 bug, !3074 [opencl] add grid_sampler op, !1334 Add pooling int8
......@@ -73,13 +73,14 @@ void MulCompute(const MulParam<CPU> &param) {
}
if (param.InputX()->type() == typeid(int8_t)) {
out->mutable_data<int32_t>();
math::matmul(x_matrix, false, y_matrix, false, static_cast<float>(1), out,
static_cast<float>(0), false, static_cast<int32_t *>(nullptr));
math::matmul<float, int32_t>(x_matrix, false, y_matrix, false,
static_cast<float>(1), out,
static_cast<float>(0));
} else {
out->mutable_data<float>();
math::matmul(x_matrix, false, y_matrix, false, static_cast<float>(1), out,
static_cast<float>(0), false, static_cast<float *>(nullptr));
math::matmul<float>(x_matrix, false, y_matrix, false, static_cast<float>(1),
out, static_cast<float>(0));
}
if (out_dim.size() != 2) {
out->Resize(out_dim);
......
......@@ -85,16 +85,16 @@ int main() {
// int8_t without bias
// warm-up 10 times
for (int j = 0; j < 10; ++j) {
paddle_mobile::operators::math::matmul(
paddle_mobile::operators::math::matmul<float, int32_t>(
aa_int8, false, bb_int8, false, static_cast<float>(1), &cc_int32,
static_cast<float>(0), false, static_cast<int32_t*>(nullptr));
static_cast<float>(0));
}
auto time3 = time();
for (int j = 0; j < 10; ++j) {
paddle_mobile::operators::math::matmul(
paddle_mobile::operators::math::matmul<float, int32_t>(
aa_int8, false, bb_int8, false, static_cast<float>(1), &cc_int32,
static_cast<float>(0), false, static_cast<int32_t*>(nullptr));
static_cast<float>(0));
}
auto time4 = time();
std::cout << "int8_t gemm cost :" << time_diff(time3, time4) / 10 << "ms\n";
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#ifdef FUSION_CONVADDRELU_INT8_OP
#include <limits>
#include <iostream>
#include <limits>
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/fusion_conv_add_relu_int8_op.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册