From 3bca4649a881a60a7665785fa25bc849c32a167f Mon Sep 17 00:00:00 2001 From: BUG1989 <248857878@qq.com> Date: Fri, 9 Apr 2021 16:27:54 +0800 Subject: [PATCH] Fix, some bug of rv64 cpu implement (#613) --- include/compiler_fp16.h | 2 +- src/dev/cpu/op/cast/compiler_fp16.h | 2 +- src/dev/cpu/op/conv/conv_hcl_rv64.c | 5 ++++- src/dev/cpu/op/conv/rv64/conv_kernel_rv64.c | 10 ++++++++-- src/dev/cpu/op/conv/rv64/conv_kernel_rv64.h | 4 ++-- src/dev/cpu/op/rv64/{sgemm_4x16_a72.S => sgemm_4x16.S} | 1 - src/dev/cpu/op/rv64/{sgemm_4x4_a72.S => sgemm_4x4.S} | 1 - 7 files changed, 16 insertions(+), 9 deletions(-) rename src/dev/cpu/op/rv64/{sgemm_4x16_a72.S => sgemm_4x16.S} (99%) rename src/dev/cpu/op/rv64/{sgemm_4x4_a72.S => sgemm_4x4.S} (99%) diff --git a/include/compiler_fp16.h b/include/compiler_fp16.h index 53b0310a..1857d7ee 100644 --- a/include/compiler_fp16.h +++ b/include/compiler_fp16.h @@ -32,7 +32,7 @@ extern "C" { #endif -#ifdef __ARM_ARCH +#if defined __ARM_ARCH || defined __riscv #define fp16_to_fp32(data) \ ({ \ diff --git a/src/dev/cpu/op/cast/compiler_fp16.h b/src/dev/cpu/op/cast/compiler_fp16.h index f2905896..ebd19e14 100644 --- a/src/dev/cpu/op/cast/compiler_fp16.h +++ b/src/dev/cpu/op/cast/compiler_fp16.h @@ -33,7 +33,7 @@ extern "C" { #else -#ifdef __ARM_ARCH +#if defined __ARM_ARCH || defined __riscv #define fp16_to_fp32(data) \ ({ \ diff --git a/src/dev/cpu/op/conv/conv_hcl_rv64.c b/src/dev/cpu/op/conv/conv_hcl_rv64.c index 86ee626c..770a9655 100644 --- a/src/dev/cpu/op/conv/conv_hcl_rv64.c +++ b/src/dev/cpu/op/conv/conv_hcl_rv64.c @@ -17,6 +17,10 @@ * under the License. */ +/* + * Copyright (c) 2021, OPEN AI LAB + * Author: ddzhao@openailab.com + */ #include "sys_port.h" #include "module.h" @@ -92,7 +96,6 @@ static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph) { - fprintf(stderr, "conv hcl start\n"); struct ir_node* ir_node = exec_node->ir_node; struct ir_graph* ir_graph = ir_node->graph; struct ir_tensor* input_tensor; diff --git a/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.c b/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.c index 0b6431c9..d889f3d3 100644 --- a/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.c +++ b/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.c @@ -18,9 +18,10 @@ */ /* - * Copyright (c) 2020, OPEN AI LAB - * Author: haoluo@openailab.com + * Copyright (c) 2021, OPEN AI LAB + * Author: ddzhao@openailab.com */ + #include #include #include @@ -381,6 +382,11 @@ static void sgemm4x4(float* col, float* kernel, float* biases, float* output, in { cur_col = ( float* )(col + col_line * kernel_size); sgemm_4x4_rv64(cur_biases, cur_col, cur_kernel, kernel_size, result, 4, activation, 0); + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < (col_end3); j++) + *(output + (kernel_num + i) * output_xy + col_line + j) = result[(i << 2) + j]; + } } } if (kernel_end3) diff --git a/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.h b/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.h index 2957067e..d1ee7661 100644 --- a/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.h +++ b/src/dev/cpu/op/conv/rv64/conv_kernel_rv64.h @@ -18,8 +18,8 @@ */ /* - * Copyright (c) 2020, Martin Han - * Author: hansh-sz@hotmail.com + * Copyright (c) 2021, OPEN AI LAB + * Author: ddzhao@openailab.com */ #ifndef _CONV_KERNEL_RV64_H_ diff --git a/src/dev/cpu/op/rv64/sgemm_4x16_a72.S b/src/dev/cpu/op/rv64/sgemm_4x16.S similarity index 99% rename from src/dev/cpu/op/rv64/sgemm_4x16_a72.S rename to src/dev/cpu/op/rv64/sgemm_4x16.S index 6826d27e..b8b7431e 100644 --- a/src/dev/cpu/op/rv64/sgemm_4x16_a72.S +++ b/src/dev/cpu/op/rv64/sgemm_4x16.S @@ -36,7 +36,6 @@ // input 4 x p kernel p x 16 biases 4 x 16 output 4 x 16 p = kernel size // // -// optimised for Cortex-A72 pipeline 64 cycle per loop (4*16*4 dot product) // load 4 more input and 8 more kernel to improve loop performance // // input: diff --git a/src/dev/cpu/op/rv64/sgemm_4x4_a72.S b/src/dev/cpu/op/rv64/sgemm_4x4.S similarity index 99% rename from src/dev/cpu/op/rv64/sgemm_4x4_a72.S rename to src/dev/cpu/op/rv64/sgemm_4x4.S index 22e1dedc..8fe4218b 100644 --- a/src/dev/cpu/op/rv64/sgemm_4x4_a72.S +++ b/src/dev/cpu/op/rv64/sgemm_4x4.S @@ -36,7 +36,6 @@ // input 4 x p kernel p x 4 biases 4 x 4 output 4 x 4 p = kernel size // // -// optimised for Cortex-A72 pipeline 18 cycle per loop (4*4*4 dot product) // // input: // x0 arg0 biases address {b0,b1,b2,b3} nullptr means no biases -- GitLab