未验证 提交 3bca4649 编写于 作者: B BUG1989 提交者: GitHub

Fix some bugs in the rv64 CPU implementation (#613)

上级 c7989a4b
......@@ -32,7 +32,7 @@
extern "C" {
#endif
#ifdef __ARM_ARCH
#if defined __ARM_ARCH || defined __riscv
#define fp16_to_fp32(data) \
({ \
......
......@@ -33,7 +33,7 @@ extern "C" {
#else
#ifdef __ARM_ARCH
#if defined __ARM_ARCH || defined __riscv
#define fp16_to_fp32(data) \
({ \
......
......@@ -17,6 +17,10 @@
* under the License.
*/
/*
* Copyright (c) 2021, OPEN AI LAB
* Author: ddzhao@openailab.com
*/
#include "sys_port.h"
#include "module.h"
......@@ -92,7 +96,6 @@ static int prerun(struct node_ops* node_ops, struct exec_node* exec_node, struct
static int run(struct node_ops* node_ops, struct exec_node* exec_node, struct exec_graph* exec_graph)
{
fprintf(stderr, "conv hcl start\n");
struct ir_node* ir_node = exec_node->ir_node;
struct ir_graph* ir_graph = ir_node->graph;
struct ir_tensor* input_tensor;
......
......@@ -18,9 +18,10 @@
*/
/*
* Copyright (c) 2020, OPEN AI LAB
* Author: haoluo@openailab.com
* Copyright (c) 2021, OPEN AI LAB
* Author: ddzhao@openailab.com
*/
#include <stdint.h>
#include <stdlib.h>
#include <math.h>
......@@ -381,6 +382,11 @@ static void sgemm4x4(float* col, float* kernel, float* biases, float* output, in
{
cur_col = ( float* )(col + col_line * kernel_size);
sgemm_4x4_rv64(cur_biases, cur_col, cur_kernel, kernel_size, result, 4, activation, 0);
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < (col_end3); j++)
*(output + (kernel_num + i) * output_xy + col_line + j) = result[(i << 2) + j];
}
}
}
if (kernel_end3)
......
......@@ -18,8 +18,8 @@
*/
/*
* Copyright (c) 2020, Martin Han
* Author: hansh-sz@hotmail.com
* Copyright (c) 2021, OPEN AI LAB
* Author: ddzhao@openailab.com
*/
#ifndef _CONV_KERNEL_RV64_H_
......
......@@ -36,7 +36,6 @@
// input 4 x p kernel p x 16 biases 4 x 16 output 4 x 16 p = kernel size
//
//
// optimised for Cortex-A72 pipeline 64 cycle per loop (4*16*4 dot product)
// load 4 more input and 8 more kernel to improve loop performance
//
// input:
......
......@@ -36,7 +36,6 @@
// input 4 x p kernel p x 4 biases 4 x 4 output 4 x 4 p = kernel size
//
//
// optimised for Cortex-A72 pipeline 18 cycle per loop (4*4*4 dot product)
//
// input:
// x0 arg0 biases address {b0,b1,b2,b3} nullptr means no biases
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册