提交 f50a235c 编写于 作者: S sunsuodong

fix_arm32_compile

上级 33fdc43f
...@@ -25,7 +25,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i ...@@ -25,7 +25,7 @@ void PRelu(float *input, float *output, PReluParameter *prelu_param_, int task_i
for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) { for (int j = task_id; j < prelu_param_->tile_block_; j += prelu_param_->op_parameter_.thread_num_) {
float *input_ptr = input + j * TILE_NUM * channel_num; float *input_ptr = input + j * TILE_NUM * channel_num;
float *output_ptr = input_ptr; float *output_ptr = input_ptr;
#ifdef ENABLE_NEON #ifdef ENABLE_ARM64
for (int i = 0; i < c4; i++) { for (int i = 0; i < c4; i++) {
int c_offset = i * C4NUM; int c_offset = i * C4NUM;
float32x4_t slope_value = vld1q_f32(negetive_slope_value + c_offset); float32x4_t slope_value = vld1q_f32(negetive_slope_value + c_offset);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册