Commit b5a0488a authored by qnqinan, committed by zhangyang0701

update FPGA v2 pe cpp file and ew kernel files, test=develop, closes #2432 (#2433)

* update proposal and psroipool kernel file in FPGA V2 track

* update, test=develop

* update FPGA v2 pe cpp file and ew kernel files, test=develop
Parent ce21ff5d
@@ -623,7 +623,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
    arg->concat_arg.images_in[i] =
        (int8_t *)arg->conv_arg[i].output.address;  // NOLINT
-   arg->concat_arg.scales_in[i] = arg->conv_arg[i].output.scale_address;
+   arg->concat_arg.scales_in[i] = out->scale;
    arg->concat_arg.channel_num[i] = arg->conv_arg[i].filter_num;
    expand_conv_arg(&arg->conv_arg[i]);
......
This diff is collapsed.
@@ -37,7 +37,7 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
  int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23,
                          -20, 39, 36, -43, -34, 59, 49, -63, -54,
                          79, 69, -96, -77, 112, 93, -137, -118, 153,
-                         134, -204, -188, 220, 204, -281, -395, 296, 441};
+                         134, -204, -188, 220, 204, -281, -395, 296, 411};
  int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
                           0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
......
@@ -12,12 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
+#include <math.h>
#include "operators/kernel/elementwise_add_kernel.h"
-#include <string>
-#include "fpga/V2/api.h"
namespace paddle_mobile {
namespace operators {
@@ -60,10 +57,36 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
  return true;
}
+void ComputeCPUEWAdd(fpga::EWAddArgs ewaddArgs) {
+  int inputc = ewaddArgs.image0.channels;
+  int inputh = ewaddArgs.image0.height;
+  int inputw = ewaddArgs.image0.width;
+  float inScale0 =
+      (reinterpret_cast<float*>(ewaddArgs.image0.scale_address))[0];
+  float inScale1 =
+      (reinterpret_cast<float*>(ewaddArgs.image1.scale_address))[0];
+  float outScale =
+      (reinterpret_cast<float*>(ewaddArgs.output.scale_address))[0];
+  int8_t* inPtr0 = reinterpret_cast<int8_t*>(ewaddArgs.image0.address);
+  int8_t* inPtr1 = reinterpret_cast<int8_t*>(ewaddArgs.image1.address);
+  int8_t* outPtr = reinterpret_cast<int8_t*>(ewaddArgs.output.address);
+  int datasize = inputc * inputh * inputw;
+  float const0 = inScale0 / outScale;
+  float const1 = inScale1 / outScale;
+  fpga::fpga_invalidate(inPtr0, datasize * sizeof(int8_t));
+  fpga::fpga_invalidate(inPtr1, datasize * sizeof(int8_t));
+  for (int i = 0; i < datasize; i++) {
+    float tmpF = inPtr0[i] * const0 + inPtr1[i] * const1;
+    int tmpI = static_cast<int>(round(tmpF));
+    outPtr[i] = (int8_t)((tmpI > 127 ? 127 : (tmpI < -127 ? -127 : tmpI)));
+  }
+  fpga::fpga_flush(outPtr, datasize * sizeof(int8_t));
+}
template <>
void ElementwiseAddKernel<FPGA, float>::Compute(
    const ElementwiseAddParam<FPGA> &param) {
-  fpga::ComputeFpgaEWAdd(param.FpgaArgs());
+  // fpga::ComputeFpgaEWAdd(param.FpgaArgs());
+  ComputeCPUEWAdd(param.FpgaArgs());
}
}  // namespace operators
}  // namespace paddle_mobile
......
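For reference, the loop added above performs a per-tensor requantized int8 add: each input byte is rescaled by the ratio of its own quantization scale to the output scale, the two contributions are summed, rounded, and saturated to the int8 range. The standalone sketch below reproduces only that arithmetic; RequantizedAdd and the sample scales are hypothetical names used for illustration, not part of this patch, and the FPGA cache invalidate/flush calls are omitted.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper mirroring the kernel's inner loop (illustration only).
void RequantizedAdd(const int8_t* in0, float scale0, const int8_t* in1,
                    float scale1, int8_t* out, float out_scale, int n) {
  const float c0 = scale0 / out_scale;  // contribution of one LSB of input 0
  const float c1 = scale1 / out_scale;  // contribution of one LSB of input 1
  for (int i = 0; i < n; ++i) {
    int v = static_cast<int>(std::round(in0[i] * c0 + in1[i] * c1));
    // Symmetric saturation to [-127, 127], matching the kernel above.
    out[i] = static_cast<int8_t>(v > 127 ? 127 : (v < -127 ? -127 : v));
  }
}

int main() {
  std::vector<int8_t> a = {100, -50, 127}, b = {30, -90, 127}, out(3);
  // Example scales (made up): a uses 0.02, b uses 0.01, the output uses 0.03.
  RequantizedAdd(a.data(), 0.02f, b.data(), 0.01f, out.data(), 0.03f, 3);
  for (int8_t v : out) std::printf("%d ", v);  // prints: 77 -63 127
  return 0;
}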
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_ELEMENTWISEADDRELU_OP
+#include <math.h>
#include "operators/kernel/elementwise_add_relu_kernel.h" #include "operators/kernel/elementwise_add_relu_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
@@ -58,10 +58,37 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
  return true;
}
+void ComputeCPUEWAddRelu(fpga::EWAddArgs ewaddArgs) {
+  int inputc = ewaddArgs.image0.channels;
+  int inputh = ewaddArgs.image0.height;
+  int inputw = ewaddArgs.image0.width;
+  float inScale0 =
+      (reinterpret_cast<float*>(ewaddArgs.image0.scale_address))[0];
+  float inScale1 =
+      (reinterpret_cast<float*>(ewaddArgs.image1.scale_address))[0];
+  float outScale =
+      (reinterpret_cast<float*>(ewaddArgs.output.scale_address))[0];
+  int8_t* inPtr0 = reinterpret_cast<int8_t*>(ewaddArgs.image0.address);
+  int8_t* inPtr1 = reinterpret_cast<int8_t*>(ewaddArgs.image1.address);
+  int8_t* outPtr = reinterpret_cast<int8_t*>(ewaddArgs.output.address);
+  int datasize = inputc * inputh * inputw;
+  float const0 = inScale0 / outScale;
+  float const1 = inScale1 / outScale;
+  fpga::fpga_invalidate(inPtr0, datasize * sizeof(int8_t));
+  fpga::fpga_invalidate(inPtr1, datasize * sizeof(int8_t));
+  for (int i = 0; i < datasize; i++) {
+    float tmpF = inPtr0[i] * const0 + inPtr1[i] * const1;
+    int tmpI = static_cast<int>(round(tmpF));
+    outPtr[i] = (int8_t)((tmpI > 127 ? 127 : (tmpI < 0 ? 0 : tmpI)));
+  }
+  fpga::fpga_flush(outPtr, datasize * sizeof(int8_t));
+}
template <>
void ElementwiseAddReluKernel<FPGA, float>::Compute(
    const ElementwiseAddReluParam<FPGA> &param) {
-  fpga::ComputeFpgaEWAdd(param.FpgaArgs());
+  // fpga::ComputeFpgaEWAdd(param.FpgaArgs());
+  ComputeCPUEWAddRelu(param.FpgaArgs());
}
}  // namespace operators
}  // namespace paddle_mobile
......
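The ReLU-fused variant added above differs from the plain elementwise add only in its saturation range: negative sums are clamped to zero instead of -127, which folds the activation into the requantization step. A minimal sketch of that clamp, using a hypothetical helper name for illustration only:

#include <cstdint>

// Hypothetical helper (illustration only): saturate to [0, 127] so the ReLU
// is applied together with the int8 requantization.
inline int8_t SaturateAddRelu(int v) {
  return static_cast<int8_t>(v > 127 ? 127 : (v < 0 ? 0 : v));
}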