Commit b5a0488a authored by qnqinan, committed by zhangyang0701

update FPGA v2 pe cpp file and ew kernel files, test=develop, closes #2432 (#2433)

* update proposal and psroipool kernel file in FPGA V2 track

* update, test=develop

* update FPGA v2 pe cpp file and ew kernel files, test=develop
Parent ce21ff5d
@@ -623,7 +623,7 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
arg->concat_arg.images_in[i] =
(int8_t *)arg->conv_arg[i].output.address; // NOLINT
arg->concat_arg.scales_in[i] = arg->conv_arg[i].output.scale_address;
arg->concat_arg.scales_in[i] = out->scale;
arg->concat_arg.channel_num[i] = arg->conv_arg[i].filter_num;
expand_conv_arg(&arg->conv_arg[i]);
......
This diff is collapsed.
@@ -37,7 +37,7 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
int anchors_offset[] = {-2, -2, 18, 18, -10, -9, 26, 25, -23,
-20, 39, 36, -43, -34, 59, 49, -63, -54,
79, 69, -96, -77, 112, 93, -137, -118, 153,
134, -204, -188, 220, 204, -281, -395, 296, 441};
134, -204, -188, 220, 204, -281, -395, 296, 411};
int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
......
@@ -12,12 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#include <math.h>
#include "operators/kernel/elementwise_add_kernel.h"
#include <string>
#include "fpga/V2/api.h"
namespace paddle_mobile {
namespace operators {
@@ -60,10 +57,36 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
  return true;
}
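// CPU implementation of the quantized element-wise add, used in place of the
// FPGA EW-add engine. Each int8 input is rescaled into the output's
// quantization domain (const0/const1 fold the input scales into the output
// scale), the rescaled values are summed, rounded, and clamped to the
// symmetric int8 range [-127, 127]. Both inputs are assumed to have the
// shape of image0.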
void ComputeCPUEWAdd(fpga::EWAddArgs ewaddArgs) {
  int inputc = ewaddArgs.image0.channels;
  int inputh = ewaddArgs.image0.height;
  int inputw = ewaddArgs.image0.width;
  float inScale0 =
      (reinterpret_cast<float*>(ewaddArgs.image0.scale_address))[0];
  float inScale1 =
      (reinterpret_cast<float*>(ewaddArgs.image1.scale_address))[0];
  float outScale =
      (reinterpret_cast<float*>(ewaddArgs.output.scale_address))[0];
  int8_t* inPtr0 = reinterpret_cast<int8_t*>(ewaddArgs.image0.address);
  int8_t* inPtr1 = reinterpret_cast<int8_t*>(ewaddArgs.image1.address);
  int8_t* outPtr = reinterpret_cast<int8_t*>(ewaddArgs.output.address);
  int datasize = inputc * inputh * inputw;
  float const0 = inScale0 / outScale;
  float const1 = inScale1 / outScale;
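  // Invalidate the CPU cache for both input buffers so the CPU reads the
  // data most recently written by the FPGA.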
  fpga::fpga_invalidate(inPtr0, datasize * sizeof(int8_t));
  fpga::fpga_invalidate(inPtr1, datasize * sizeof(int8_t));
  for (int i = 0; i < datasize; i++) {
    float tmpF = inPtr0[i] * const0 + inPtr1[i] * const1;
    int tmpI = static_cast<int>(round(tmpF));
    outPtr[i] = (int8_t)((tmpI > 127 ? 127 : (tmpI < -127 ? -127 : tmpI)));
  }
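  // Flush the CPU-written results out of the cache so that later FPGA stages
  // see them in memory.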
  fpga::fpga_flush(outPtr, datasize * sizeof(int8_t));
}
template <>
void ElementwiseAddKernel<FPGA, float>::Compute(
    const ElementwiseAddParam<FPGA> &param) {
  fpga::ComputeFpgaEWAdd(param.FpgaArgs());
  // fpga::ComputeFpgaEWAdd(param.FpgaArgs());
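  // Run the element-wise add on the CPU rather than on the FPGA EW-add engine.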
  ComputeCPUEWAdd(param.FpgaArgs());
}
} // namespace operators
} // namespace paddle_mobile
......
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_ELEMENTWISEADDRELU_OP
#include <math.h>
#include "operators/kernel/elementwise_add_relu_kernel.h"
namespace paddle_mobile {
@@ -58,10 +58,37 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
  return true;
}
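// CPU implementation of the fused element-wise add + ReLU, used in place of
// the FPGA EW-add engine. It mirrors ComputeCPUEWAdd except that the result
// is clamped to [0, 127], which applies the ReLU during requantization.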
void ComputeCPUEWAddRelu(fpga::EWAddArgs ewaddArgs) {
  int inputc = ewaddArgs.image0.channels;
  int inputh = ewaddArgs.image0.height;
  int inputw = ewaddArgs.image0.width;
  float inScale0 =
      (reinterpret_cast<float*>(ewaddArgs.image0.scale_address))[0];
  float inScale1 =
      (reinterpret_cast<float*>(ewaddArgs.image1.scale_address))[0];
  float outScale =
      (reinterpret_cast<float*>(ewaddArgs.output.scale_address))[0];
  int8_t* inPtr0 = reinterpret_cast<int8_t*>(ewaddArgs.image0.address);
  int8_t* inPtr1 = reinterpret_cast<int8_t*>(ewaddArgs.image1.address);
  int8_t* outPtr = reinterpret_cast<int8_t*>(ewaddArgs.output.address);
  int datasize = inputc * inputh * inputw;
  float const0 = inScale0 / outScale;
  float const1 = inScale1 / outScale;
  fpga::fpga_invalidate(inPtr0, datasize * sizeof(int8_t));
  fpga::fpga_invalidate(inPtr1, datasize * sizeof(int8_t));
  for (int i = 0; i < datasize; i++) {
    float tmpF = inPtr0[i] * const0 + inPtr1[i] * const1;
    int tmpI = static_cast<int>(round(tmpF));
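    // The lower clamp bound of 0 applies the fused ReLU at requantization.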
    outPtr[i] = (int8_t)((tmpI > 127 ? 127 : (tmpI < 0 ? 0 : tmpI)));
  }
  fpga::fpga_flush(outPtr, datasize * sizeof(int8_t));
}
template <>
void ElementwiseAddReluKernel<FPGA, float>::Compute(
    const ElementwiseAddReluParam<FPGA> &param) {
  fpga::ComputeFpgaEWAdd(param.FpgaArgs());
  // fpga::ComputeFpgaEWAdd(param.FpgaArgs());
  ComputeCPUEWAddRelu(param.FpgaArgs());
}
} // namespace operators
} // namespace paddle_mobile
......