Commit ef219662 authored by qnqinan, committed by GitHub

Merge pull request #792 from chonwhite/develop

fix:#791
@@ -56,11 +56,17 @@ void *fpga_malloc(size_t size) {
   return reinterpret_cast<void *>(
       mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
 #else
-  return NULL;
+  return malloc(size);
 #endif
 }
-void fpga_free(void *ptr) { munmap(ptr, 0); }
+void fpga_free(void *ptr) {
+#ifdef PADDLE_MOBILE_OS_LINUX
+  munmap(ptr, 0);
+#else
+  free(ptr);
+#endif
+}
 void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
......
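Note: with this change the non-Linux branch of fpga_malloc returns memory from malloc, so fpga_free must release it through the matching branch. A minimal usage sketch (hypothetical caller and buffer size, for illustration only):

#include <cstddef>
#include <cstring>

// Hypothetical caller: obtain a scratch buffer through fpga_malloc and
// release it with fpga_free, so the munmap/free choice always matches
// the mmap64/malloc branch that produced the pointer.
void scratch_example() {
  const size_t kBytes = 1024;  // assumed size, illustration only
  void *buf = fpga_malloc(kBytes);
  std::memset(buf, 0, kBytes);
  fpga_free(buf);  // pairs with fpga_malloc on both build paths
}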
@@ -48,16 +48,11 @@ static Dtype find_max(Dtype* data, int64_t num) {
 // template <typename Dtype>
 void quantize_filter(framework::Tensor* filter) {
-  DLOG << "quantize_filter........";
+  DLOG << "quantize_filter........" << filter->dims();
   float scale = 0;
   auto fix_range = static_cast<float>(std::pow(2, 8 - 1) - 1);
-  const auto batch_size = filter->dims()[0];
-  const auto channel = filter->dims()[1];
-  const auto height = filter->dims()[2];
-  const auto width = filter->dims()[3];
   auto* tmp_data = new int8_t[filter->numel()];
   // 32bit filter -> 8bit filter;
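The omitted lines between these hunks hold the actual float-to-int8 loop. A hedged sketch of the symmetric scheme implied by fix_range = 2^(8-1) - 1 = 127 and scale = fix_range / max (quantize_one is a hypothetical name; the real loop is in the elided code):

#include <cmath>
#include <cstdint>

// Sketch only: map one float weight to int8 with the symmetric scale
// computed above; the result is clamped to the signed 8-bit range.
int8_t quantize_one(float x, float scale) {
  float v = std::round(x * scale);
  if (v > 127.0f) v = 127.0f;
  if (v < -127.0f) v = -127.0f;
  return static_cast<int8_t>(v);
}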
@@ -76,9 +71,19 @@ void quantize_filter(framework::Tensor* filter) {
     scale = (fix_range / max);
     std::memcpy(tmp_data, filter->data<int8_t>(), (size_t)filter->numel());
   }
-  // NCHW -> NHWC;
-  chw_to_hwc<int8_t>(tmp_data, filter->mutable_data<int8_t>(), batch_size,
-                     channel, height, width);
+  if (filter->dims().size() == 4) {
+    const auto batch_size = filter->dims()[0];
+    const auto channel = filter->dims()[1];
+    const auto height = filter->dims()[2];
+    const auto width = filter->dims()[3];
+    // NCHW -> NHWC;
+    chw_to_hwc<int8_t>(tmp_data, filter->mutable_data<int8_t>(), batch_size,
+                       channel, height, width);
+  } else if (filter->dims().size() == 2) {
+    std::memcpy(filter->mutable_data<int8_t>(), tmp_data,
+                (size_t)filter->numel());
+  }
   delete[] tmp_data;
   filter->SetFpgaScale(scale);
 }
......
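For context, a minimal sketch of the relayout that chw_to_hwc is assumed to perform (signature inferred from the call above; the real helper lives elsewhere in the FPGA sources): element (n, c, h, w) of an NCHW tensor moves to position (n, h, w, c).

// Sketch of a per-batch CHW -> HWC transpose matching the call site above.
template <typename Dtype>
void chw_to_hwc_sketch(const Dtype *chw, Dtype *hwc, int num, int channel,
                       int height, int width) {
  for (int n = 0; n < num; n++) {
    const Dtype *in = chw + n * channel * height * width;
    Dtype *out = hwc + n * height * width * channel;
    for (int c = 0; c < channel; c++)
      for (int h = 0; h < height; h++)
        for (int w = 0; w < width; w++)
          out[(h * width + w) * channel + c] =
              in[(c * height + h) * width + w];
  }
}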
@@ -26,7 +26,7 @@ namespace paddle_mobile {
 namespace memory {
 const int MALLOC_ALIGN = 64;
-#ifdef PADDLE_MOBILE_FPGA__VV
+#ifdef PADDLE_MOBILE_FPGA
 namespace fpga = paddle_mobile::fpga;
 void Copy(void *dst, const void *src, size_t num) {
......
@@ -32,9 +32,9 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam *param) {
   args.convert_type = fpga::DATA_FP16_TO_FP32;
   args.layout_type = fpga::LAYOUT_HWC_TO_CHW;
   args.image.address = (void *)(input_ptr);
-  args.image.height = input->dims()[1];
-  args.image.width = input->dims()[2];
-  args.image.channels = input->dims()[3];
+  args.image.height = input->dims()[0];
+  args.image.width = input->dims()[1];
+  args.image.channels = 1;
   args.output.address = output_ptr;
   param->SetFpgaArgs(args);
 }
......
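The fix treats the softmax input as a 2D tensor instead of reading NHWC fields it does not have. A hedged illustration (SoftmaxImageDims is a hypothetical helper; the {batch, classes} interpretation of the two dims is an assumption):

#include <cassert>
#include <vector>

struct ImageDims {
  int height, width, channels;
};

// Hypothetical helper mirroring the changed lines: a 2D input such as
// {1, 1000} yields height 1, width 1000, and a fixed channel count of 1.
ImageDims SoftmaxImageDims(const std::vector<int> &dims) {
  assert(dims.size() == 2);  // 2D {batch, classes} input assumed
  return {dims[0], dims[1], 1};
}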
@@ -31,6 +31,9 @@ elseif("FPGAnets" IN_LIST NET)
   # target_link_libraries(test-resnet paddle-mobile)
   ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
   target_link_libraries(test-tensor-quant paddle-mobile)
+  ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h test_include.h)
+  target_link_libraries(test-fpga-concat-op paddle-mobile)
 elseif("mobilenetssd" IN_LIST NET)
   # gen test
   ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
......
new file: test/fpga/test_concat_op.cpp
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/concat_op.h"
int main() {
  paddle_mobile::Loader<paddle_mobile::FPGA> loader;
  auto program = loader.Load(g_googlenet);
  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
                        "program file read fail");

  Executor4Test<paddle_mobile::FPGA,
                paddle_mobile::operators::ConcatOp<paddle_mobile::FPGA, float>>
      executor(program, "concat");

  // 1. input_tensors;
  vector<Tensor> input_tensors;

  Tensor input1;
  auto input1_data = CreateInput<float>(&input1, {4, 10, 2, 2}, 0, 1);
  input_tensors.push_back(input1);

  Tensor input2;
  auto input2_data = CreateInput<float>(&input2, {4, 20, 2, 2}, 0, 1);
  input_tensors.push_back(input2);

  Tensor input3;
  auto input3_data = CreateInput<float>(&input3, {4, 30, 2, 2}, 0, 1);
  input_tensors.push_back(input3);

  Tensor input4;
  auto input4_data = CreateInput<float>(&input4, {4, 40, 2, 2}, 0, 1);
  input_tensors.push_back(input4);

  // 2. input_names
  vector<string> input_names({
      "conv2d_3.tmp_1",
      "conv2d_5.tmp_1",
      "conv2d_7.tmp_1",
      "conv2d_8.tmp_1",
  });

  // 3. output_names
  vector<string> output_names({"concat_0.tmp_0"});

  // 4. out_dims; concat of (4,10,2,2)..(4,40,2,2) along axis 1 is (4,100,2,2)
  vector<DDim> out_ddims;
  auto out_ddim = paddle_mobile::framework::make_ddim({4, 100, 2, 2});
  out_ddims.push_back(out_ddim);

  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
                                            output_names, out_ddims);

  auto output0_data = output[0]->data<float>();

  // 5. test one example.
  int input_n = 1;
  int input_c = 2;
  int input_h = 0;
  int input_w = 1;

  int stride0 = input3.numel() / input3.dims()[0];
  int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1];
  int stride2 = input3.dims()[3];

  /// inputx1 (4,10,2,2),
  /// inputx2 (4,20,2,2),
  /// inputx3 (4,30,2,2),
  /// inputx4 (4,40,2,2),
  /// axis = 1
  /// output (4,100,2,2)
  int input_index =
      input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
  int output_index = input_n * 100 * 2 * 2 +
                     (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 +
                     input_h * 2 + input_w;

  DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index];
  DLOG << " output [1,32,0,1] = " << output0_data[output_index];
  return 0;
}
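The test above spot-checks a single element of input3's slice. A fuller check could walk every element with the same stride arithmetic; a sketch (CheckInput3Slice is a hypothetical helper, with channel_offset = input1.dims()[1] + input2.dims()[1] = 30 and out_channels = 100):

// Hypothetical exhaustive check: every element of the third input must
// reappear in the concat output at the same (n, h, w) with its channel
// shifted by the channels of the inputs concatenated before it.
bool CheckInput3Slice(const float *in3, const float *out, int n, int c_in,
                      int h, int w, int channel_offset, int out_channels) {
  for (int nn = 0; nn < n; ++nn)
    for (int cc = 0; cc < c_in; ++cc)
      for (int hh = 0; hh < h; ++hh)
        for (int ww = 0; ww < w; ++ww) {
          int in_idx = ((nn * c_in + cc) * h + hh) * w + ww;
          int out_idx =
              ((nn * out_channels + channel_offset + cc) * h + hh) * w + ww;
          if (in3[in_idx] != out[out_idx]) return false;
        }
  return true;
}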