提交 32f138a6 编写于 作者: H Houjiang Chen 提交者: GitHub

Merge branch 'develop' into dev-latest

......@@ -196,19 +196,35 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
arg->conv_arg[i].image.pad_height = (uint32_t)padding_h;
arg->conv_arg[i].image.pad_width = (uint32_t)padding_w;
arg->conv_arg[i].filter_scale_address = filter->scale;
arg->conv_arg[i].filter_address = &(
(int8_t *)filter_ptr)[i * element_num * filter_num_per_div]; // NOLINT
arg->conv_arg[i].sb_address = &bs_ptr[i * filter_num_per_div * 2];
// arg->conv_arg[i].filter_address = &(
// (int8_t *)filter_ptr)[i * element_num * filter_num_per_div]; //
// NOLINT
// arg->conv_arg[i].sb_address = &bs_ptr[i * filter_num_per_div * 2];
arg->conv_arg[i].filter_num = (uint32_t)(
i == n - 1 ? channel - (n - 1) * filter_num_per_div // NOLINT
: filter_num_per_div);
size_t filter_size =
element_num * arg->conv_arg[i].filter_num * sizeof(int8_t);
auto filter_head =
&((int8_t *)filter_ptr)[i * element_num * filter_num_per_div];
arg->conv_arg[i].filter_address = fpga_malloc(filter_size);
memcpy(arg->conv_arg[i].filter_address, filter_head, filter_size);
fpga_flush(arg->conv_arg[i].filter_address, filter_size);
size_t bs_size = 2 * arg->conv_arg[i].filter_num * sizeof(float);
auto bs_head = &bs_ptr[i * filter_num_per_div * 2];
arg->conv_arg[i].sb_address = fpga_malloc(bs_size);
memcpy(arg->conv_arg[i].sb_address, bs_head, bs_size);
fpga_flush(arg->conv_arg[i].sb_address, bs_size);
if (n > 1) {
arg->conv_arg[i].output.scale_address =
(float *)fpga_malloc(2 * sizeof(float)); // NOLINT
arg->conv_arg[i].output.address =
fpga_malloc(input->dims()[2] *
align_to_x(input->dims()[3] * arg->conv_arg[i].filter_num,
fpga_malloc(out->dims()[2] *
align_to_x(out->dims()[3] * arg->conv_arg[i].filter_num,
IMAGE_ALIGNMENT) *
sizeof(half));
} else {
......@@ -221,6 +237,8 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
arg->concat_arg.scales_in[i] = arg->conv_arg[i].output.scale_address;
arg->concat_arg.channel_num[i] = arg->conv_arg[i].filter_num;
}
filter->reset_data_ptr(nullptr);
fpga_free(bs_ptr);
}
} // namespace fpga
......
......@@ -137,7 +137,7 @@ void align_num(char **data_in, int num_per_div_before_alignment, int num,
int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
int num_per_div_after_alignment =
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
if (num_per_div_after_alignment != num_per_div_before_alignment) {
char *tmp = *data_in;
int div_num =
(num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
......@@ -154,7 +154,6 @@ void align_num(char **data_in, int num_per_div_before_alignment, int num,
*data_in = data_tmp;
fpga_free(tmp);
}
}
void reorder(char **data_in, int num_after_alignment, int chw) {
......@@ -223,7 +222,10 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
char **quantize_data = (char **)data_in; // NOLINT
convert_to_hwc(quantize_data, num, channel, height, width);
align_element(quantize_data, num, chw);
if (num_after_alignment != num) {
align_num(quantize_data, num_per_div_before_alignment, num, chw);
}
reorder(quantize_data, num_after_alignment, chw);
interleave(quantize_data, num_after_alignment, chw);
fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) *
......@@ -254,15 +256,18 @@ void format_fc_filter(float **data_in, int num, int channel, int height,
align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
int div_num =
(num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment * div_num;
int residual = num % num_per_div_before_alignment;
int num_after_alignment = num_per_div_after_alignment *
((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT);
quantize(data_in, data_size, max);
char **quantize_data = (char **)data_in; // NOLINT
convert_fc_filter(quantize_data, num, chw);
align_element(quantize_data, num, chw);
if (num_after_alignment != num) {
align_num(quantize_data, num_per_div_before_alignment, num, chw);
}
reorder(quantize_data, num_after_alignment, chw);
interleave(quantize_data, num_after_alignment, chw);
fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) *
......
此差异已折叠。
......@@ -137,11 +137,13 @@ int fpga_regpoll(uint64_t reg, uint64_t val, int time) {
for (i = 0; i < timeout; i++) {
if (val == reg_readq(reg)) {
std::cout << "fpga_regpoll:" << i << "val:" << val << "reg:" << reg
<< std::endl;
break;
}
}
if (i <= timeout) {
if (i < timeout) {
return 0;
} else {
return -1;
......@@ -153,6 +155,12 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
uint64_t _nr = DIV_ROUND_UP(size, FPGA_PAGE_SIZE);
unsigned int nr = (unsigned int)_nr;
int ret = 0;
DLOG << size;
DLOG << _nr;
DLOG << nr;
uint64_t a_size = FPGA_PAGE_SIZE * nr;
DLOG << a_size;
pthread_mutex_lock(&memory->mutex);
......@@ -166,6 +174,7 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
*addr = address_ofset;
} else {
DLOG << "memory request failed!";
ret = -ENOMEM;
}
......@@ -282,7 +291,7 @@ uint64_t vaddr_to_paddr(void *address) {
if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) {
paddr = iter->second;
} else {
DLOG << "Invalid pointer";
DLOG << "Invalid pointer: " << address;
}
return paddr;
......@@ -348,6 +357,11 @@ void fpga_free_driver(void *ptr) {
fpga_bitmap::bitmap_clear(g_fpgainfo.memory_info->bitmap, pos,
g_fpgainfo.memory_info->nr[pos]);
pthread_mutex_unlock(&g_fpgainfo.memory_info->mutex);
auto iter = g_fpgainfo.fpga_vaddr2paddr_map.find(ptr);
if (iter != g_fpgainfo.fpga_vaddr2paddr_map.end()) {
g_fpgainfo.fpga_vaddr2paddr_map.erase(iter);
}
} else {
DLOG << "Invalid pointer";
}
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <cstring>
#include <map>
......@@ -44,7 +45,7 @@ const int PE_IDX_POOLING = 1;
const int PE_IDX_EW = 2;
const int PE_IDX_BYPASS = 3;
enum pe_status { IDLE = 0, BUSY = 1 };
enum pe_status { IDLE = 0, BUSY = 1, ERROR = 2 };
struct MemoryCacheArgs {
void *offset;
......@@ -58,7 +59,7 @@ struct MemoryCacheArgs {
struct fpga_pe {
char type_name[MAX_TYPE_NAME_LENTH + 1];
struct pe_data_s *outer;
pe_status status; // 0=idle 1=busy -1=fail
pe_status status;
uint64_t interrupt_cnt;
};
......@@ -106,6 +107,8 @@ inline uint64_t reg_readq(uint32_t offset) {
uint64_t value =
*(volatile uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr + // NOLINT
offset); // NOLINT
// DLOG << "read end";
usleep(10);
return value;
}
......@@ -114,6 +117,8 @@ inline void reg_writeq(uint64_t value, uint32_t offset) {
// DLOG << "offset : " << offset << ", value : " << value;
*(volatile uint64_t *)((uint8_t *)g_fpgainfo.FpgaRegVirAddr + // NOLINT
offset) = value;
// DLOG << "write end";
usleep(10);
}
int open_device_driver();
......
......@@ -74,12 +74,21 @@ struct ConcatArgs {
void* image_out;
float* scale_out;
uint32_t* channel_num;
// uint32_t* aligned_channel_num;
// uint32_t out_channel;
uint32_t* aligned_channel_num;
uint32_t out_channel;
uint32_t height;
uint32_t width;
};
struct SplitConvArgs {
uint32_t split_num;
uint32_t group_num;
uint32_t filter_num;
struct ImageOutputArgs output;
struct ConvArgs* conv_arg;
struct ConcatArgs concat_arg;
};
struct SplitArgs {
uint32_t image_num;
int16_t* image_in;
......@@ -91,15 +100,6 @@ struct SplitArgs {
uint32_t width;
};
struct SplitConvArgs {
uint32_t split_num;
uint32_t group_num;
uint32_t filter_num;
struct ImageOutputArgs output;
struct ConvArgs* conv_arg;
struct ConcatArgs concat_arg;
};
struct PoolingArgs {
int16_t mode; // mode: 0:max, 1:avg
int16_t kernel_reciprocal;
......@@ -127,7 +127,14 @@ struct BypassArgs {
};
struct DeconvArgs {
struct ConvArgs conv_arg;
uint32_t sub_conv_num;
uint32_t group_num;
uint32_t filter_num;
uint32_t omit_size;
uint32_t sub_output_width;
uint32_t sub_output_height;
struct ImageOutputArgs output;
struct ConvArgs* conv_args;
};
static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; }
......
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iomanip>
#include <iostream>
#include "../test_include.h"
#ifdef PADDLE_MOBILE_FPGA_V1
......@@ -87,26 +89,29 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
if (paddle_mobile.Load(std::string(g_resnet50), true)) {
Tensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(0),
static_cast<float>(1));
SetupTensor<float>(&input_tensor, {1, 3, 224, 224}, static_cast<float>(2),
static_cast<float>(2));
readStream(g_image_src_float,
input_tensor.mutable_data<float>({1, 3, 224, 224}));
paddle_mobile.FeedData(input_tensor);
paddle_mobile.Predict_To(-1);
/*for(int i = 0; i < 73; i++)
{
for (int i = 0; i < 73; i++) {
auto tensor_ptr = paddle_mobile.FetchResult(i);
std::string saveName = "resnet50_result_" + std::to_string (i);
std::string saveName = "resnet50_result_" + std::to_string(i);
paddle_mobile::fpga::fpga_invalidate((*tensor_ptr).data<float>(),
tensor_ptr->numel()); dump_stride(saveName, (*tensor_ptr), 20);
//dump(saveName, (*tensor_ptr));
}*/
tensor_ptr->numel() * sizeof(half));
dump_stride(saveName, (*tensor_ptr), 20);
// dump(saveName, (*tensor_ptr));
}
/*std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(73);
(*output_tensor).dump<float>("resnet50_result_73");
std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(73);
//(*output_tensor).dump<float>("resnet50_result_73");
output_tensor = paddle_mobile.FetchResult(74);
(*output_tensor).dump<float>("resnet50_result_74");*/
std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(74);
//(*output_tensor).dump<float>("resnet50_result_74");
// std::shared_ptr<Tensor> output_tensor = paddle_mobile.FetchResult(74);
// output_tensor = paddle_mobile.FetchResult(74);
float max = 0;
auto data_ptr = output_tensor->data<float>();
int maximumIdx = 0;
......@@ -116,7 +121,7 @@ int main() {
max = data_ptr[i];
}
}
std::cout << "index : " << maximumIdx << ", value : " << max
std::cout << "index : " << std::dec << maximumIdx << ", value : " << max
<< std::endl;
std::cout << "Computation done" << std::endl;
return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册