提交 1e633fd1 编写于 作者: 饶先宏

202108060624

上级 bc2e6006
......@@ -98,6 +98,7 @@ MODULE_DECLARE(cnncell_convolution)
float* line_out;
short *coeff;
short *bias;
int reset;
#if CNNCELLDEBUG
FILE* pDumpFile;
#endif
......@@ -113,7 +114,7 @@ enum convstate {
};
DEFINE_FUNC(cnncell_convolution_gen_output_coeff) {
if (vget(nwReset) == 0) {
if (pobj->reset == 0) {
vput(wCoeffRead, 0);
vput(bCoeffReadAddr, 0);
}
......@@ -154,7 +155,7 @@ DEFINE_FUNC(cnncell_convolution_gen_output_coeff) {
DEFINE_FUNC(cnncell_convolution_gen_output_read) {
vput(wDataRead,0);
if (vget(nwReset) != 0) {
if (pobj->reset != 0) {
int state = vget(state);
if (state == CONV_START) {
if (vget(wDataReadValid)) {
......@@ -229,14 +230,20 @@ END_DEFINE_FUNC
DEFINE_FUNC(cnncell_convolution_clktick) {
//wDataRead = 0;
if (vget(nwReset) == 0) {
vput(state, READ_COEFF);
vput(index, 0);
vput(readline, 0);
vput(writeline, 0);
if (pobj->reset == 0) {
if (vget(nwReset) == 0) {
vput(state, READ_COEFF);
vput(index, 0);
vput(readline, 0);
vput(writeline, 0);
}
else {
pobj->reset = 1;
}
}
else {
if (pobj->reset) {
int state = vget(state);
pobj->reset = 1;
if (state == READ_COEFF) {
int coeffvalid;
coeffvalid = vget(wCoeffReadValid);
......@@ -390,6 +397,7 @@ MODULE_INIT(cnncell_convolution)
pobj->pw = (int)MODULE_PARAM(10);
pobj->ph = (int)MODULE_PARAM(11);
pobj->actfunc= (int)MODULE_PARAM(12);
pobj->reset = 0;
pobj->coeffcount = pobj->output_c * pobj->kw * pobj->kh * pobj->input_c;
pobj->coeff = (short *)malloc(pobj->coeffcount * sizeof(short));
pobj->bias = (short *)malloc(pobj->output_c * sizeof(short));
......
......@@ -47,6 +47,8 @@
#include "hdl4secell.h"
#include "threadlock.h"
#include "windows.h"
#define IMPLEMENT_GUID
#include "hdl4sesim.h"
#undef IMPLEMENT_GUID
......@@ -216,6 +218,8 @@ static int hdl4sesim_hdl4se_simulator_SetReset(HOBJECT object, int reset)
return 0;
}
#define THREADCOUNT 4
static int hdl4sesim_hdl4se_simulator_ClkTick(HOBJECT object)
{
sHDL4SESim* pobj;
......@@ -226,14 +230,11 @@ static int hdl4sesim_hdl4se_simulator_ClkTick(HOBJECT object)
for (i = 0; i < pobj->singlethreadmodules.itemcount; i++) {
hdl4se_module_ClkTick(&((IHDL4SEModuleVar*)(pobj->singlethreadmodules.array[i]))->data);
}
#pragma omp parallel for num_threads(2)
//for (i = 0; i < pobj->multithreadmodules.itemcount; i++) {
#pragma omp parallel for num_threads(THREADCOUNT)
for (i = 0; i < pobj->multithreadmodules.itemcount; i++) {
// int j = i / 16 + (i % 16) * 16;
// if (j < pobj->multithreadmodules.itemcount)
hdl4se_module_ClkTick(&((IHDL4SEModuleVar*)(pobj->multithreadmodules.array[i]))->data);
hdl4se_module_ClkTick(&((IHDL4SEModuleVar*)(pobj->multithreadmodules.array[i]))->data);
}
return 0;
return 0;
}
static int hdl4sesim_hdl4se_simulator_Setup(HOBJECT object)
......@@ -248,7 +249,7 @@ static int hdl4sesim_hdl4se_simulator_Setup(HOBJECT object)
for (i = 0; i < pobj->singlethreadmodules.itemcount; i++) {
hdl4se_module_Setup(&((IHDL4SEModuleVar*)(pobj->singlethreadmodules.array[i]))->data);
}
#pragma omp parallel for num_threads(2)
#pragma omp parallel for num_threads(THREADCOUNT)
for (i = 0; i < pobj->multithreadmodules.itemcount; i++) {
hdl4se_module_Setup(&((IHDL4SEModuleVar*)(pobj->multithreadmodules.array[i]))->data);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册