Commit 5c75e5ee authored by bjjwwang

fix bsf bug by YSL

Parent 6403fd18
......@@ -163,10 +163,10 @@ int GeneralRecOp::inference() {
argmax_idx = int(std::distance(
&output_tensor_1_data[n * predict_shape[1]],
std::max_element(&output_tensor_1_data[n * predict_shape[1]],
-                  &output_tensor_1_data[(n + 1) * predict_shape[1]])));
+                  output_tensor_1_data + (n + 1) * predict_shape[1])));
max_value = float(
*std::max_element(&output_tensor_1_data[n * predict_shape[1]],
-                           &output_tensor_1_data[(n + 1) * predict_shape[1]]));
+                           output_tensor_1_data + (n + 1) * predict_shape[1]));
if (blank - 1 - argmax_idx > 1e-5) {
score_vector[index] += max_value;
count += 1;
......
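The fix above replaces `&output_tensor_1_data[(n + 1) * predict_shape[1]]` with `output_tensor_1_data + (n + 1) * predict_shape[1]`. Both are meant to name the one-past-the-end position of row `n`, but the subscript form dereferences the element before taking its address, which is undefined behavior once the row is the last one in the buffer; plain pointer arithmetic up to one past the end is always well defined. A minimal self-contained sketch of the row-wise argmax idiom, with made-up shapes and scores:

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  // Flat buffer of softmax-like scores with shape [2, 5] (made-up values).
  float scores[] = {0.1f, 0.7f, 0.05f, 0.1f, 0.05f,
                    0.3f, 0.2f, 0.4f, 0.05f, 0.05f};
  const int rows = 2, cols = 5;
  for (int n = 0; n < rows; ++n) {
    const float* begin = scores + n * cols;
    // Pointer arithmetic may legally point one past the end of the buffer;
    // &scores[(n + 1) * cols] would dereference that position first, which
    // is undefined behavior on the last row.
    const float* end = scores + (n + 1) * cols;
    const float* max_it = std::max_element(begin, end);
    int argmax_idx = static_cast<int>(std::distance(begin, max_it));
    printf("row %d: argmax=%d max=%.2f\n", n, argmax_idx, *max_it);
  }
  return 0;
}
```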
......@@ -19,10 +19,8 @@
#else
#include <butil/atomicops.h>
#endif
-#include <sys/syscall.h>
-#include <boost/bind.hpp>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/memory.h"
......@@ -34,7 +32,7 @@ template <typename InItemT, typename OutItemT>
bool Task<InItemT, OutItemT>::task_fetch_init(BatchTasks<TaskT>& batchTask) {
  // Double-checked locking, to keep the locked region small
  if (!fetch_init) {
-    if (taskmeta_num > 1) {
+    if (total_taskmeta_num > 1) {
      // When a task is split into multiple taskmetas, locking is required.
AutoMutex lock(task_mut);
task_fetch_create(batchTask);
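For context on the hunk above: `task_fetch_init` uses double-checked initialization, and the lock is only needed when the task has been split across multiple taskmetas, since then several worker threads may race to create the fetch buffers. A minimal sketch of the same pattern using standard-library primitives; `LazyFetchBuffers`, `ensure_init`, and `create_buffers` are illustrative names, not the bsf API:

```cpp
#include <atomic>
#include <cstddef>
#include <mutex>

class LazyFetchBuffers {
 public:
  void ensure_init(size_t total_taskmeta_num) {
    // First, unlocked check: cheap fast path once initialization is done.
    if (!init_done_.load(std::memory_order_acquire)) {
      if (total_taskmeta_num > 1) {
        // Several workers may finish taskmetas of the same task
        // concurrently, so buffer creation must be serialized.
        std::lock_guard<std::mutex> lock(mu_);
        if (!init_done_.load(std::memory_order_relaxed)) {  // second check
          create_buffers();
          init_done_.store(true, std::memory_order_release);
        }
      } else {
        // A task with a single taskmeta is handled by one thread only.
        create_buffers();
        init_done_.store(true, std::memory_order_release);
      }
    }
  }

 private:
  void create_buffers() { /* allocate per-taskmeta output slots */ }
  std::atomic<bool> init_done_{false};
  std::mutex mu_;
};

int main() {
  LazyFetchBuffers buffers;
  buffers.ensure_init(4);  // slow path: takes the lock, creates buffers
  buffers.ensure_init(4);  // fast path: unlocked check short-circuits
  return 0;
}
```

Note the sketch reads an atomic flag outside the lock; the original code reads a plain `fetch_init` bool and relies on the surrounding synchronization.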
......@@ -102,7 +100,7 @@ bool Task<InItemT, OutItemT>::task_fetch_create(BatchTasks<TaskT>& batchTask) {
      // When the task is split into multiple taskMetas, temporary objects
      // record the partial outputs, which are merged once all have arrived.
-      if (taskmeta_num > 1) {
+      if (total_taskmeta_num > 1) {
taskMetaOutLodTensor.push_back(tensor_out);
}
}
......@@ -110,7 +108,7 @@ bool Task<InItemT, OutItemT>::task_fetch_create(BatchTasks<TaskT>& batchTask) {
}
  // outLodTensorVector is in fact a two-level vector with shape
  // total_taskmeta_num * vector_fetch_lod_index.size();
-  outLodTensorVector.resize(taskmeta_num, taskMetaOutLodTensor);
+  outLodTensorVector.resize(total_taskmeta_num, taskMetaOutLodTensor);
fetch_init = true;
}
return true;
......
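`outLodTensorVector` is sized once per task, so its outer dimension has to be the number of taskmetas the task will ever be split into (`total_taskmeta_num`), not the number scheduled so far (`taskmeta_num`), which is the point of the rename above. A tiny sketch of the two-level layout; `Tensor` here is a stand-in, not paddle::PaddleTensor:

```cpp
#include <cstddef>
#include <vector>

struct Tensor {};  // stand-in for paddle::PaddleTensor

int main() {
  // Suppose the task will be split into 3 taskmetas and has 1 lod-type
  // fetch output (both numbers made up for illustration).
  const size_t total_taskmeta_num = 3;
  std::vector<Tensor> taskMetaOutLodTensor(1);  // one slot per lod fetchvar

  // One row per taskmeta; each row holds that taskmeta's lod outputs.
  // Sizing this by the taskmetas scheduled so far instead of the total
  // is exactly the out-of-bounds hazard this commit fixes.
  std::vector<std::vector<Tensor>> outLodTensorVector;
  outLodTensorVector.resize(total_taskmeta_num, taskMetaOutLodTensor);
  return 0;
}
```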
......@@ -78,6 +78,7 @@ struct Task {
std::set<size_t> set_fetch_nobatch_index;
butil::atomic<size_t> index;
size_t taskmeta_num;
+  size_t total_taskmeta_num;
THREAD_MUTEX_T task_mut;
bool fetch_init;
// taskmeta_num * set_feed_lod_index.size()
......@@ -100,6 +101,7 @@ struct Task {
index.store(0, butil::memory_order_relaxed);
THREAD_MUTEX_INIT(&task_mut, NULL);
fetch_init = false;
+    total_taskmeta_num = 1;
outLodTensorVector.clear();
}
~Task() {
......@@ -115,6 +117,7 @@ struct Task {
rem = -1;
total_feed_batch = 0;
taskmeta_num = 0;
+    total_taskmeta_num = 1;
index.store(0, butil::memory_order_relaxed);
THREAD_MUTEX_DESTROY(&task_mut);
fetch_init = false;
......@@ -134,6 +137,7 @@ struct Task {
rem = -1;
total_feed_batch = 0;
taskmeta_num = 0;
+    total_taskmeta_num = 1;
index.store(0, butil::memory_order_relaxed);
THREAD_MUTEX_INIT(&task_mut, NULL);
fetch_init = false;
......@@ -352,14 +356,14 @@ struct Task {
bool combine_taskmeta() {
    // Only when there are lod-type fetch outputs and the task has been
    // split into multiple taskmetas does data need to be moved from
    // outLodTensorVector back into outVectorT_ptr.
-    if (vector_fetch_lod_index.size() > 0 && taskmeta_num > 1) {
+    if (vector_fetch_lod_index.size() > 0 && total_taskmeta_num > 1) {
for (size_t index = 0; index < vector_fetch_lod_index.size(); ++index) {
size_t data_length = 0;
size_t lod_length = 0;
size_t total_shape0 = 0;
size_t feedvar_index = vector_fetch_lod_index[index];
        // PaddleTensor's resize clears the tensor each time, so the total
        // length has to be tallied up front.
-        for (size_t taskmeta_index = 0; taskmeta_index < taskmeta_num;
+        for (size_t taskmeta_index = 0; taskmeta_index < total_taskmeta_num;
++taskmeta_index) {
data_length +=
outLodTensorVector[taskmeta_index][index].data.length();
......@@ -368,7 +372,7 @@ struct Task {
}
        // Grow the PaddleTensor's data and lod in a single allocation
paddle::PaddleTensor& fetchVarTensor = (*outVectorT_ptr)[feedvar_index];
fetchVarTensor.shape[0] = total_shape0;
        void* databuf_data =
            MempoolWrapper::instance().malloc(data_length, memoryPtr);
paddle::PaddleBuf paddleBuf(databuf_data, data_length);
fetchVarTensor.data = paddleBuf;
......@@ -387,15 +391,18 @@ struct Task {
size_t lod_length_offset = 0;
size_t once_data_length = 0;
size_t once_lod_length = 0;
-        size_t last_lod_value = fetchVarTensor.lod[0][lod_length_offset];
-        for (size_t taskmeta_index = 0; taskmeta_index < taskmeta_num;
+        for (size_t taskmeta_index = 0; taskmeta_index < total_taskmeta_num;
++taskmeta_index) {
+          // process data
void* dst_ptr = fetchVarTensor.data.data() + data_length_offset;
void* source_ptr =
outLodTensorVector[taskmeta_index][index].data.data();
once_data_length =
outLodTensorVector[taskmeta_index][index].data.length();
memcpy(dst_ptr, source_ptr, once_data_length);
+          data_length_offset += once_data_length;
+          // process lod
+          size_t last_lod_value = fetchVarTensor.lod[0][lod_length_offset];
once_lod_length =
outLodTensorVector[taskmeta_index][index].lod[0].size();
for (size_t once_index = 0; once_index < once_lod_length;
......@@ -403,9 +410,9 @@ struct Task {
fetchVarTensor.lod[0][lod_length_offset + 1] =
last_lod_value +
outLodTensorVector[taskmeta_index][index].lod[0][once_index];
lod_length_offset++;
}
-          data_length_offset += once_data_length;
-          lod_length_offset += once_lod_length;
}
}
}
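The second loop above is the heart of `combine_taskmeta`: concatenate the per-taskmeta data buffers, and rebase each taskmeta's level-0 lod offsets onto the merged lod, where `last_lod_value` (now re-read per taskmeta, which is part of this fix) is the merged offset reached so far. A self-contained sketch of the same merge on plain vectors, with simplified types and made-up values rather than the PaddleTensor API:

```cpp
#include <cstdio>
#include <cstring>
#include <vector>

// One lod-type output slice produced by a single taskmeta.
struct Slice {
  std::vector<float> data;
  std::vector<size_t> lod;  // local level-0 offsets, without the leading 0
};

int main() {
  std::vector<Slice> slices = {
      {{1, 2, 3, 4, 5}, {2, 5}},  // taskmeta 0: sequences of length 2 and 3
      {{6, 7, 8}, {3}},           // taskmeta 1: one sequence of length 3
  };

  // Pass 1: total sizes first, since the merged buffers are allocated once.
  size_t data_length = 0, lod_length = 0;
  for (const Slice& s : slices) {
    data_length += s.data.size();
    lod_length += s.lod.size();
  }
  std::vector<float> merged_data(data_length);
  std::vector<size_t> merged_lod(lod_length + 1, 0);  // leading 0 kept

  // Pass 2: copy data and rebase lod, taskmeta by taskmeta.
  size_t data_offset = 0, lod_offset = 0;
  for (const Slice& s : slices) {
    std::memcpy(merged_data.data() + data_offset, s.data.data(),
                s.data.size() * sizeof(float));
    data_offset += s.data.size();
    // The base for this slice's offsets is the merged offset so far;
    // reading it once before the taskmeta loop was the bug.
    size_t last_lod_value = merged_lod[lod_offset];
    for (size_t v : s.lod) {
      merged_lod[lod_offset + 1] = last_lod_value + v;
      lod_offset++;
    }
  }
  for (size_t v : merged_lod) printf("%zu ", v);  // prints: 0 2 5 8
  printf("\n");
  return 0;
}
```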
......@@ -536,6 +543,11 @@ class BatchTasks {
}
int start_index = task->batch_size() - task->rem;
TaskMetaT tm(task, start_index, add, task->taskmeta_num);
+    task->rem -= add;
+    _rem_size -= add;
+    if (task->taskmeta_num == 0) {
+      task->total_taskmeta_num =
+          1 + (task->rem + _batch_size - 1) / _batch_size;
+    }
task->taskmeta_num += 1;
_taskmeta_vector.push_back(tm);
if (_batch_in_offset.size() == 0) {
......@@ -586,8 +598,6 @@ class BatchTasks {
tm.feed_shape0_range[feedvar_index][0];
}
}
-    task->rem -= add;
-    _rem_size -= add;
return _rem_size;
}
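The `total_taskmeta_num` computed above is fixed at the moment the first taskmeta is carved off: `task->rem` items remain after it, each later taskmeta carries at most `_batch_size`, so the total is `1 + ceil(rem / batch_size)` via the usual integer ceiling trick. A quick check of the arithmetic, with made-up sizes:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  const size_t batch_size = 8;          // capacity of one BatchTasks
  const size_t task_batch = 20;         // whole task (made-up size)
  const size_t add = 8;                 // items taken by the first taskmeta
  const size_t rem = task_batch - add;  // 12 items still to schedule
  // 1 for the taskmeta just created, plus ceil(rem / batch_size) more.
  const size_t total_taskmeta_num = 1 + (rem + batch_size - 1) / batch_size;
  printf("total taskmetas: %zu\n", total_taskmeta_num);  // 3 (8 + 8 + 4)
  return 0;
}
```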
......@@ -613,7 +623,6 @@ class BatchTasks {
if (_taskmeta_vector.size() <= 0) {
return;
}
for (size_t ti = 0; ti < _taskmeta_vector.size(); ++ti) {
TaskMetaT& tm = _taskmeta_vector[ti];
......@@ -812,7 +821,6 @@ class BatchTasks {
for (size_t fetchvar_index = 0; fetchvar_index < fetchvar_num;
++fetchvar_index) {
size_t fetchvar_bytesize_index = fetchvar_bytesize(fetchvar_index);
if (set_fetch_nobatch_index.size() > 0 &&
set_fetch_nobatch_index.find(fetchvar_index) !=
set_fetch_nobatch_index.end()) {
......@@ -841,11 +849,12 @@ class BatchTasks {
        // When a task has been split into multiple taskmetas, results cannot
        // be copied straight into task->outVectorT_ptr. They are first copied
        // into task->outLodTensorVector[taskmeta_index]; once all of the
        // task's taskmetas have finished, they are copied back into
        // task->outVectorT_ptr in order.
-        if (task->taskmeta_num > 1) {
+        if (task->total_taskmeta_num > 1) {
paddle::PaddleTensor& fetchVarTensor =
task->outLodTensorVector[taskmeta_index][fetch_lod_index];
size_t length = fetchvar_bytesize_index * shape0_length;
fetchVarTensor.shape[0] = shape0_length;
+          fetch_lod_index++;
          void* databuf_data =
              MempoolWrapper::instance().malloc(length, task->memoryPtr);
paddle::PaddleBuf paddleBuf(databuf_data, length);
......@@ -910,7 +919,6 @@ class BatchTasks {
last_lod_value);
}
}
-          fetch_lod_index++;
} else {
          // Ordinary fetchvar case: the task's total fetchvar_batch equals
          // the total input batch_size().
......