提交 a7c4d0c0 编写于 作者: C caoying03

Merge branch 'develop' into print_attention_weight

......@@ -93,7 +93,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(NOT APPLE)
find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT})
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl")
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl -lrt")
endif(NOT APPLE)
function(merge_static_libs TARGET_NAME)
......
......@@ -1395,17 +1395,23 @@ void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
batchMachineStartPos_.resize(totalSeqNum + 1, 0);
if (isSeq) {
ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
CHECK_EQ(inputSeqStartPos->getSize() - 1, finalPaths_.size());
CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size());
int* starts = inputSeqStartPos->getMutableData(false);
int seqId = 0;
for (int i = 0; i < finalPaths_.size(); ++i) {
for (int j = 0; j < finalPaths_[i].size(); ++j) {
copySize[seqId] = starts[i + 1] - starts[i];
copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i]
: starts[j + 1] - starts[j];
batchMachineStartPos_[seqId + 1] =
batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
seqId++;
}
}
} else {
for (size_t i = 0; i < finalPaths_[0].size(); ++i)
batchMachineStartPos_[i + 1] =
batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
}
}
......
......@@ -477,8 +477,6 @@ private:
* outlinks.
* @note In beam search, only one generated sequence with the highest log
* probability is retained.
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
*/
void createDataOutlink();
void createDataOutlinkCopySizeInfo(bool isSeq,
......
......@@ -310,8 +310,8 @@ void Argument::concat(const std::vector<Argument>& args,
auto copyIds = [batchSize, stream](IVectorPtr& dst,
const IVectorPtr& src,
int startRow,
int pos,
int desStartRow,
int srcStartRow,
int size,
bool useGpu) {
if (!src) {
......@@ -319,13 +319,14 @@ void Argument::concat(const std::vector<Argument>& args,
return;
}
IVector::resizeOrCreate(dst, batchSize, useGpu);
dst->subVec(startRow, size)->copyFrom(*src->subVec(pos, size), stream);
dst->subVec(desStartRow, size)
->copyFrom(*src->subVec(srcStartRow, size), stream);
};
auto copyStrs = [batchSize, stream](SVectorPtr& dst,
const SVectorPtr& src,
int startRow,
int pos,
int desStartRow,
int srcStartRow,
int size,
bool useGpu) {
if (!src) {
......@@ -337,8 +338,9 @@ void Argument::concat(const std::vector<Argument>& args,
} else {
dst->resize(batchSize);
}
std::copy(
src->begin() + pos, src->begin() + pos + size, dst->begin() + startRow);
std::copy(src->begin() + srcStartRow,
src->begin() + srcStartRow + size,
dst->begin() + desStartRow);
};
dataId = args[0].dataId;
......
......@@ -1370,7 +1370,14 @@ def simple_attention(encoded_sequence,
param_attr=softmax_param_attr,
name="%s_softmax" % name,
bias_attr=False)
return attention_weight
scaled = scaling_layer(
weight=attention_weight,
input=encoded_sequence,
name='%s_scaling' % name)
return pooling_layer(
input=scaled, pooling_type=SumPooling(),
name="%s_pooling" % name), attention_weight
def inputs(layers, *args):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册