提交 cc5f0951 编写于 作者: D dangqingqing

Fix bug for WarpCTCLayer.

上级 44486b6f
......@@ -269,8 +269,7 @@ void hl_sequence2batch_copy_padding(real* batch,
int blockDimY = CUDA_BLOCK_SIZE / blockDimX;
dim3 threads(blockDimX, blockDimY);
int gridDimX = (maxSequenceLength * blockDimX + CUDA_BLOCK_SIZE - 1) /
CUDA_BLOCK_SIZE;
int gridDimX = (maxSequenceLength + blockDimY - 1)/blockDimY;
int gridDimY = numSequences;
dim3 grid(gridDimX, gridDimY);
......
......@@ -30,6 +30,8 @@ using namespace std; // NOLINT
using autotest::TensorCheckEqual;
using autotest::TensorCheckErr;
// clang-format off
void testMatrixMaxSequence(int batchSize, int inputDim) {
// forward
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
......@@ -1141,4 +1143,75 @@ TEST(CpuMatrix, copyFrom) {
TensorCheckEqual(cpu, copy);
}
void testBatch2seqPadding(int batchSize, int inputDim) {
MatrixPtr cpuInput = std::make_shared<CpuMatrix>(batchSize, inputDim);
MatrixPtr gpuInput = std::make_shared<GpuMatrix>(batchSize, inputDim);
cpuInput->randomizeUniform();
gpuInput->copyFrom(*cpuInput);
IVectorPtr cpuSequence;
generateSequenceStartPositions(batchSize, cpuSequence);
IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
gpuSequence->copyFrom(*cpuSequence);
int newBatchSize = cpuSequence->getSize() - 1;
MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(newBatchSize, inputDim);
cpuOutput->zero();
gpuOutput->zero();
size_t maxSeqLen = 0;
size_t numSeq = cpuSequence->getSize() - 1;
maxSeqLen = *std::max_element(
cpuSequence->getData(), cpuSequence->getData() + numSeq);
MatrixPtr cBatch = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
MatrixPtr gBatch = std::make_shared<GpuMatrix>(numSeq * maxSeqLen, inputDim);
MatrixPtr cCheck = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
hl_sequence2batch_copy_padding(gBatch->getData(),
gpuInput->getData(),
cpuSequence->getData(),
inputDim,
maxSeqLen,
numSeq,
false,
true);
cCheck->copyFrom(*gBatch);
// CPU
int* seqStart = cpuSequence->getData();
float* batchData = cBatch->getData();
float* seqData = cpuInput->getData();
for (size_t i = 0; i < maxSeqLen; i++) {
for (size_t j = 0; j < numSeq; j++) {
size_t sequenceStart = seqStart[j];
size_t sequenceLength = seqStart[j + 1] - seqStart[j];
if (i < sequenceLength) {
memcpy(batchData + (i * numSeq + j) * inputDim,
seqData + (sequenceStart + i) * inputDim,
inputDim * sizeof(real));
} else {
memset(batchData + (i * numSeq + j) * inputDim,
0,
inputDim * sizeof(real));
}
}
}
TensorCheckErr(*cBatch, *cCheck);
}
TEST(Matrix, warpCTC) {
for (auto batchSize : {51, 1285, 3884}) {
for (auto inputDim : {32, 512, 3026}) {
VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim;
testBatch2seqPadding(batchSize, inputDim);
}
}
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册