Fix out of memory.

d6ca03eb · dangqingqing · aaff2ddd · d6ca03eb · d6ca03eb
显示空白变更内容
内联并排

Showing 2 changed files with 5 additions and 12 deletions

paddle/cuda/src/hl_cuda_sequence.cu +1 -1

paddle/math/tests/test_matrixCompare.cpp +4 -11

未找到文件。
--- a/paddle/cuda/src/hl_cuda_sequence.cu
+++ b/paddle/cuda/src/hl_cuda_sequence.cu
@@ -269,7 +269,7 @@ void hl_sequence2batch_copy_padding(real* batch,
  int blockDimY = CUDA_BLOCK_SIZE / blockDimX;
  dim3 threads(blockDimX, blockDimY);

-  int gridDimX = (maxSequenceLength + blockDimY - 1)/blockDimY;
+  int gridDimX = (maxSequenceLength + blockDimY - 1) / blockDimY;
  int gridDimY = numSequences;
  dim3 grid(gridDimX, gridDimY);


--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -1152,15 +1152,8 @@ void testBatch2seqPadding(int batchSize, int inputDim) {
  IVectorPtr gpuSequence = IVector::create(cpuSequence->getSize(), true);
  gpuSequence->copyFrom(*cpuSequence);

-  int newBatchSize = cpuSequence->getSize() - 1;
-  MatrixPtr cpuOutput = std::make_shared<CpuMatrix>(newBatchSize, inputDim);
-  MatrixPtr gpuOutput = std::make_shared<GpuMatrix>(newBatchSize, inputDim);
-  cpuOutput->zero();
-  gpuOutput->zero();
-
-  size_t maxSeqLen = 0;
  size_t numSeq = cpuSequence->getSize() - 1;
-  maxSeqLen = *std::max_element(cpuSequence->getData(),
+  size_t maxSeqLen = *std::max_element(cpuSequence->getData(),
                                       cpuSequence->getData() + numSeq);

  MatrixPtr cBatch = std::make_shared<CpuMatrix>(numSeq * maxSeqLen, inputDim);
@@ -1200,8 +1193,8 @@ void testBatch2seqPadding(int batchSize, int inputDim) {
 }

 TEST(Matrix, warpCTC) {
-  for (auto batchSize : {51, 1285, 3884}) {
-    for (auto inputDim : {32, 512, 3026}) {
+  for (auto batchSize : {51, 526, 2884}) {
+    for (auto inputDim : {32, 512, 2026}) {
      VLOG(3) << " batchSize=" << batchSize << " inputDim=" << inputDim;
      testBatch2seqPadding(batchSize, inputDim);
    }