diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index ac6cf624e82c0a346fea42fa29fe9bab6ace8d47..7f2ab1fb11d847217a2294e4122904d5bada18fb 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -18,7 +18,7 @@ SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
 SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
 SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
 set(WARPCTC_REPOSITORY https://github.com/baidu-research/warp-ctc.git)
-set(WARPCTC_TAG fc7f226b93758216a03b1be9d24593a12819b984)
+set(WARPCTC_TAG 95a461eddeabd51099ef059dcfada1117eb1bfb8)
 
 SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
     CACHE PATH "Warp-ctc Directory" FORCE)
@@ -44,8 +44,9 @@ ExternalProject_Add(
     "${WARPCTC_DOWNLOAD_CMD}"
     PREFIX          ${WARPCTC_PREFIX_DIR}
     SOURCE_DIR      ${WARPCTC_SOURCE_DIR}
-    UPDATE_COMMAND  ""
+    #UPDATE_COMMAND  ""
     PATCH_COMMAND   ""
+    BUILD_ALWAYS    1
     CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                     -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
diff --git a/paddle/fluid/operators/math/sequence_scale.cc b/paddle/fluid/operators/math/sequence_scale.cc
index 78cbdf311ad1982a5de8913e3fb432e2d824b643..8e58411a1f247f11c160d6ba6e365d3e415dd437 100644
--- a/paddle/fluid/operators/math/sequence_scale.cc
+++ b/paddle/fluid/operators/math/sequence_scale.cc
@@ -46,6 +46,7 @@ class ScaleLoDTensorFunctor<platform::CPUDeviceContext, T> {
 };
 
 template class ScaleLoDTensorFunctor<platform::CPUDeviceContext, float>;
+template class ScaleLoDTensorFunctor<platform::CPUDeviceContext, double>;
 
 }  // namespace math
 }  // namespace operators
diff --git a/paddle/fluid/operators/math/sequence_scale.cu b/paddle/fluid/operators/math/sequence_scale.cu
index 079338c1d3dac6a9403c5871f3face9f1f8e77d2..4a952afe15f756fde9019dfa2ef2f35c8cd45469 100644
--- a/paddle/fluid/operators/math/sequence_scale.cu
+++ b/paddle/fluid/operators/math/sequence_scale.cu
@@ -52,6 +52,7 @@ class ScaleLoDTensorFunctor<platform::CUDADeviceContext, T> {
 };
 
 template class ScaleLoDTensorFunctor<platform::CUDADeviceContext, float>;
+template class ScaleLoDTensorFunctor<platform::CUDADeviceContext, double>;
 
 }  // namespace math
 }  // namespace operators
diff --git a/paddle/fluid/operators/warpctc_op.cc b/paddle/fluid/operators/warpctc_op.cc
index 5dcbabc96b4dfd287b81e29b37db196435dc7ce2..f043b0179491979f2dc1ae35da5b99d2800d8764 100644
--- a/paddle/fluid/operators/warpctc_op.cc
+++ b/paddle/fluid/operators/warpctc_op.cc
@@ -103,13 +103,13 @@ class WarpCTCOpMaker : public framework::OpProtoAndCheckerMaker {
              "Target sequence length for Label when Label is a 2-D tensor.")
         .AsDispensable();
     AddOutput("WarpCTCGrad",
-              "(Tensor, default: Tensor<float>), a temporary "
+              "(Tensor), a temporary "
              "output Tensor to store the gradients of warp-ctc, which is "
              "computed with loss together in one call. It is a 3-D Tensor of "
              "the shape [max_sequence_length, batch_size, num_classes + 1].")
         .AsIntermediate();
     AddOutput("Loss",
-              "(Tensor, default: Tensor<float>), the Connectionist "
+              "(Tensor), the Connectionist "
              "Temporal Classification (CTC) loss, which is a 2-D Tensor of "
              "the shape [batch_size, 1]");
     AddAttr<int>("blank",
@@ -197,7 +197,9 @@ REGISTER_OPERATOR(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker,
 REGISTER_OPERATOR(warpctc_grad, ops::WarpCTCGradOp,
                   ops::WarpCTCGradOpNoNeedBufferVarInferer);
 REGISTER_OP_CPU_KERNEL(
-    warpctc, ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, float>);
+    warpctc, ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, double>);
 REGISTER_OP_CPU_KERNEL(
     warpctc_grad,
-    ops::WarpCTCGradKernel<paddle::platform::CPUDeviceContext, float>);
+    ops::WarpCTCGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::WarpCTCGradKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/fluid/operators/warpctc_op.cu.cc b/paddle/fluid/operators/warpctc_op.cu.cc
index 6f8559f542f6034661b6ff346beccd9626e2370a..a42093aaa29e33d98ebf90f1e344835f8bfc22b5 100644
--- a/paddle/fluid/operators/warpctc_op.cu.cc
+++ b/paddle/fluid/operators/warpctc_op.cu.cc
@@ -16,7 +16,9 @@ limitations under the License. */
 
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
-    warpctc, ops::WarpCTCKernel<paddle::platform::CUDADeviceContext, float>);
+    warpctc, ops::WarpCTCKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::WarpCTCKernel<paddle::platform::CUDADeviceContext, double>);
 REGISTER_OP_CUDA_KERNEL(
     warpctc_grad,
-    ops::WarpCTCGradKernel<paddle::platform::CUDADeviceContext, float>);
+    ops::WarpCTCGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::WarpCTCGradKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h
index 951a258fd21d13d458d6d1ceff018e82f2213394..8b9276d4fa03f51e18b93e538aa944e8b719dc86 100644
--- a/paddle/fluid/operators/warpctc_op.h
+++ b/paddle/fluid/operators/warpctc_op.h
@@ -27,7 +27,52 @@ namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 
+template <typename DeviceContext, typename T>
+class ComputeCtcLossFunctor {
+ public:
+  ctcStatus_t operator()(const T* const activations, T* gradients,
+                         const int* const flat_labels,
+                         const int* const label_lengths,
+                         const int* const input_lengths, int alphabet_size,
+                         int minibatch, T* costs, void* workspace,
+                         ctcOptions options) {
+    return CTC_STATUS_EXECUTION_FAILED;
+  }
+};
+
+template <typename DeviceContext>
+class ComputeCtcLossFunctor<DeviceContext, float> {
+ public:
+  ctcStatus_t operator()(const float* const activations, float* gradients,
+                         const int* const flat_labels,
+                         const int* const label_lengths,
+                         const int* const input_lengths, int alphabet_size,
+                         int minibatch, float* costs, void* workspace,
+                         ctcOptions options) {
+    return platform::dynload::compute_ctc_loss(
+        activations, gradients, flat_labels, label_lengths, input_lengths,
+        static_cast<int>(alphabet_size), static_cast<int>(minibatch), costs,
+        workspace, options);
+  }
+};
+
 template <typename DeviceContext>
+class ComputeCtcLossFunctor<DeviceContext, double> {
+ public:
+  ctcStatus_t operator()(const double* const activations, double* gradients,
+                         const int* const flat_labels,
+                         const int* const label_lengths,
+                         const int* const input_lengths, int alphabet_size,
+                         int minibatch, double* costs, void* workspace,
+                         ctcOptions options) {
+    return platform::dynload::compute_ctc_loss_double(
+        activations, gradients, flat_labels, label_lengths, input_lengths,
+        static_cast<int>(alphabet_size), static_cast<int>(minibatch), costs,
+        workspace, options);
+  }
+};
+
+template <typename DeviceContext, typename T>
 class WarpCTCFunctor {
  public:
   /*
@@ -51,21 +96,29 @@ class WarpCTCFunctor {
    * \param blank        blank label used in ctc loss function.
    * \param cpu_losss    cost of each sequence in CPU memory.
    */
-  void operator()(const framework::ExecutionContext& ctx, const float* input,
-                  float* gradient, const int* cpu_labels,
+  void operator()(const framework::ExecutionContext& ctx, const T* input,
+                  T* gradient, const int* cpu_labels,
                   const int* cpu_label_lengths, const int* cpu_input_lengths,
                   const size_t sequence_width, const size_t num_sequences,
-                  const size_t blank, float* cpu_loss) {
+                  const size_t blank, T* cpu_loss) {
     // Init warp-ctc options
     init(ctx, blank);
 
     // Compute the required workspace size.
     // There is no memory allocated operations within warp-ctc.
     size_t workspace_bytes = 0;
-    ctcStatus_t status = platform::dynload::get_workspace_size(
-        cpu_label_lengths, cpu_input_lengths, static_cast<int>(sequence_width),
-        static_cast<int>(num_sequences), options_, &workspace_bytes);
-
+    ctcStatus_t status = CTC_STATUS_UNKNOWN_ERROR;
+    if (sizeof(T) == 4) {
+      status = platform::dynload::get_workspace_size(
+          cpu_label_lengths, cpu_input_lengths,
+          static_cast<int>(sequence_width), static_cast<int>(num_sequences),
+          options_, &workspace_bytes);
+    } else {
+      status = platform::dynload::get_workspace_size_double(
+          cpu_label_lengths, cpu_input_lengths,
+          static_cast<int>(sequence_width), static_cast<int>(num_sequences),
+          options_, &workspace_bytes);
+    }
     PADDLE_ENFORCE_EQ(
         CTC_STATUS_SUCCESS, status,
         platform::errors::PreconditionNotMet(
@@ -79,17 +132,17 @@ class WarpCTCFunctor {
             workspace_bytes));
 
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    size_t workspace_elements = workspace_bytes / sizeof(float) + 1UL;
-    Tensor workspace = ctx.AllocateTmpTensor<float, DeviceContext>(
+    size_t workspace_elements = workspace_bytes / sizeof(T) + 1UL;
+    Tensor workspace = ctx.AllocateTmpTensor<T, DeviceContext>(
         framework::make_ddim({static_cast<int64_t>(workspace_elements)}), dev_ctx);
-    float* workspace_data = workspace.data<float>();
-    math::SetConstant<DeviceContext, float>()(
+    T* workspace_data = workspace.data<T>();
+    math::SetConstant<DeviceContext, T>()(
         ctx.template device_context<DeviceContext>(), &workspace,
-        static_cast<float>(0));
+        static_cast<T>(0));
 
     // compute loss and gradient
-    status = platform::dynload::compute_ctc_loss(
+    status = ComputeCtcLossFunctor<DeviceContext, T>()(
         input, gradient, cpu_labels, cpu_label_lengths, cpu_input_lengths,
         static_cast<int>(sequence_width), static_cast<int>(num_sequences),
         cpu_loss, workspace_data, options_);
@@ -112,7 +165,8 @@ class WarpCTCFunctor {
                             ctx.device_context())
                             .stream();
 #else
-      PADDLE_THROW("[warpctc init] GPU is not enabled.");
+      PADDLE_THROW(platform::errors::PreconditionNotMet(
+          "[warpctc init] GPU is not enabled."));
 #endif
     } else {
       options_.loc = CTC_CPU;
@@ -292,7 +346,7 @@ class WarpCTCKernel : public framework::OpKernel<T> {
     const size_t blank = static_cast<size_t>(ctx.Attr<int>("blank"));
 
-    WarpCTCFunctor<DeviceContext>()(
+    WarpCTCFunctor<DeviceContext, T>()(
         ctx, warpctc_logits_data, warpctc_grad_data, warpctc_label_data,
         warpctc_label_lengths.data(), warpctc_logits_lengths.data(),
         sequence_width, num_sequences, blank, warpctc_loss_data);
diff --git a/paddle/fluid/platform/dynload/warpctc.h b/paddle/fluid/platform/dynload/warpctc.h
index e10a7233b6248705492f641044771e6531fb00a4..5f1b7612117ffe9728caf75fd1db3bb8ca1e09f3 100644
--- a/paddle/fluid/platform/dynload/warpctc.h
+++ b/paddle/fluid/platform/dynload/warpctc.h
@@ -53,7 +53,9 @@ extern void* warpctc_dso_handle;
   __macro(get_warpctc_version);     \
   __macro(ctcGetStatusString);      \
   __macro(compute_ctc_loss);        \
-  __macro(get_workspace_size)
+  __macro(compute_ctc_loss_double); \
+  __macro(get_workspace_size);      \
+  __macro(get_workspace_size_double)
 
 WARPCTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_WARPCTC_WRAP);
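With the double-precision kernels, the compute_ctc_loss_double / get_workspace_size_double symbols, and the dtype-dispatching ComputeCtcLossFunctor wired up above, warpctc can now be fed float64 activations from Python. The following is a minimal usage sketch, not part of this patch: names, shapes, and values are made up, and it assumes a build that picks up the updated warp-ctc tag.

    # Hypothetical sketch: exercise the new float64 warpctc kernels from dygraph.
    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.disable_static()

    max_seq_length, batch_size, class_num = 5, 2, 4        # made-up sizes
    logits = np.random.uniform(
        0.1, 1.0, [max_seq_length, batch_size, class_num + 1]).astype("float64")
    labels = np.random.randint(0, class_num, [batch_size, 3], dtype="int32")

    softmax = paddle.to_tensor(logits)
    labels_t = paddle.to_tensor(labels)
    logits_len = paddle.to_tensor(np.array([5, 4], dtype="int64"))
    labels_len = paddle.to_tensor(np.array([3, 2], dtype="int64"))

    # input_length and label_length are mandatory in dygraph mode
    # (see the ValueError added to layers.warpctc later in this patch).
    cost = fluid.layers.warpctc(
        input=softmax, label=labels_t, blank=class_num,
        input_length=logits_len, label_length=labels_len)
    print(cost.numpy().dtype)  # expected: float64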
diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
index d3052ebd351ef4844d7563935172ed4b7eb1654c..9bc603c0ecc2c9da9eaf34cf0791fe2767d52a9a 100644
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -48,6 +48,7 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
     {"collect_fpn_proposals",
      {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}},
     {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}},
+    {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}},
 };
 
 // NOTE(zhiqiu): Like op_ins_map.
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 013a842e1123dd7e330e7e34c776e1c66026456d..498e7126d67c75056386da44a90ef90fe8416edd 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -3609,18 +3609,18 @@ def switch_case(branch_index, branch_fns, default=None, name=None):
     This operator is like a C++ switch/case statement.
 
     Args:
-        branch_index(Variable): A Tensor with shape [1] to specify which branch to execute. The data type is ``int32``, ``int64`` or ``uint8``.
+        branch_index(Tensor): A Tensor with shape [1] to specify which branch to execute. The data type is ``int32``, ``int64`` or ``uint8``.
         branch_fns(dict|list|tuple): If it's a list or tuple, the elements in it could be pairs of (int, callable) or simple callables whose actual index will be used as the index of callable. If it's a dict, its key is a python integer and the value is a callable. All callables return the same structure of Tensors.
         default(callable, optional): Callable that returns a structure of Tensors.
         name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
-        Variable|list(Variable): Tensors returned by the callable specified by ``branch_index`` in ``branch_fns``,
+        Tensor|list(Tensor): Tensors returned by the callable specified by ``branch_index`` in ``branch_fns``,
         or Tensors returned by ``default`` if ``default`` is not None and no index matches in ``branch_fns``,
         or Tensors returned by the callable with the max index in ``branch_fns`` if ``default`` is None and no index matches in ``branch_fns``.
 
     Raises:
-        TypeError: If the type of ``branch_index`` is not Variable.
+        TypeError: If the type of ``branch_index`` is not Tensor.
         TypeError: If the data type of ``branch_index`` is not ``int32``, ``int64`` or ``uint8``.
         TypeError: If the type of ``branch_fns`` is not dict, list or tuple.
         TypeError: If the elements of ``branch_fns`` is not 2-tuple.
@@ -3632,40 +3632,41 @@ def switch_case(branch_index, branch_fns, default=None, name=None):
     Examples:
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            import paddle.fluid.layers as layers
+            import paddle
+
+            paddle.enable_static()
 
             def fn_1():
-                return layers.fill_constant(shape=[1, 2], dtype='float32', value=1)
+                return paddle.fill_constant(shape=[1, 2], dtype='float32', value=1)
 
             def fn_2():
-                return layers.fill_constant(shape=[2, 2], dtype='int32', value=2)
+                return paddle.fill_constant(shape=[2, 2], dtype='int32', value=2)
 
             def fn_3():
-                return layers.fill_constant(shape=[3], dtype='int32', value=3)
+                return paddle.fill_constant(shape=[3], dtype='int32', value=3)
 
-            main_program = fluid.default_startup_program()
-            startup_program = fluid.default_main_program()
-            with fluid.program_guard(main_program, startup_program):
-                index_1 = layers.fill_constant(shape=[1], dtype='int32', value=1)
-                index_2 = layers.fill_constant(shape=[1], dtype='int32', value=2)
+            main_program = paddle.static.default_startup_program()
+            startup_program = paddle.static.default_main_program()
+            with paddle.static.program_guard(main_program, startup_program):
+                index_1 = paddle.fill_constant(shape=[1], dtype='int32', value=1)
+                index_2 = paddle.fill_constant(shape=[1], dtype='int32', value=2)
 
-                out_1 = layers.switch_case(
+                out_1 = paddle.static.nn.switch_case(
                     branch_index=index_1,
                     branch_fns={1: fn_1, 2: fn_2},
                     default=fn_3)
 
-                out_2 = layers.switch_case(
+                out_2 = paddle.static.nn.switch_case(
                     branch_index=index_2,
                     branch_fns=[(1, fn_1), (2, fn_2)],
                     default=fn_3)
 
                 # Argument default is None and no index matches. fn_3 will be called because of the max index 7.
-                out_3 = layers.switch_case(
+                out_3 = paddle.static.nn.switch_case(
                     branch_index=index_2,
                     branch_fns=[(0, fn_1), (4, fn_2), (7, fn_3)])
 
-                exe = fluid.Executor(fluid.CPUPlace())
+                exe = paddle.static.Executor(paddle.CPUPlace())
                 res_1, res_2, res_3 = exe.run(main_program,
                                               fetch_list=[out_1, out_2, out_3])
                 print(res_1)  # [[1. 1.]]
                 print(res_2)  # [[2 2] [2 2]]
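The op_ins_map entry registered earlier in this patch is what lets the auto-generated dygraph binding accept warpctc's four inputs positionally. Below is a rough sketch of that call path, mirroring the in_dygraph_mode() branch added to layers.warpctc further down; the shapes and values are illustrative only, not taken from the patch.

    # Rough sketch of the generated dygraph binding enabled by the op_ins_map
    # entry {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}}.
    import numpy as np
    import paddle
    from paddle.fluid import core

    paddle.disable_static()

    logits = paddle.to_tensor(np.random.rand(5, 2, 4).astype("float32"))        # Logits
    labels = paddle.to_tensor(np.random.randint(0, 3, [2, 3]).astype("int32"))  # Label
    logits_len = paddle.to_tensor(np.array([5, 4], dtype="int64"))              # LogitsLength
    labels_len = paddle.to_tensor(np.array([3, 2], dtype="int64"))              # LabelLength

    # Inputs follow the registered order, then attributes as name/value pairs,
    # exactly as the new in_dygraph_mode() branch in layers.warpctc does.
    grad, loss = core.ops.warpctc(logits, labels, logits_len, labels_len,
                                  'blank', 3, 'norm_by_times', False)
    print(loss.shape)  # [2, 1]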
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index f468815c99ea2751913c5535c721ee9a6a5c5052..037c7e850040f5097aa53b0499e3194aae4470a6 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -541,7 +541,7 @@ def warpctc(input,
             (not including the blank label). When it is a 3-D Tensor, its shape
             is `[max_logit_length, batch_size, num_classes + 1]`,
             where `max_logit_length` is the longest length of
-            input logit sequence. The data type must be float32.
+            input logit sequence. The data type should be float32 or float64.
         label (Variable): The ground truth of variable-length sequence,
             which must be a 2-D Tensor with LoD information or a 3-D Tensor without
             LoD information, needs to be consistent with the coressponding input.
@@ -571,6 +571,7 @@ def warpctc(input,
         .. code-block:: python
 
             # using LoDTensor
+            import paddle
             import paddle.fluid as fluid
             import numpy as np
 
@@ -581,6 +582,7 @@ def warpctc(input,
             # class num
             class_num = 5
 
+            paddle.enable_static()
             logits = fluid.data(name='logits',shape=[None, class_num+1],
                                  dtype='float32',lod_level=1)
             label = fluid.data(name='label', shape=[None, 1],
@@ -602,6 +604,7 @@ def warpctc(input,
         .. code-block:: python
 
             # using Tensor
+            import paddle
             import paddle.fluid as fluid
             import numpy as np
 
@@ -613,6 +616,7 @@ def warpctc(input,
             batch_size = 16
             # class num
             class_num = 5
+            paddle.enable_static()
             logits = fluid.data(name='logits',
                            shape=[max_seq_length, batch_size, class_num+1],
                            dtype='float32')
@@ -637,8 +641,23 @@ def warpctc(input,
                           fetch_list=[cost.name])
         print(output)
     """
+    if in_dygraph_mode():
+        if input_length is None or label_length is None:
+            raise ValueError(
+                "input_length and label_length must not be None in dygraph mode!"
+            )
+        grad, loss_out = core.ops.warpctc(
+            input,
+            label,
+            input_length,
+            label_length,
+            'blank',
+            blank,
+            'norm_by_times',
+            norm_by_times, )
+        return loss_out
     helper = LayerHelper('warpctc', **locals())
-    check_variable_and_dtype(input, 'input', ['float32'], "warpctc")
+    check_variable_and_dtype(input, 'input', ['float32', 'float64'], "warpctc")
     check_variable_and_dtype(label, 'label', ['int32'], "warpctc")
     this_inputs = {'Logits': [input], 'Label': [label]}
     if input_length is not None and label_length is not None:
diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
index 5df04ddfc3d26492323153b8b26658db4325b7ec..d85521f76621d011660f9c3f45bb44c7d01ff390 100644
--- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py
@@ -341,10 +341,12 @@ class TestMathOpPatchesVarBase(unittest.TestCase):
             np.array_equal(x.rank().numpy(), paddle.rank(x).numpy()))
         self.assertTrue(
             np.array_equal(x[0].t().numpy(), paddle.t(x[0]).numpy()))
-        m = paddle.to_tensor(np.random.uniform(1, 2, [3, 3]), 'float32')
-        m = m.matmul(m.t())
+        d = paddle.to_tensor([[1.2285208, 1.3491015, 1.4899898],
+                              [1.30058, 1.0688717, 1.4928783],
+                              [1.0958099, 1.3724753, 1.8926544]])
+        d = d.matmul(d.t())
         self.assertTrue(
-            np.array_equal(m.cholesky().numpy(), paddle.cholesky(m).numpy()))
+            np.array_equal(d.cholesky().numpy(), paddle.cholesky(d).numpy()))
         self.assertTrue(
             np.array_equal(x.is_empty().numpy(), paddle.is_empty(x).numpy()))
diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py
index c4155e0d8260fe1fdc4a0e49e955fc2bbff0fc89..b82ab04c9860de88a142ac31e5934202463d3073 100644
--- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py
@@ -24,7 +24,7 @@ from paddle.fluid import Program, program_guard
 import paddle
 import paddle.nn.functional as F
 
-CUDA_BLOCK_SIZE = 512
+CUDA_BLOCK_SIZE = 32
 
 
 class CTCForward(object):
@@ -41,8 +41,8 @@ class CTCForward(object):
         self.num_classes = num_classes
         self.batch_size = batch_size
 
-        self.loss = np.zeros([self.batch_size, 1], dtype="float32")
-        self.gradient = np.zeros(self.softmax.shape, dtype="float32")
+        self.loss = np.zeros([self.batch_size, 1], dtype=softmax.dtype)
+        self.gradient = np.zeros(self.softmax.shape, dtype=softmax.dtype)
 
         # float64
         self.EXP_MAX = sys.float_info.max
@@ -112,13 +112,15 @@ class CTCForward(object):
         # calculate the forward and backward variables,
         # reference Chapter 7.3 of "Alex Grave, Supervised Sequence
         # Labelling with Recurrent Neural Networks"
-        log_acts = np.zeros([total_times, self.num_classes], dtype="float32")
+        log_acts = np.zeros(
+            [total_times, self.num_classes], dtype=softmax_a_sequence.dtype)
         for i in range(total_times):
             for j in range(self.num_classes):
                 log_acts[i, j] = self.safe_log(softmax_a_sequence[i, j])
 
         # calculate the forward variables
-        forward_vars = np.zeros([total_times, total_segments], dtype="float32")
+        forward_vars = np.zeros(
+            [total_times, total_segments], dtype=softmax_a_sequence.dtype)
         for i in range(total_times):
             for j in range(total_segments):
                 forward_vars[i, j] = self.LOG_ZERO
@@ -219,7 +221,7 @@ class TestWarpCTCOp(OpTest):
                                          self.logits_lod[0][i])
         self.gradient = np.zeros(
             [max_sequence_length, self.batch_size, self.num_classes],
-            dtype="float32")
+            dtype=logits.dtype)
 
         self.inputs = {
             "Logits": (logits, self.logits_lod),
@@ -287,7 +289,7 @@ class TestWarpCTCOpWithPadding(OpTest):
         # reshape logits to T*N*S
         new_logits = np.zeros(
             [max_sequence_length, self.batch_size, self.num_classes],
-            dtype="float32")
+            dtype=logits.dtype)
 
         cur = 0
         for batch_id in range(self.batch_size):
@@ -312,7 +314,7 @@ class TestWarpCTCOpWithPadding(OpTest):
 
         self.gradient = np.zeros(
             [max_sequence_length, self.batch_size, self.num_classes],
-            dtype="float32")
+            dtype=logits.dtype)
 
         self.inputs = {
             "Logits": new_logits,
@@ -347,6 +349,90 @@ class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding):
         self.norm_by_times = False
 
 
+class TestWarpCTCOpFp64(OpTest):
+    def config(self):
+        self.batch_size = 4
+        self.num_classes = 8
+        self.logits_lod = [[4, 1, 5, 5]]
+        self.labels_lod = [[3, 1, 4, 2]]
+        self.logits_length = np.array([4, 1, 5, 5], dtype=np.int64)
+        self.labels_length = np.array([3, 1, 4, 2], dtype=np.int64)
+        self.blank = self.num_classes - 1
+        self.norm_by_times = False
+
+    def setUp(self):
+        self.op_type = "warpctc"
+        self.config()
+
+        logits = np.random.uniform(
+            0.1, 1.0,
+            [sum(self.logits_length), self.num_classes]).astype("float64")
+        softmax = np.apply_along_axis(stable_softmax, 1, logits)
+        # labels should not be blank
+        labels = np.random.randint(
+            0,
+            self.num_classes - 1, [sum(self.labels_length), 1],
+            dtype="int32")
+
+        ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
+                         self.num_classes, self.batch_size, self.blank,
+                         self.norm_by_times)
+        loss = ctc.forward()
+
+        max_sequence_length = 0
+        for i in range(self.batch_size):
+            max_sequence_length = max(max_sequence_length,
+                                      self.logits_length[i])
+        # reshape logits to T*N*S
+        new_logits = np.zeros(
+            [max_sequence_length, self.batch_size, self.num_classes],
+            dtype=logits.dtype)
+
+        cur = 0
+        for batch_id in range(self.batch_size):
+            for i in range(self.logits_length[batch_id]):
+                for j in range(self.num_classes):
+                    new_logits[i, batch_id, j] = logits[cur + i, j]
+            cur = cur + self.logits_length[batch_id]
+
+        # reshape labels to N*S
+        max_target_seq_length = 0
+        for i in range(self.batch_size):
+            max_target_seq_length = max(max_target_seq_length,
+                                        self.labels_length[i])
+        new_labels = np.zeros(
+            [self.batch_size, max_target_seq_length], dtype="int32")
+
+        cur = 0
+        for batch_id in range(self.batch_size):
+            for i in range(self.labels_length[batch_id]):
+                new_labels[batch_id, i] = labels[cur + i]
+            cur = cur + self.labels_length[batch_id]
+
+        self.gradient = np.zeros(
+            [max_sequence_length, self.batch_size, self.num_classes],
+            dtype=logits.dtype)
+
+        self.inputs = {
+            "Logits": new_logits,
+            "Label": new_labels,
+            "LogitsLength": self.logits_length,
+            "LabelLength": self.labels_length
+        }
+        self.outputs = {"Loss": loss}
+        self.attrs = {
+            "blank": self.blank,
+            "norm_by_times": self.norm_by_times,
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.outputs['WarpCTCGrad'] = self.gradient
+        self.check_grad(["Logits"], "Loss")
+
+
 class TestWarpCTCOpError(unittest.TestCase):
     def test_errors(self):
         with program_guard(Program(), Program()):
@@ -359,7 +445,7 @@ class TestWarpCTCOpError(unittest.TestCase):
                 name='labels_length', shape=[None], dtype='int64')
 
             def test_logits_Variable():
-                logits_data = np.random.rand(5, 16, 6).astype("float32")
+                logits_data = np.random.rand(5, 16, 6).astype(logits.dtype)
                 fluid.layers.warpctc(
                     input=logits_data,
                     label=label,
@@ -398,6 +484,21 @@ class TestWarpCTCOpError(unittest.TestCase):
 
             self.assertRaises(TypeError, test_label_len_Variable)
 
+    def test_dygraph_errors(self):
+        def test_dygraph_with_lod():
+
+            logits = np.random.uniform(0.1, 1.0, [20, 15]).astype("float32")
+            # labels should not be blank
+            labels = np.random.randint(0, 15 - 1, [15, 1], dtype="int32")
+            softmax = paddle.to_variable(logits)
+            labels = paddle.to_variable(labels)
+
+            fluid.layers.warpctc(input=softmax, label=labels)
+
+        paddle.disable_static()
+        self.assertRaises(ValueError, test_dygraph_with_lod)
+        paddle.enable_static()
+
 
 class TestCTCLossAPICase(unittest.TestCase):
     def test_functinal_api(self):
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 47a8668362e5e0b3901cda602b483d3e96bce29a..b79b965f5b9023b09df6dbf905561f192145dbf0 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -41,7 +41,6 @@ from .clip import clip_by_norm  #DEFINE_ALIAS
 from .control_flow import cond  #DEFINE_ALIAS
 # from .control_flow import DynamicRNN  #DEFINE_ALIAS
 # from .control_flow import StaticRNN  #DEFINE_ALIAS
-from .control_flow import switch_case  #DEFINE_ALIAS
 from .control_flow import while_loop  #DEFINE_ALIAS
 # from .control_flow import rnn  #DEFINE_ALIAS
 # from .decode import BeamSearchDecoder  #DEFINE_ALIAS
diff --git a/python/paddle/nn/control_flow.py b/python/paddle/nn/control_flow.py
index 85f2fbcbe6eccf0052a10fce2960211be2244af4..a78b65c3c6c82ce65c66ce5d43889642beb51d0e 100644
--- a/python/paddle/nn/control_flow.py
+++ b/python/paddle/nn/control_flow.py
@@ -16,13 +16,10 @@
 
 from ..fluid.layers import cond  #DEFINE_ALIAS
 from ..fluid.layers import while_loop  #DEFINE_ALIAS
-from ..fluid.layers import switch_case  #DEFINE_ALIAS
-
 __all__ = [
     'cond',
     #            'DynamicRNN',
     #            'StaticRNN',
-    'switch_case',
     'while_loop',
     #            'rnn'
 ]
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 4395520eec70e8483cb61097a166576f4040cb4d..d27bac14d0a847169d8009641bfd011f7dba62e0 100644
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -933,7 +933,7 @@ def ctc_loss(log_probs,
     is interated to the Warp-CTC library to normalize values for each row of the input tensor.
 
     Parameters:
-        log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor. The tensor shape is [max_logit_length, batch_size, num_classes + 1], where max_logit_length is the longest length of input logit sequence. The data type must be float32.
+        log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor. The tensor shape is [max_logit_length, batch_size, num_classes + 1], where max_logit_length is the longest length of input logit sequence. The data type should be float32 or float64.
         labels (Tensor): The ground truth sequence with padding, which must be a 3-D Tensor. The tensor shape is [batch_size, max_label_length], where max_label_length is the longest length of label sequence. The data type must be int32.
         input_lengths (Tensor): The length for each input sequence, it should have shape [batch_size] and dtype int64.
         label_lengths (Tensor): The length for each label sequence, it should have shape [batch_size] and dtype int64.
diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py
index a610693a0a46b7e21d2c6d83716a7bc029677583..3cc6a5a15b66c89ca7d0d35638c0e2e086adbeb9 100644
--- a/python/paddle/nn/layer/conv.py
+++ b/python/paddle/nn/layer/conv.py
@@ -248,7 +248,7 @@ class Conv1d(_ConvNd):
             padding = 0
         if self._padding_mode != "zeros":
             x = F.pad(x,
-                      self._padding,
+                      self._reversed_padding_repeated_twice,
                       mode=self._padding_mode,
                       data_format=self._data_format)
         else:
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 271dc9b4e685ce06cdb12ccdcb6bb0704a5ef2a1..98048bb7e64cf6944460f666e93702351e69fd78 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -773,7 +773,7 @@ class CTCLoss(fluid.dygraph.Layer):
         reduction (string, optional): Indicate how to average the loss, the candicates are ``'none'`` | ``'mean'`` | ``'sum'``. If :attr:`reduction` is ``'mean'``, the output loss will be divided by the label_lengths, and then return the mean of quotient; If :attr:`reduction` is ``'sum'``, return the sum of loss; If :attr:`reduction` is ``'none'``, no reduction will be applied. Default is ``'mean'``.
 
     Shape:
-        log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor. The tensor shape is [max_logit_length, batch_size, num_classes + 1], where max_logit_length is the longest length of input logit sequence. The data type must be float32.
+        log_probs (Tensor): The unscaled probability sequence with padding, which is a 3-D Tensor. The tensor shape is [max_logit_length, batch_size, num_classes + 1], where max_logit_length is the longest length of input logit sequence. The data type should be float32 or float64.
         labels (Tensor): The ground truth sequence with padding, which must be a 3-D Tensor. The tensor shape is [batch_size, max_label_length], where max_label_length is the longest length of label sequence. The data type must be int32.
         input_lengths (Tensor): The length for each input sequence, it should have shape [batch_size] and dtype int64.
         label_lengths (Tensor): The length for each label sequence, it should have shape [batch_size] and dtype int64.
diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py
index f58bcc99411b5952971f257dc92374fed3728de5..510e11312f4ce1d037e687b18f79d36b0b8f1104 100644
--- a/python/paddle/static/nn/__init__.py
+++ b/python/paddle/static/nn/__init__.py
@@ -35,7 +35,7 @@ __all__ = [
     'prelu',
     'row_conv',
     'spectral_norm',
-    'reshape',
+    'switch_case',
 ]
 
 from ...fluid.layers import fc  #DEFINE_ALIAS
@@ -59,6 +59,6 @@
 from ...fluid.layers import nce  #DEFINE_ALIAS
 from ...fluid.layers import prelu  #DEFINE_ALIAS
 from ...fluid.layers import row_conv  #DEFINE_ALIAS
 from ...fluid.layers import spectral_norm  #DEFINE_ALIAS
+from ...fluid.layers import switch_case  #DEFINE_ALIAS
 from ...fluid.input import embedding  #DEFINE_ALIAS
-from ...fluid.layers import reshape  #DEFINE_ALIAS
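Since paddle.nn.functional.ctc_loss and paddle.nn.CTCLoss forward to fluid.layers.warpctc, only their docstrings change in this patch; the float64 path comes along for free. The following is a hedged end-to-end sketch through the functional API, including a backward pass that exercises the new double gradient kernel; shapes and values are made up and not taken from the patch.

    # Illustrative only: float64 through paddle.nn.functional.ctc_loss in dygraph.
    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()

    T, N, C = 5, 2, 3                          # time steps, batch, classes (excluding blank)
    logits = paddle.to_tensor(
        np.random.rand(T, N, C + 1).astype("float64"), stop_gradient=False)
    log_probs = F.log_softmax(logits, axis=-1)  # ctc_loss expects log-probabilities

    labels = paddle.to_tensor(np.random.randint(1, C + 1, [N, 3]).astype("int32"))
    input_lengths = paddle.to_tensor(np.array([T, T - 1], dtype="int64"))
    label_lengths = paddle.to_tensor(np.array([3, 2], dtype="int64"))

    loss = F.ctc_loss(log_probs, labels, input_lengths, label_lengths,
                      blank=0, reduction='mean')
    loss.backward()            # runs the new float64 WarpCTCGrad kernel
    print(loss.numpy().dtype)  # expected: float64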