From 1bc8de32091d11a57cda7af0b38b1766d51a06d5 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Mon, 22 Jan 2018 16:59:54 +0800
Subject: [PATCH] 1. Add sequence_num as edit distance op's output
 2. Fix evaluator using 'reduce_sum' op instead of 'mean' op

---
 paddle/operators/CMakeLists.txt              |  1 +
 paddle/operators/edit_distance_op.cc         |  4 ++++
 paddle/operators/edit_distance_op.cu         |  9 +++++++-
 paddle/operators/edit_distance_op.h          |  4 +++-
 python/paddle/v2/fluid/evaluator.py          | 22 +++++++++----------
 python/paddle/v2/fluid/layers/nn.py          |  6 +++--
 .../v2/fluid/tests/test_edit_distance_op.py  |  6 +++--
 7 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 6745a8da177..15f7cb6b560 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -156,6 +156,7 @@ op_library(parallel_do_op DEPS executor)
 # Regist multiple Kernel to pybind
 if (WITH_GPU)
 op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
+op_library(edit_distance_op SRCS edit_distance_op.cc edit_distance_op.cu DEPS math_function)
 op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
 op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
            conv_transpose_cudnn_op.cu.cc DEPS vol2col)
diff --git a/paddle/operators/edit_distance_op.cc b/paddle/operators/edit_distance_op.cc
index 62a1fcebe7b..7e7dfc79eba 100644
--- a/paddle/operators/edit_distance_op.cc
+++ b/paddle/operators/edit_distance_op.cc
@@ -25,6 +25,8 @@ class EditDistanceOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasInput("Hyps"), "Input(Hyps) shouldn't be null.");
     PADDLE_ENFORCE(ctx->HasInput("Refs"), "Input(Refs) shouldn't be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("SequenceNum"),
+                   "Output(SequenceNum) shouldn't be null.");
     auto hyp_dims = ctx->GetInputDim("Hyps");
     auto ref_dims = ctx->GetInputDim("Refs");
     PADDLE_ENFORCE(hyp_dims.size() == 2 && hyp_dims[1] == 1,
@@ -34,6 +36,7 @@ class EditDistanceOp : public framework::OperatorWithKernel {
                    "Input(Refs) must be a 2-D LoDTensor with the 2nd dimension "
                    "equal to 1.");
     ctx->SetOutputDim("Out", ctx->GetInputDim("Refs"));
+    ctx->SetOutputDim("SequenceNum", {1});
   }
 
 protected:
@@ -54,6 +57,7 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Refs",
              "(2-D LoDTensor, 2nd dim. equal to 1) "
              "The indices for reference strings.");
+    AddOutput("SequenceNum", "The sequence count of current batch");
     AddAttr("normalized",
             "(bool, default false) Indicated whether to normalize "
             "the edit distance by the length of reference string.")
diff --git a/paddle/operators/edit_distance_op.cu b/paddle/operators/edit_distance_op.cu
index 338fd79bcc1..c3e116af086 100644
--- a/paddle/operators/edit_distance_op.cu
+++ b/paddle/operators/edit_distance_op.cu
@@ -14,6 +14,7 @@ limitations under the License.
 */
 #include 
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/math_function.h"
 #include "paddle/platform/cuda_helper.h"
 #include "paddle/platform/gpu_info.h"
 
@@ -72,6 +73,8 @@ class EditDistanceGPUKernel : public framework::OpKernel {
 
     auto* x1_t = ctx.Input("Hyps");
     auto* x2_t = ctx.Input("Refs");
+    auto* sequence_num = ctx.Output("SequenceNum");
+    sequence_num->mutable_data(ctx.GetPlace());
     auto normalized = ctx.Attr("normalized");
 
     auto stream = reinterpret_cast(
@@ -88,7 +91,11 @@ class EditDistanceGPUKernel : public framework::OpKernel {
                      "Reference string %d is empty.", i);
     }
 
-    auto num_strs = hyp_lod.size() - 1;
+    const size_t num_strs = hyp_lod.size() - 1;
+    math::SetConstant set_constant;
+    set_constant(ctx.template device_context(),
+                 sequence_num, static_cast(num_strs));
+
     out_t->Resize({static_cast(num_strs), 1});
     out_t->mutable_data(ctx.GetPlace());
     auto out = out_t->data();
diff --git a/paddle/operators/edit_distance_op.h b/paddle/operators/edit_distance_op.h
index 4c5a29813ce..974299e604d 100644
--- a/paddle/operators/edit_distance_op.h
+++ b/paddle/operators/edit_distance_op.h
@@ -16,7 +16,6 @@ limitations under the License. */
 #include 
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
-
 namespace paddle {
 namespace operators {
@@ -28,6 +27,8 @@ class EditDistanceKernel : public framework::OpKernel {
 
     auto* x1_t = ctx.Input("Hyps");
     auto* x2_t = ctx.Input("Refs");
+    auto* sequence_num = ctx.Output("SequenceNum");
+    int64_t* seq_num_data = sequence_num->mutable_data(ctx.GetPlace());
 
     auto normalized = ctx.Attr("normalized");
 
@@ -41,6 +42,7 @@ class EditDistanceKernel : public framework::OpKernel {
                      "Reference string %d is empty.", i);
     }
     auto num_strs = hyp_lod.size() - 1;
+    *seq_num_data = static_cast(num_strs);
 
     out_t->Resize({static_cast(num_strs), 1});
     out_t->mutable_data(ctx.GetPlace());
diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py
index 351db4f12d0..67e99a70ad3 100644
--- a/python/paddle/v2/fluid/evaluator.py
+++ b/python/paddle/v2/fluid/evaluator.py
@@ -219,15 +219,14 @@ class EditDistance(Evaluator):
 
         self.total_error = self.create_state(
             dtype='float32', shape=[1], suffix='total')
-        self.batch_num = self.create_state(
-            dtype='float32', shape=[1], suffix='total')
-        error = layers.edit_distance(input=input, label=label)
-        error = layers.cast(x=error, dtype='float32')
-        mean_error = layers.mean(x=error)
-        layers.sums(input=[self.total_error, mean_error], out=self.total_error)
-        const1 = layers.fill_constant(shape=[1], value=1.0, dtype="float32")
-        layers.sums(input=[self.batch_num, const1], out=self.batch_num)
-        self.metrics.append(mean_error)
+        self.seq_num = self.create_state(
+            dtype='int64', shape=[1], suffix='total')
+        error, seq_num = layers.edit_distance(input=input, label=label)
+        #error = layers.cast(x=error, dtype='float32')
+        sum_error = layers.reduce_sum(error)
+        layers.sums(input=[self.total_error, sum_error], out=self.total_error)
+        layers.sums(input=[self.seq_num, seq_num], out=self.seq_num)
+        self.metrics.append(sum_error)
 
     def eval(self, executor, eval_program=None):
         if eval_program is None:
@@ -235,6 +234,7 @@ class EditDistance(Evaluator):
         block = eval_program.current_block()
         with program_guard(main_program=eval_program):
             total_error = _clone_var_(block, self.total_error)
-            batch_num = _clone_var_(block, self.batch_num)
-            out = layers.elementwise_div(x=total_error, y=batch_num)
+            seq_num = _clone_var_(block, self.seq_num)
+            seq_num = layers.cast(x=seq_num, dtype='float32')
+            out = layers.elementwise_div(x=total_error, y=seq_num)
             return np.array(executor.run(eval_program, fetch_list=[out])[0])
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index c57811df1da..9a1fc2f1202 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -1918,14 +1918,16 @@ def edit_distance(input, label, normalized=False, tokens=None, name=None):
 
     # edit distance op
     edit_distance_out = helper.create_tmp_variable(dtype="int64")
+    sequence_num = helper.create_tmp_variable(dtype="int64")
     helper.append_op(
         type="edit_distance",
         inputs={"Hyps": [input],
                 "Refs": [label]},
-        outputs={"Out": [edit_distance_out]},
+        outputs={"Out": [edit_distance_out],
+                 "SequenceNum": [sequence_num]},
         attrs={"normalized": normalized})
 
-    return edit_distance_out
+    return edit_distance_out, sequence_num
 
 
 def ctc_greedy_decoder(input, blank, name=None):
diff --git a/python/paddle/v2/fluid/tests/test_edit_distance_op.py b/python/paddle/v2/fluid/tests/test_edit_distance_op.py
index 5f5634e2979..01e7e64d052 100644
--- a/python/paddle/v2/fluid/tests/test_edit_distance_op.py
+++ b/python/paddle/v2/fluid/tests/test_edit_distance_op.py
@@ -60,6 +60,7 @@ class TestEditDistanceOp(OpTest):
 
         num_strs = len(x1_lod) - 1
         distance = np.zeros((num_strs, 1)).astype("float32")
+        sequence_num = np.array(2).astype("int64")
         for i in range(0, num_strs):
             distance[i] = Levenshtein(
                 hyp=x1[x1_lod[i]:x1_lod[i + 1]],
@@ -69,7 +70,7 @@ class TestEditDistanceOp(OpTest):
                 distance[i] = distance[i] / len_ref
         self.attrs = {'normalized': normalized}
         self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
-        self.outputs = {'Out': distance}
+        self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
 
     def test_check_output(self):
         self.check_output()
@@ -88,6 +89,7 @@ class TestEditDistanceOpNormalized(OpTest):
 
         num_strs = len(x1_lod) - 1
         distance = np.zeros((num_strs, 1)).astype("float32")
+        sequence_num = np.array(3).astype("int64")
         for i in range(0, num_strs):
             distance[i] = Levenshtein(
                 hyp=x1[x1_lod[i]:x1_lod[i + 1]],
@@ -97,7 +99,7 @@ class TestEditDistanceOpNormalized(OpTest):
                 distance[i] = distance[i] / len_ref
         self.attrs = {'normalized': normalized}
         self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
-        self.outputs = {'Out': distance}
+        self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
 
     def test_check_output(self):
         self.check_output()
-- 
GitLab
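
Reviewer note (not part of the patch): the evaluator change replaces per-batch `mean` accumulation with `reduce_sum` plus the op's new `SequenceNum` output, so the final metric becomes total edit distance divided by total sequence count rather than an average of per-batch means. A minimal NumPy sketch of that accumulation logic, with an illustrative helper name and toy distances that are not taken from this patch:

```python
import numpy as np

def accumulate_edit_distance(batches):
    # Mirrors the evaluator's states: total_error accumulates reduce_sum(error)
    # of each batch, seq_num accumulates the op's SequenceNum output.
    total_error = 0.0
    seq_num = 0
    for distances in batches:  # per-sequence edit distances of one batch
        total_error += float(np.sum(distances))
        seq_num += len(distances)
    # eval() step: elementwise_div(total_error, cast(seq_num, 'float32'))
    return total_error / seq_num

# Two batches of unequal size: averaging per-batch means would give ~1.83,
# while the sequence-weighted average computed by this patch gives 1.75.
print(accumulate_edit_distance([np.array([1.0, 4.0, 0.0]), np.array([2.0])]))
```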
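The updated unit tests compare the op against a `Levenshtein(hyp, ref)` helper whose body falls outside this excerpt; a standard dynamic-programming edit distance, offered here only as an assumption about what that reference computes, looks like:

```python
import numpy as np

def levenshtein(hyp, ref):
    # dp[i, j] = edit distance between the first i tokens of hyp
    # and the first j tokens of ref (assumed reference implementation).
    m, n = len(hyp), len(ref)
    dp = np.zeros((m + 1, n + 1), dtype=np.float32)
    dp[:, 0] = np.arange(m + 1)  # delete every token of hyp
    dp[0, :] = np.arange(n + 1)  # insert every token of ref
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if hyp[i - 1] == ref[j - 1] else 1
            dp[i, j] = min(dp[i - 1, j] + 1,         # deletion
                           dp[i, j - 1] + 1,         # insertion
                           dp[i - 1, j - 1] + cost)  # substitution
    return dp[m, n]

print(levenshtein([1, 5, 3], [1, 2, 3]))  # 1.0 (one substitution)
```

With the `normalized` attribute set, each distance is additionally divided by the reference length, matching the `distance[i] / len_ref` step in the tests.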