diff --git a/paddle/fluid/operators/kldiv_loss_op.cc b/paddle/fluid/operators/kldiv_loss_op.cc
index d04221054089d51f0e567fdd83a00f5f835f3b1c..f1b35351274c2013d1f2c7d19348a30a9b2a5db4 100644
--- a/paddle/fluid/operators/kldiv_loss_op.cc
+++ b/paddle/fluid/operators/kldiv_loss_op.cc
@@ -81,7 +81,7 @@ class KLDivLossOpMaker : public framework::OpProtoAndCheckerMaker {
         "The reduction type to apply to the output, available types "
         "are 'none' | 'batchmean' | 'mean' | 'sum', 'none' for no "
         "reduction, 'batchmean' for the sum of output divided by "
-        "batchmean size, 'mean' for the average valud of all output, "
+        "batch size, 'mean' for the average value of all output, "
         "'sum' for the sum of the output.")
         .SetDefault("mean");
 
diff --git a/paddle/fluid/operators/kldiv_loss_op.cu b/paddle/fluid/operators/kldiv_loss_op.cu
index ef394feb64236ddadf6bb819fceafdd3e2e9389d..5226cb8c08e3db4a0bfbbe4440c27264903f06e3 100644
--- a/paddle/fluid/operators/kldiv_loss_op.cu
+++ b/paddle/fluid/operators/kldiv_loss_op.cu
@@ -13,9 +13,10 @@ limitations under the License. */
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 REGISTER_OP_CUDA_KERNEL(
-    sum, ops::KLDivLossKernel<plat::CUDADeviceContext, float>,
+    kldiv_loss,
+    ops::KLDivLossKernel<plat::CUDADeviceContext, float>,
     ops::KLDivLossKernel<plat::CUDADeviceContext, double>);
 REGISTER_OP_CUDA_KERNEL(
-    sum_grad,
+    kldiv_loss_grad,
     ops::KLDivLossGradKernel<plat::CUDADeviceContext, float>,
     ops::KLDivLossGradKernel<plat::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/kldiv_loss_op.h b/paddle/fluid/operators/kldiv_loss_op.h
index 2867e44e759e1cd233897690717be3b44ab30824..fa53753d0ed3940b34166106218212afe0e47dc8 100644
--- a/paddle/fluid/operators/kldiv_loss_op.h
+++ b/paddle/fluid/operators/kldiv_loss_op.h
@@ -54,13 +54,12 @@ class KLDivLossKernel : public framework::OpKernel<T> {
     auto input_t = EigenVector<T>::Flatten(*input);
     auto target_t = EigenVector<T>::Flatten(*target);
     auto loss_t = EigenVector<T>::Flatten(*loss);
-    // auto target_mask = (target_t > target_t.constant(0)).template cast<T>();
-    // auto output = (target_t * (target_t.log() - input_t)) * target_mask;
     auto output = target_t.binaryExpr(input_t, KLDivLossForward<T>());
     if ("none" == reduction) {
       loss_t.device(place) = output;
     } else if ("batchmean" == reduction) {
-      loss_t.device(place) = output.sum() / static_cast<T>(n);
+      auto output_sum = output.sum().eval();
+      loss_t.device(place) = output_sum / output_sum.constant(n);
     } else if ("mean" == reduction) {
       loss_t.device(place) = output.mean();
     } else if ("sum" == reduction) {
@@ -74,19 +73,17 @@ class KLDivLossGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-    auto* input = ctx.Input<Tensor>("X");
     auto* target = ctx.Input<Tensor>("Target");
     auto reduction = ctx.Attr<std::string>("reduction");
     auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
 
-    const int n = input->dims()[0];
-    const int numel = input->numel();
+    const int n = input_grad->dims()[0];
+    const int numel = input_grad->numel();
     const int expand = numel / loss_grad->numel();
 
     input_grad->mutable_data<T>(ctx.GetPlace());
 
-    auto input_t = EigenVector<T>::Flatten(*input);
     auto target_t = EigenVector<T>::Flatten(*target);
     auto input_grad_t = EigenVector<T>::Flatten(*input_grad);
     auto loss_grad_t = EigenVector<T>::Flatten(*loss_grad);
@@ -96,14 +93,6 @@ class KLDivLossGradKernel : public framework::OpKernel<T> {
     auto loss_grad_expand = loss_grad_t.broadcast(Array1(expand));
     input_grad_t.device(place) =
         target_t * target_t.constant(-1.0) * loss_grad_expand * target_mask;
-    // if (reduction == "none") {
-    //   input_grad_t.device(place) =
-    //       target_t * loss_grad_t * target_t.constant(-1.0);
-    // } else {
-    //   auto loss_grad_expand = loss_grad_t.broadcast(Array1(numel));
-    //   input_grad_t.device(place) =
-    //       target_t * loss_grad_expand * target_t.constant(-1.0);
-    // }
     if ("mean" == reduction) {
       input_grad_t.device(place) = input_grad_t / static_cast<T>(numel);
     } else if ("batchmean" == reduction) {
diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
index 21bac67326f915c56a381aba784717792f0bec58..b1d4e7f6ed5f5376fd00d3c0cabd30b5d61a1fea 100644
--- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
+++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
@@ -47,36 +47,37 @@ class TestKLDivLossOp(OpTest):
             'Target': target,
         }
         loss = kldiv_loss(x, target, self.reduction)
-        self.outputs = {'Loss': loss}
+        self.outputs = {'Loss': loss.astype('float32')}
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
         self.check_grad(
-            ['X'], 'Loss', no_grad_set=set(["Target"]), max_relative_error=0.1)
+            ['X'], 'Loss', no_grad_set=set(["Target"]), max_relative_error=0.06)
 
+    def initTestCase(self):
+        self.x_shape = (3, 7, 7)
+        self.reduction = 'none'
+
+
+class TestKLDivLossOp2(TestKLDivLossOp):
     def initTestCase(self):
         self.x_shape = (2, 3, 5, 5)
         self.reduction = 'batchmean'
 
 
-# class TestKLDivLossOp2(TestKLDivLossOp):
-#     def initTestCase(self):
-#         self.x_shape = (3, 7, 7)
-#         self.reduction = 'batchmean'
-#
-#
-# class TestKLDivLossOp3(TestKLDivLossOp):
-#     def initTestCase(self):
-#         self.x_shape = (2, 3, 5, 7, 9)
-#         self.reduction = 'mean'
-#
-#
-# class TestKLDivLossOp4(TestKLDivLossOp):
-#     def initTestCase(self):
-#         self.x_shape = (5, 7)
-#         self.reduction = 'sum'
+class TestKLDivLossOp3(TestKLDivLossOp):
+    def initTestCase(self):
+        self.x_shape = (2, 3, 5, 7, 9)
+        self.reduction = 'mean'
+
+
+class TestKLDivLossOp4(TestKLDivLossOp):
+    def initTestCase(self):
+        self.x_shape = (5, 7)
+        self.reduction = 'sum'
+
 
 if __name__ == "__main__":
     unittest.main()
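Note on the test semantics: `loss = kldiv_loss(x, target, self.reduction)` above calls a NumPy reference helper defined earlier in `test_kldiv_loss_op.py`, which this diff does not touch. The sketch below is a minimal, illustrative reconstruction of such a helper, assuming `x` carries log-probabilities and `target` carries probabilities; the name `kldiv_loss_reference` and the `1e-12` clamp are assumptions for this sketch, not code from the patch. Its `'batchmean'` branch divides the summed loss by the batch size `x.shape[0]`, the same `n = dims()[0]` used by the updated Eigen expression `output_sum / output_sum.constant(n)` in `kldiv_loss_op.h`.

```python
import numpy as np


def kldiv_loss_reference(x, target, reduction):
    """Illustrative KL-divergence loss: x = log-probabilities, target = probabilities."""
    # Elementwise term target * (log(target) - x); entries with target <= 0
    # contribute zero, mirroring the target_mask used by the C++ kernel.
    # The 1e-12 clamp only avoids log(0) warnings and is an assumption here.
    output = np.where(target > 0,
                      target * (np.log(np.maximum(target, 1e-12)) - x),
                      np.zeros_like(x))
    if reduction == 'batchmean':
        return output.sum() / x.shape[0]  # sum divided by batch size (dims[0])
    if reduction == 'mean':
        return output.mean()
    if reduction == 'sum':
        return output.sum()
    return output  # 'none': keep the elementwise loss


if __name__ == '__main__':
    target = np.random.uniform(0.1, 1.0, (2, 3, 5, 5)).astype('float32')
    x = np.log(np.random.uniform(0.1, 1.0, (2, 3, 5, 5)).astype('float32'))
    print(kldiv_loss_reference(x, target, 'batchmean'))
```

This also explains the grad-kernel change above: since only `input_grad` is needed to recover the batch size and element count, the unused `X` input can be dropped from `KLDivLossGradKernel`.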