From e63ad0a6bdb36967d417633a074e0e966ca55e78 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Mon, 28 Aug 2017 15:15:26 +0800
Subject: [PATCH] HuberRegressionLoss and HuberTwoClassification support multi-dimension data

The four Huber forward/backward routines used to index `out`, `lbl` and
`grad` with the sample index only. They now walk every column of each row
(`dim = output.getWidth()`):

* forwardImp accumulates the per-element loss of a row into `cost[i]`, so
  `cost` is zero-initialised and `target` still receives one value per
  sample.
* backwardImp adds the gradient to each element `i * dim + j`, and the GPU
  path copies `numSamples * dim` values back instead of `numSamples`.
* HuberTwoClassification no longer checks that the output width is 1; the
  single label of a sample (`lbl[i]`) is applied to all of its columns.
---
Review notes (ignored by `git am`, not part of the commit message):

* Line breaks and indentation were rebuilt from a copy of this patch that
  had been collapsed onto three lines. Hunk line counts match the `@@`
  headers and the diffstat, but context whitespace should be checked
  against CostLayer.cpp before applying (`git apply --ignore-whitespace`
  if it does not match).
* `std::vector<real>`: the collapsed copy had lost the template argument.
  `real` is presumed from the `real` values stored in `cost`; confirm
  against the tree.
* The flat element index is declared `size_t` instead of `int`. The
  expression `i * dim + j` is a `size_t`, and narrowing it to `int` goes
  negative once `numSamples * dim` exceeds INT_MAX. Because of this the
  post-image no longer matches blob aa4a26a83f or the commit named in the
  first line.
* The author's e-mail address is missing from this copy.

 paddle/gserver/layers/CostLayer.cpp | 67 ++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 7f648070f2..aa4a26a83f 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -611,22 +611,26 @@ void HuberRegressionLoss::forwardImp(Matrix& output,
                                      Matrix& target) {
   HuberCost::forwardImp(output, label, target);
   size_t numSamples = target.getHeight();
+  size_t dim = output.getWidth();
   CHECK(label.value);
   CHECK_EQ((*label.value).getHeight(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
-  CHECK_EQ(output.getWidth(), (*label.value).getWidth());
+  CHECK_EQ(dim, (*label.value).getWidth());
   CHECK_EQ(target.getWidth(), (size_t)1);
 
   real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
   real* lbl =
       useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
-  std::vector<real> cost(numSamples);
+  std::vector<real> cost(numSamples, 0);
   for (size_t i = 0; i < numSamples; ++i) {
-    real a = std::abs(lbl[i] - out[i]);
-    if (a <= delta_)
-      cost[i] = a * a / 2;
-    else
-      cost[i] = delta_ * (a - delta_ / 2);
+    for (size_t j = 0; j < dim; ++j) {
+      size_t index = i * dim + j;
+      real a = std::abs(lbl[index] - out[index]);
+      if (a <= delta_)
+        cost[i] += a * a / 2;
+      else
+        cost[i] += delta_ * (a - delta_ / 2);
+    }
   }
   target.copyFrom(cost.data(), numSamples);
 }
@@ -635,18 +639,22 @@ void HuberRegressionLoss::backwardImp(Matrix& output,
                                       Argument& label,
                                       Matrix& outputG) {
   size_t numSamples = output.getHeight();
+  size_t dim = output.getWidth();
   real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
   real* lbl =
       useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
   real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
   for (size_t i = 0; i < numSamples; ++i) {
-    real a = lbl[i] - out[i];
-    if (std::abs(a) <= delta_)
-      grad[i] += -a;
-    else
-      grad[i] += a > 0 ? -delta_ : delta_;
+    for (size_t j = 0; j < dim; ++j) {
+      size_t index = i * dim + j;
+      real a = lbl[index] - out[index];
+      if (std::abs(a) <= delta_)
+        grad[index] += -a;
+      else
+        grad[index] += a > 0 ? -delta_ : delta_;
+    }
   }
-  if (useGpu_) outputG.copyFrom(grad, numSamples);
+  if (useGpu_) outputG.copyFrom(grad, numSamples * dim);
 }
 
 //
@@ -664,23 +672,25 @@ void HuberTwoClassification::forwardImp(Matrix& output,
                                         Matrix& target) {
   HuberCost::forwardImp(output, label, target);
   size_t numSamples = target.getHeight();
+  size_t dim = output.getWidth();
   CHECK(label.ids);
   CHECK_EQ((*label.ids).getSize(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
-  CHECK_EQ(output.getWidth(), (size_t)1);
   CHECK_EQ(target.getWidth(), (size_t)1);
 
   real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
   int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
-  std::vector<real> cost(numSamples);
+  std::vector<real> cost(numSamples, 0);
   for (size_t i = 0; i < numSamples; ++i) {
     int y = 2 * lbl[i] - 1;
-    if (out[i] * y < -1)
-      cost[i] = -4 * out[i] * y;
-    else if (out[i] * y < 1)
-      cost[i] = (1 - out[i] * y) * (1 - out[i] * y);
-    else
-      cost[i] = 0;
+    for (size_t j = 0; j < dim; ++j) {
+      size_t index = i * dim + j;
+      real a = out[index] * y;
+      if (a < -1)
+        cost[i] += -4 * a;
+      else if (a < 1)
+        cost[i] += (1 - a) * (1 - a);
+    }
   }
   target.copyFrom(cost.data(), numSamples);
 }
@@ -689,17 +699,22 @@ void HuberTwoClassification::backwardImp(Matrix& output,
                                          Argument& label,
                                          Matrix& outputG) {
   size_t numSamples = output.getHeight();
+  size_t dim = output.getWidth();
   real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
   int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
   real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
   for (size_t i = 0; i < numSamples; ++i) {
     int y = 2 * lbl[i] - 1;
-    if (y * out[i] < -1)
-      grad[i] += -4 * y;
-    else if (y * out[i] < 1)
-      grad[i] += -2 * (1 - y * out[i]) * y;
+    for (size_t j = 0; j < dim; ++j) {
+      size_t index = i * dim + j;
+      real a = out[index] * y;
+      if (a < -1)
+        grad[index] += -4 * y;
+      else if (a < 1)
+        grad[index] += -2 * (1 - a) * y;
+    }
   }
-  if (useGpu_) outputG.copyFrom(grad, numSamples);
+  if (useGpu_) outputG.copyFrom(grad, numSamples * dim);
 }
 /**
  * This cost layer compute the sum of its input as loss.
-- 
GitLab