diff --git a/paddle/fluid/operators/cvm_op.h b/paddle/fluid/operators/cvm_op.h index 38e5a2afa11feace17b8d870cdc3ef0ed38745d7..77cb7e446b7bc8179dc4832fa55cce4754e06ced 100644 --- a/paddle/fluid/operators/cvm_op.h +++ b/paddle/fluid/operators/cvm_op.h @@ -22,36 +22,60 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; +template +void CvmComputeKernel(const bool use_cvm, const int64_t item_width, const T** X, + T** Y) { + const auto cvm_offset = use_cvm ? 0 : 2; + + std::memcpy(*Y, *X + cvm_offset, (item_width - cvm_offset) * sizeof(T)); + + if (use_cvm) { + (*Y)[0] = log((*Y)[0] + 1); + (*Y)[1] = log((*Y)[1] + 1) - (*Y)[0]; + } + + (*X) += item_width; + (*Y) += item_width - cvm_offset; +} + +template +void CvmGradComputeKernel(const bool use_cvm, const int64_t item_width, + const T& CVM, const T** DY, T** DX) { + const auto cvm_offset = use_cvm ? 0 : 2; + + std::memcpy(*DX + cvm_offset, *DY, (item_width - cvm_offset) * sizeof(T)); + + (*DX)[0] = (&CVM)[0]; + (*DX)[1] = (&CVM)[1]; + + (*DX) += item_width; + (*DY) += item_width - cvm_offset; +} + template class CVMOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const LoDTensor* x = context.Input("X"); + const auto* x = context.Input("X"); const T* x_data = x->data(); - auto lod = x->lod()[0]; - int64_t item_size = x->numel() / x->dims()[0]; - int offset = 2; - if (!context.Attr("use_cvm")) { - item_size -= offset; - } - LoDTensor* y = context.Output("Y"); + + auto batch_size = x->dims()[0]; + auto item_size = x->numel() / batch_size; + auto use_cvm = context.Attr("use_cvm"); + + auto* y = context.Output("Y"); T* y_data = y->mutable_data(context.GetPlace()); - int seq_num = static_cast(lod.size()) - 1; - for (int i = 0; i < seq_num; ++i) { - int64_t seq_len = static_cast(lod[i + 1] - lod[i]); - - for (int j = 0; j < seq_len; ++j) { - if (context.Attr("use_cvm")) { - std::memcpy(y_data, x_data, item_size * sizeof(T)); - y_data[0] = log(y_data[0] + 1); - y_data[1] = log(y_data[1] + 1) - y_data[0]; - x_data += item_size; - y_data += item_size; - } else { - std::memcpy(y_data, x_data + offset, item_size * sizeof(T)); - x_data += item_size + offset; - y_data += item_size; + // for Input X do not have Lod Information. + if (x->NumLevels() == 0) { + for (int i = 0; i < batch_size; i++) { + CvmComputeKernel(use_cvm, item_size, &x_data, &y_data); + } + } else { + auto lod = x->lod()[0]; + for (int i = 0; i < lod.size() - 1; ++i) { + for (int j = 0; j < lod[i + 1] - lod[i]; ++j) { + CvmComputeKernel(use_cvm, item_size, &x_data, &y_data); } } } @@ -62,42 +86,39 @@ template class CVMGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - LoDTensor* dx = context.Output(framework::GradVarName("X")); + auto* dx = context.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(context.GetPlace()); const Tensor* cvm = context.Input("CVM"); const T* cvm_data = cvm->data(); - int offset = 2; - const framework::LoDTensor* dOut = + + const auto* dOut = context.Input(framework::GradVarName("Y")); const T* dout_data = dOut->data(); - auto lod = dx->lod()[0]; - int64_t item_size = dx->numel() / dx->dims()[0]; - if (!context.Attr("use_cvm")) { - item_size -= offset; - } + auto use_cvm = context.Attr("use_cvm"); - int seq_num = static_cast(lod.size()) - 1; - for (int i = 0; i < seq_num; ++i) { - int64_t seq_len = static_cast(lod[i + 1] - lod[i]); - - for (int j = 0; j < seq_len; ++j) { - if (context.Attr("use_cvm")) { - std::memcpy(dx_data, dout_data, item_size * sizeof(T)); - dx_data[0] = cvm_data[0]; - dx_data[1] = cvm_data[1]; - dx_data += item_size; - dout_data += item_size; - } else { - std::memcpy(dx_data + offset, dout_data, item_size * sizeof(T)); - dx_data[0] = cvm_data[0]; - dx_data[1] = cvm_data[1]; - dx_data += item_size + offset; - dout_data += item_size; + auto offset = 2; + auto batch_size = dx->dims()[0]; + auto item_size = dx->numel() / batch_size; + + // for Input X do not have Lod Information. + if (dx->NumLevels() == 0) { + for (int x = 0; x < batch_size; ++x) { + CvmGradComputeKernel(use_cvm, item_size, *cvm_data, &dout_data, + &dx_data); + cvm_data += offset; + } + } else { + auto lod = dx->lod()[0]; + int seq_num = static_cast(lod.size()) - 1; + for (int i = 0; i < seq_num; ++i) { + for (int j = 0; j < lod[i + 1] - lod[i]; ++j) { + CvmGradComputeKernel(use_cvm, item_size, *cvm_data, &dout_data, + &dx_data); } + cvm_data += offset; } - cvm_data += offset; } } }; diff --git a/python/paddle/fluid/tests/unittests/test_cvm_op.py b/python/paddle/fluid/tests/unittests/test_cvm_op.py index 67c310bd2f1155e4c5492e90a96cbdac9e8a3481..69bc0b66510fefb2f7ae0d34a206bac2d47a1a84 100644 --- a/python/paddle/fluid/tests/unittests/test_cvm_op.py +++ b/python/paddle/fluid/tests/unittests/test_cvm_op.py @@ -19,15 +19,50 @@ from op_test import OpTest import unittest -class TestCVMOp(OpTest): +def cvm_compute(X, item_width, use_cvm): + cvm_offset = 0 if use_cvm else 2 + batch_size = X.shape[0] + + Y = np.ones([batch_size, item_width - cvm_offset], np.float32) + + for idx in range(batch_size): + if use_cvm: + Y[idx] = X[idx] + Y[idx][0] = log(Y[idx][0] + 1) + Y[idx][1] = log(Y[idx][1] + 1) - Y[idx][0] + else: + Y[idx] = X[idx][2:] + + return Y + + +def cvm_grad_compute(DY, CVM, item_width, use_cvm): + batch_size = DY.shape[0] + DX = np.ones([batch_size, item_width], np.float32) + + for idx in range(batch_size): + DX[idx][0] = CVM[idx][0] + DX[idx][1] = CVM[idx][1] + + if use_cvm: + DX[idx][2:] = DY[idx][2:] + else: + DX[idx][2:] = DY[idx] + return DX + + +class TestCVMOpWithLodTensor(OpTest): """ Test cvm op with discrete one-hot labels. """ def setUp(self): self.op_type = "cvm" - batch_size = 4 + self.use_cvm = True + + batch_size = 8 dims = 11 + lod = [[1]] self.inputs = { 'X': (np.random.uniform(0, 1, [1, dims]).astype("float32"), lod), @@ -43,5 +78,55 @@ class TestCVMOp(OpTest): self.check_output() +class TestCVMOpWithOutLodTensor1(OpTest): + """ + Test cvm op with discrete one-hot labels. + """ + + def setUp(self): + self.op_type = "cvm" + self.use_cvm = True + + batch_size = 2 + item_width = 11 + + input = np.random.uniform(0, 1, + (batch_size, item_width)).astype('float32') + output = cvm_compute(input, item_width, self.use_cvm) + cvm = np.array([[0.6, 0.4]]).astype("float32") + + self.inputs = {'X': input, 'CVM': cvm} + self.attrs = {'use_cvm': self.use_cvm} + self.outputs = {'Y': output} + + def test_check_output(self): + self.check_output() + + +class TestCVMOpWithOutLodTensor2(OpTest): + """ + Test cvm op with discrete one-hot labels. + """ + + def setUp(self): + self.op_type = "cvm" + self.use_cvm = False + + batch_size = 2 + item_width = 11 + + input = np.random.uniform(0, 1, + (batch_size, item_width)).astype('float32') + output = cvm_compute(input, item_width, self.use_cvm) + cvm = np.array([[0.6, 0.4]]).astype("float32") + + self.inputs = {'X': input, 'CVM': cvm} + self.attrs = {'use_cvm': self.use_cvm} + self.outputs = {'Y': output} + + def test_check_output(self): + self.check_output() + + if __name__ == '__main__': unittest.main()