未验证 提交 b656d97e 编写于 作者: F fengjiayi 提交者: GitHub

Merge pull request #12485 from JiayiFeng/dev_ops_tensor_support

Make lookup_table_op and softmax_op supporting high rank tensor
...@@ -32,11 +32,16 @@ class LookupTableOp : public framework::OperatorWithKernel { ...@@ -32,11 +32,16 @@ class LookupTableOp : public framework::OperatorWithKernel {
auto table_dims = ctx->GetInputDim("W"); auto table_dims = ctx->GetInputDim("W");
auto ids_dims = ctx->GetInputDim("Ids"); auto ids_dims = ctx->GetInputDim("Ids");
int ids_rank = ids_dims.size();
PADDLE_ENFORCE_EQ(ids_dims.size(), 2); PADDLE_ENFORCE_EQ(table_dims.size(), 2);
PADDLE_ENFORCE_EQ(ids_dims[1], 1); PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1,
"The last dimension of the 'Ids' tensor must be 1.");
ctx->SetOutputDim("Out", {ids_dims[0], table_dims[1]}); auto output_dims =
framework::vectorize(framework::slice_ddim(ids_dims, 0, ids_rank - 1));
output_dims.push_back(table_dims[1]);
ctx->SetOutputDim("Out", framework::make_ddim(output_dims));
if (ctx->GetOutputsVarType("Out")[0] == if (ctx->GetOutputsVarType("Out")[0] ==
framework::proto::VarType::LOD_TENSOR) { framework::proto::VarType::LOD_TENSOR) {
...@@ -61,8 +66,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -61,8 +66,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Ids", AddInput("Ids",
"An input with type int32 or int64 " "An input with type int32 or int64 "
"contains the ids to be looked up in W. " "contains the ids to be looked up in W. "
"Ids must be a column vector with rank = 2. " "The last dimension size must be 1.");
"The 2nd dimension size must be 1.");
AddOutput("Out", "The lookup results, which have the same type as W."); AddOutput("Out", "The lookup results, which have the same type as W.");
AddAttr<bool>("is_sparse", AddAttr<bool>("is_sparse",
"(boolean, default false) " "(boolean, default false) "
......
...@@ -118,28 +118,31 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> { ...@@ -118,28 +118,31 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W")); auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W"));
auto *ids_data = ids->data<int64_t>(); auto *ids_data = ids->data<int64_t>();
auto ids_dim = ids->dims(); int64_t ids_num = ids->numel();
auto stream = dev_ctx.stream(); auto stream = dev_ctx.stream();
// copy GPU memory to CPU pinned memory // copy GPU memory to CPU pinned memory
framework::Vector<int64_t> new_rows; framework::Vector<int64_t> new_rows;
new_rows.resize(ids_dim[0]); new_rows.resize(ids_num);
auto gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace()); auto gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace());
// TODO(yuyang18): Strange code here. // TODO(yuyang18): Strange code here.
memory::Copy(platform::CPUPlace(), memory::Copy(platform::CPUPlace(),
new_rows.CUDAMutableData(context.GetPlace()), gpu_place, new_rows.CUDAMutableData(context.GetPlace()), gpu_place,
ids_data, ids_dim[0] * sizeof(int64_t), stream); ids_data, ids_num * sizeof(int64_t), stream);
d_table->set_rows(new_rows); d_table->set_rows(new_rows);
auto *d_table_value = d_table->mutable_value(); auto *d_table_value = d_table->mutable_value();
d_table_value->Resize({ids_dim[0], table->dims()[1]}); d_table_value->Resize({ids_num, table->dims()[1]});
d_table_value->mutable_data<T>(context.GetPlace()); d_table_value->mutable_data<T>(context.GetPlace());
auto *d_table_data = d_table_value->data<T>(); auto *d_table_data = d_table_value->data<T>();
auto *d_output_data = d_output->data<T>(); auto *d_output_data = d_output->data<T>();
PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output->dims()); auto d_output_dims = d_output->dims();
PADDLE_ENFORCE_EQ(
d_table_value->dims(),
framework::flatten_to_2d(d_output_dims, d_output_dims.size() - 1));
memory::Copy(gpu_place, d_table_data, gpu_place, d_output_data, memory::Copy(gpu_place, d_table_data, gpu_place, d_output_data,
d_output->numel() * sizeof(T), stream); d_output->numel() * sizeof(T), stream);
......
...@@ -109,17 +109,17 @@ class LookupTableGradKernel : public framework::OpKernel<T> { ...@@ -109,17 +109,17 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W")); auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W"));
auto *ids_data = ids->data<int64_t>(); auto *ids_data = ids->data<int64_t>();
auto ids_dim = ids->dims(); int64_t ids_num = ids->numel();
framework::Vector<int64_t> new_rows; framework::Vector<int64_t> new_rows;
new_rows.reserve(ids_dim[0]); new_rows.reserve(ids_num);
for (int64_t i = 0; i < ids_dim[0]; i++) { for (int64_t i = 0; i < ids_num; i++) {
new_rows.push_back(ids_data[i]); new_rows.push_back(ids_data[i]);
} }
d_table->set_rows(new_rows); d_table->set_rows(new_rows);
auto *d_table_value = d_table->mutable_value(); auto *d_table_value = d_table->mutable_value();
d_table_value->Resize({ids_dim[0], table_dim[1]}); d_table_value->Resize({ids_num, table_dim[1]});
d_table_value->mutable_data<T>(context.GetPlace()); d_table_value->mutable_data<T>(context.GetPlace());
d_table->set_height(table_dim[0]); d_table->set_height(table_dim[0]);
...@@ -127,7 +127,10 @@ class LookupTableGradKernel : public framework::OpKernel<T> { ...@@ -127,7 +127,10 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
auto *d_output_data = d_output->data<T>(); auto *d_output_data = d_output->data<T>();
auto *d_table_data = d_table_value->data<T>(); auto *d_table_data = d_table_value->data<T>();
PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output->dims()); auto d_output_dims = d_output->dims();
PADDLE_ENFORCE_EQ(
d_table_value->dims(),
framework::flatten_to_2d(d_output_dims, d_output_dims.size() - 1));
memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel()); memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel());
} else { } else {
auto *ids = context.Input<LoDTensor>("Ids"); auto *ids = context.Input<LoDTensor>("Ids");
...@@ -135,10 +138,9 @@ class LookupTableGradKernel : public framework::OpKernel<T> { ...@@ -135,10 +138,9 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
auto *d_table = context.Output<LoDTensor>(framework::GradVarName("W")); auto *d_table = context.Output<LoDTensor>(framework::GradVarName("W"));
auto *ids_data = ids->data<int64_t>(); auto *ids_data = ids->data<int64_t>();
auto ids_dim = ids->dims();
int N = table_dim[0]; int N = table_dim[0];
int D = d_output->dims()[1]; int D = table_dim[1];
auto *d_output_data = d_output->data<T>(); auto *d_output_data = d_output->data<T>();
auto *d_table_data = d_table->mutable_data<T>(context.GetPlace()); auto *d_table_data = d_table->mutable_data<T>(context.GetPlace());
......
...@@ -30,8 +30,16 @@ class SoftmaxCUDNNKernel : public framework::OpKernel<T> { ...@@ -30,8 +30,16 @@ class SoftmaxCUDNNKernel : public framework::OpKernel<T> {
// allocate memory on device. // allocate memory on device.
Out->mutable_data<T>(context.GetPlace()); Out->mutable_data<T>(context.GetPlace());
auto dims = X->dims();
auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
framework::LoDTensor flattened_x;
framework::LoDTensor flattened_out;
flattened_x.ShareDataWith(*X).Resize(flattened_dims);
flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
math::SoftmaxCUDNNFunctor<T>()( math::SoftmaxCUDNNFunctor<T>()(
context.template device_context<platform::CUDADeviceContext>(), X, Out); context.template device_context<platform::CUDADeviceContext>(),
&flattened_x, &flattened_out);
} }
}; };
...@@ -46,9 +54,18 @@ class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> { ...@@ -46,9 +54,18 @@ class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
// allocate memory on device. // allocate memory on device.
dX->mutable_data<T>(context.GetPlace()); dX->mutable_data<T>(context.GetPlace());
auto dims = Out->dims();
auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
framework::LoDTensor flattened_out;
framework::LoDTensor flattened_d_out;
framework::LoDTensor flattened_d_x;
flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
flattened_d_out.ShareDataWith(*dOut).Resize(flattened_dims);
flattened_d_x.ShareDataWith(*dX).Resize(flattened_dims);
math::SoftmaxGradCUDNNFunctor<T>()( math::SoftmaxGradCUDNNFunctor<T>()(
context.template device_context<platform::CUDADeviceContext>(), Out, context.template device_context<platform::CUDADeviceContext>(),
dOut, dX); &flattened_out, &flattened_d_out, &flattened_d_x);
} }
}; };
......
...@@ -26,9 +26,9 @@ using paddle::platform::MKLDNNMemDesc; ...@@ -26,9 +26,9 @@ using paddle::platform::MKLDNNMemDesc;
using mkldnn::memory; // Note: paddle has also "memory" namespace using mkldnn::memory; // Note: paddle has also "memory" namespace
using mkldnn::primitive; using mkldnn::primitive;
using mkldnn::softmax_forward;
using mkldnn::softmax_backward;
using mkldnn::prop_kind; using mkldnn::prop_kind;
using mkldnn::softmax_backward;
using mkldnn::softmax_forward;
using mkldnn::stream; using mkldnn::stream;
using platform::to_void_cast; using platform::to_void_cast;
...@@ -113,17 +113,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -113,17 +113,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto mkldnn_engine = dev_ctx.GetEngine(); auto mkldnn_engine = dev_ctx.GetEngine();
const Tensor* input = ctx.Input<Tensor>("X"); const Tensor* input = ctx.Input<Tensor>("X");
Tensor* output = ctx.Output<Tensor>("Out"); Tensor* output = ctx.Output<Tensor>("Out");
PADDLE_ENFORCE(input->dims().size() == 2UL, PADDLE_ENFORCE_EQ(
"The input of softmax op must be a 2D matrix."); input->dims(), output->dims(),
const T* input_data = input->data<T>(); "The shape of softmax's input and output must be identical.");
// allocate memory for output
T* output_data = output->mutable_data<T>(ctx.GetPlace()); // make sure 'output' holds memory, which will be shared by
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims()); // 'flattened_output' later.
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims()); output->mutable_data<T>(ctx.GetPlace());
// MKL-DNN does support softmax over selected axis. Having 2D Tensor,
// we will make normalization after final eg. axis: 1 // flatten input and output to 2-D matrices
PADDLE_ENFORCE(((src_tz[0] == dst_tz[0]) && (src_tz[1] == dst_tz[1])), auto dims = input->dims(); // input and output share the same shape
"Softmax input and output dimensions should match"); auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
framework::Tensor flattened_input;
framework::Tensor flattened_output;
flattened_input.ShareDataWith(*input).Resize(flattened_dims);
flattened_output.ShareDataWith(*output).Resize(flattened_dims);
const T* input_data = flattened_input.data<T>();
T* output_data = flattened_output.mutable_data<T>(ctx.GetPlace());
std::vector<int> src_tz = paddle::framework::vectorize2int(flattened_dims);
std::vector<int> dst_tz = src_tz;
// Same memory descriptor to be used for input and output // Same memory descriptor to be used for input and output
memory::dims softmax_tz = {src_tz[0], src_tz[1]}; memory::dims softmax_tz = {src_tz[0], src_tz[1]};
// Generate keys for storing/retrieving primitives for this operator // Generate keys for storing/retrieving primitives for this operator
...@@ -174,23 +184,34 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> { ...@@ -174,23 +184,34 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>(); auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
auto mkldnn_engine = dev_ctx.GetEngine(); auto mkldnn_engine = dev_ctx.GetEngine();
const Tensor* output = ctx.Input<Tensor>("Out"); const Tensor* output = ctx.Input<Tensor>("Out");
const T* dst_data = output->data<T>();
auto* dout = ctx.template Input<Tensor>(framework::GradVarName("Out")); auto* dout = ctx.template Input<Tensor>(framework::GradVarName("Out"));
const auto* diff_dst_ptr = dout->template data<T>();
auto* dx = auto* dx =
ctx.template Output<framework::Tensor>(framework::GradVarName("X")); ctx.template Output<framework::Tensor>(framework::GradVarName("X"));
T* diff_src_ptr = dx->template mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims()); PADDLE_ENFORCE_EQ(
dout->dims(), dx->dims(),
"The shape of softmax_grad's input and output must be identical.");
// make sure 'dx' holds memory, which will be shared by 'flattened_dx'
// later.
dx->template mutable_data<T>(ctx.GetPlace());
auto dims = dout->dims(); // input and output share the same shape
auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
framework::Tensor flattened_output;
framework::Tensor flattened_dout;
framework::Tensor flattened_dx;
flattened_output.ShareDataWith(*output).Resize(flattened_dims);
flattened_dout.ShareDataWith(*dout).Resize(flattened_dims);
flattened_dx.ShareDataWith(*dx).Resize(flattened_dims);
const T* dst_data = flattened_output.data<T>();
const T* diff_dst_ptr = flattened_dout.template data<T>();
T* diff_src_ptr = flattened_dx.template mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = paddle::framework::vectorize2int(flattened_dims);
std::vector<int> src_tz(dst_tz); std::vector<int> src_tz(dst_tz);
PADDLE_ENFORCE(output->dims().size() == 2UL,
"The input of softmax op must be a 2D matrix.");
// MKL-DNN does support softmax over selected axis. Having 2D Tensor,
// we will make normalization after final eg. axis: 1
PADDLE_ENFORCE(((src_tz[0] == dst_tz[0]) && (src_tz[1] == dst_tz[1])),
"Softmax input and output dimensions should match");
// Same memory descriptor to be used for input and output // Same memory descriptor to be used for input and output
memory::dims softmax_tz = {src_tz[0], src_tz[1]}; memory::dims softmax_tz = {src_tz[0], src_tz[1]};
// Currently only supports NC data format // Currently only supports NC data format
......
...@@ -37,10 +37,7 @@ class SoftmaxOp : public framework::OperatorWithKernel { ...@@ -37,10 +37,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SoftmaxOp should not be null."); "Output(Out) of SoftmaxOp should not be null.");
auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
PADDLE_ENFORCE(x_dims.size() == 2UL,
"The input of softmax op must be a matrix.");
ctx->SetOutputDim("Out", x_dims);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
...@@ -81,8 +78,8 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -81,8 +78,8 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X",
"The input tensor of softmax. " "The input tensor of softmax, "
"2-D with shape [batch_size, input_feature_dimensions]."); "whose last dimension is the input_feature_dimensions.");
AddOutput("Out", "The normalized values with the same shape as X.") AddOutput("Out", "The normalized values with the same shape as X.")
.Reuse("X"); .Reuse("X");
AddAttr<bool>( AddAttr<bool>(
...@@ -105,20 +102,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -105,20 +102,23 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
Softmax Operator. Softmax Operator.
The input of the softmax operator is a 2-D tensor with shape N x K (N is the The input of the softmax operator is a tensor of any rank. The output tensor
batch_size, K is the dimension of input feature). The output tensor has the has the same shape as the input.
same shape as the input tensor.
For each row of the input tensor, the softmax operator squashes the The input tensor will first be logically flattened to a 2-D matrix. The matrix's
K-dimensional vector of arbitrary real values to a K-dimensional vector of real second dimension (row length) is the same as the last dimension of the input
values in the range [0, 1] that add up to 1. tensor, and the first dimension(column length) is the product of all other
dimensions of the input tensor. For each row of the matrix, the softmax operator
squashes the K-dimensional(K is the width of the matrix, which is also the size
of the input tensor's last dimension) vector of arbitrary real values to a
K-dimensional vector of real values in the range [0, 1] that add up to 1.
It computes the exponential of the given dimension and the sum of exponential It computes the exponential of the given dimension and the sum of exponential
values of all the other dimensions in the K-dimensional vector input. values of all the other dimensions in the K-dimensional vector input.
Then the ratio of the exponential of the given dimension and the sum of Then the ratio of the exponential of the given dimension and the sum of
exponential values of all the other dimensions is the output of the softmax exponential values of all the other dimensions is the output of the softmax
operator. operator.
For each row $i$ and each column $j$ in Input(X), we have: For each row $i$ and each column $j$ in the matrix, we have:
$$Out[i, j] = \frac{\exp(X[i, j])}{\sum_j \exp(X[i, j])}$$ $$Out[i, j] = \frac{\exp(X[i, j])}{\sum_j \exp(X[i, j])}$$
)DOC"); )DOC");
......
...@@ -31,8 +31,16 @@ class SoftmaxKernel : public framework::OpKernel<T> { ...@@ -31,8 +31,16 @@ class SoftmaxKernel : public framework::OpKernel<T> {
// allocate memory on device. // allocate memory on device.
Out->mutable_data<T>(context.GetPlace()); Out->mutable_data<T>(context.GetPlace());
auto dims = X->dims();
auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
framework::LoDTensor flattened_x;
framework::LoDTensor flattened_out;
flattened_x.ShareDataWith(*X).Resize(flattened_dims);
flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
math::SoftmaxFunctor<DeviceContext, T>()( math::SoftmaxFunctor<DeviceContext, T>()(
context.template device_context<DeviceContext>(), X, Out); context.template device_context<DeviceContext>(), &flattened_x,
&flattened_out);
} }
}; };
...@@ -47,8 +55,18 @@ class SoftmaxGradKernel : public framework::OpKernel<T> { ...@@ -47,8 +55,18 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
// allocate memory on device. // allocate memory on device.
dX->mutable_data<T>(context.GetPlace()); dX->mutable_data<T>(context.GetPlace());
auto dims = Out->dims();
auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
framework::LoDTensor flattened_out;
framework::LoDTensor flattened_d_out;
framework::LoDTensor flattened_d_x;
flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
flattened_d_out.ShareDataWith(*dOut).Resize(flattened_dims);
flattened_d_x.ShareDataWith(*dX).Resize(flattened_dims);
math::SoftmaxGradFunctor<DeviceContext, T>()( math::SoftmaxGradFunctor<DeviceContext, T>()(
context.template device_context<DeviceContext>(), Out, dOut, dX); context.template device_context<DeviceContext>(), &flattened_out,
&flattened_d_out, &flattened_d_x);
} }
}; };
......
...@@ -1313,13 +1313,16 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): ...@@ -1313,13 +1313,16 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None): def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None):
""" """
The input of the softmax layer is a 2-D tensor with shape N x K (N is the The input of the softmax operator is a tensor of any rank. The output tensor
batch_size, K is the dimension of input feature). The output tensor has the has the same shape as the input.
same shape as the input tensor.
For each row of the input tensor, the softmax operator squashes the The input tensor will first be logically flattened to a 2-D matrix. The matrix's
K-dimensional vector of arbitrary real values to a K-dimensional vector of real second dimension (row length) is the same as the last dimension of the input
values in the range [0, 1] that add up to 1. tensor, and the first dimension(column length) is the product of all other
dimensions of the input tensor. For each row of the matrix, the softmax operator
squashes the K-dimensional(K is the width of the matrix, which is also the size
of the input tensor's last dimension) vector of arbitrary real values to a
K-dimensional vector of real values in the range [0, 1] that add up to 1.
It computes the exponential of the given dimension and the sum of exponential It computes the exponential of the given dimension and the sum of exponential
values of all the other dimensions in the K-dimensional vector input. values of all the other dimensions in the K-dimensional vector input.
...@@ -1327,7 +1330,7 @@ def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None): ...@@ -1327,7 +1330,7 @@ def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None):
exponential values of all the other dimensions is the output of the softmax exponential values of all the other dimensions is the output of the softmax
operator. operator.
For each row :math:`i` and each column :math:`j` in Input(X), we have: For each row :math:`i` and each column :math:`j` in the matrix, we have:
.. math:: .. math::
......
...@@ -35,6 +35,22 @@ class TestLookupTableOp(OpTest): ...@@ -35,6 +35,22 @@ class TestLookupTableOp(OpTest):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
class TestLookupTableOpWithTensorIds(OpTest):
def setUp(self):
self.op_type = "lookup_table"
table = np.random.random((17, 31)).astype("float32")
ids = np.random.randint(
low=0, high=17, size=(2, 4, 5, 1)).astype("int64")
self.inputs = {'W': table, 'Ids': ids}
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
class TestLookupTableOpWithPadding(TestLookupTableOp): class TestLookupTableOpWithPadding(TestLookupTableOp):
def test_check_output(self): def test_check_output(self):
ids = np.squeeze(self.inputs['Ids']) ids = np.squeeze(self.inputs['Ids'])
...@@ -44,21 +60,34 @@ class TestLookupTableOpWithPadding(TestLookupTableOp): ...@@ -44,21 +60,34 @@ class TestLookupTableOpWithPadding(TestLookupTableOp):
self.check_output() self.check_output()
def test_check_grad(self): def test_check_grad(self):
# Since paddings are not trainable and fixed in forward, the gradient of # Since paddings are not trainable and fixed in forward, the gradient of
# paddings makes no sense and we don't test the gradient here. # paddings makes no sense and we don't test the gradient here.
pass pass
class TestLookupTableWIsSelectedRows(OpTest): class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
def check_with_place(self, place): def test_check_output(self):
scope = core.Scope() ids = self.inputs['Ids']
flatten_idx = ids.flatten()
padding_idx = np.random.choice(flatten_idx, 1)[0]
self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
self.attrs = {'padding_idx': long(padding_idx)}
self.check_output()
def test_check_grad(self):
# Since paddings are not trainable and fixed in forward, the gradient of
# paddings makes no sense and we don't test the gradient here.
pass
# create and initialize Id Variable
class TestLookupTableWIsSelectedRows(OpTest):
def prepare_ids(self, scope, place):
ids_tensor = scope.var('Ids').get_tensor() ids_tensor = scope.var('Ids').get_tensor()
ids_array = np.array([[0], [4], [3], [5]]).astype("int64") ids_array = np.array([[0], [4], [3], [5]]).astype("int64")
ids_tensor.set(ids_array, place) ids_tensor.set(ids_array, place)
return ids_array
# create and initialize W Variable def prepare_w(self, scope, place):
rows = [0, 1, 2, 3, 4, 5, 6] rows = [0, 1, 2, 3, 4, 5, 6]
row_numel = 12 row_numel = 12
...@@ -71,8 +100,22 @@ class TestLookupTableWIsSelectedRows(OpTest): ...@@ -71,8 +100,22 @@ class TestLookupTableWIsSelectedRows(OpTest):
w_tensor = w_selected_rows.get_tensor() w_tensor = w_selected_rows.get_tensor()
w_tensor.set(w_array, place) w_tensor.set(w_array, place)
# create Out Variable def create_out_tensor(self, scope, place):
out_tensor = scope.var('Out').get_tensor() return scope.var('Out').get_tensor()
def check_result(self, ids_array, result_array):
# all(): return True if all elements of the iterable are true (or if the iterable is empty)
for idx, row in enumerate(ids_array):
assert (row[0] == result_array[idx]).all()
def check_with_place(self, place):
scope = core.Scope()
ids_array = self.prepare_ids(scope, place)
self.prepare_w(scope, place)
out_tensor = self.create_out_tensor(scope, place)
# create and run lookup_table operator # create and run lookup_table operator
lookup_table = Operator("lookup_table", W='W', Ids='Ids', Out='Out') lookup_table = Operator("lookup_table", W='W', Ids='Ids', Out='Out')
...@@ -80,9 +123,8 @@ class TestLookupTableWIsSelectedRows(OpTest): ...@@ -80,9 +123,8 @@ class TestLookupTableWIsSelectedRows(OpTest):
# get result from Out # get result from Out
result_array = np.array(out_tensor) result_array = np.array(out_tensor)
# all(): return True if all elements of the iterable are true (or if the iterable is empty)
for idx, row in enumerate(ids_array): self.check_result(ids_array, result_array)
assert (row[0] == result_array[idx]).all()
def test_w_is_selected_rows(self): def test_w_is_selected_rows(self):
places = [core.CPUPlace()] places = [core.CPUPlace()]
...@@ -91,5 +133,19 @@ class TestLookupTableWIsSelectedRows(OpTest): ...@@ -91,5 +133,19 @@ class TestLookupTableWIsSelectedRows(OpTest):
self.check_with_place(place) self.check_with_place(place)
class TestLookupTableWithTensorIdsWIsSelectedRows(
TestLookupTableWIsSelectedRows):
def prepare_ids(self, scope, place):
ids_tensor = scope.var('Ids').get_tensor()
ids_array = np.random.randint(
low=0, high=6, size=(2, 4, 3, 1)).astype("int64")
ids_tensor.set(ids_array, place)
return ids_array
def check_result(self, ids_array, result_array):
for idx, row in np.ndenumerate(ids_array):
assert (row == result_array[idx]).all()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -26,15 +26,22 @@ def stable_softmax(x): ...@@ -26,15 +26,22 @@ def stable_softmax(x):
class TestSoftmaxOp(OpTest): class TestSoftmaxOp(OpTest):
def get_x_shape(self):
return [10, 10]
def setUp(self): def setUp(self):
self.op_type = "softmax" self.op_type = "softmax"
self.use_cudnn = False self.use_cudnn = False
self.use_mkldnn = False self.use_mkldnn = False
self.dtype = np.float32 self.dtype = np.float32
self.init_kernel_type() self.init_kernel_type()
self.shape = self.get_x_shape()
x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
out = np.apply_along_axis(stable_softmax, 1,
x.reshape([-1, self.shape[-1]]))
out = out.reshape(self.shape)
x = np.random.uniform(0.1, 1, [10, 10]).astype(self.dtype)
out = np.apply_along_axis(stable_softmax, 1, x)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out} self.outputs = {'Out': out}
self.attrs = { self.attrs = {
...@@ -63,6 +70,11 @@ class TestSoftmaxOp(OpTest): ...@@ -63,6 +70,11 @@ class TestSoftmaxOp(OpTest):
self.check_grad(["X"], "Out", max_relative_error=0.01) self.check_grad(["X"], "Out", max_relative_error=0.01)
class TestSoftmaxOp2(TestSoftmaxOp):
def get_x_shape(self):
return [2, 3, 4, 5]
@unittest.skipIf(not core.is_compiled_with_cuda(), @unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA") "core is not compiled with CUDA")
class TestSoftmaxCUDNNOp(TestSoftmaxOp): class TestSoftmaxCUDNNOp(TestSoftmaxOp):
...@@ -70,6 +82,13 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp): ...@@ -70,6 +82,13 @@ class TestSoftmaxCUDNNOp(TestSoftmaxOp):
self.use_cudnn = True self.use_cudnn = True
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestSoftmaxCUDNNOp2(TestSoftmaxCUDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
@unittest.skipIf(not core.is_compiled_with_cuda(), @unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA") "core is not compiled with CUDA")
class TestSoftmaxFP16Op(TestSoftmaxOp): class TestSoftmaxFP16Op(TestSoftmaxOp):
...@@ -83,6 +102,13 @@ class TestSoftmaxFP16Op(TestSoftmaxOp): ...@@ -83,6 +102,13 @@ class TestSoftmaxFP16Op(TestSoftmaxOp):
self.check_output_with_place(place, atol=1e-3) self.check_output_with_place(place, atol=1e-3)
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestSoftmaxFP16Op2(TestSoftmaxFP16Op):
def get_x_shape(self):
return [2, 3, 4, 5]
@unittest.skipIf(not core.is_compiled_with_cuda(), @unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA") "core is not compiled with CUDA")
class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp): class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp):
...@@ -97,10 +123,22 @@ class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp): ...@@ -97,10 +123,22 @@ class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp):
self.check_output_with_place(place, atol=1e-3) self.check_output_with_place(place, atol=1e-3)
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
class TestSoftmaxMKLDNNOp(TestSoftmaxOp): class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self): def init_kernel_type(self):
self.use_mkldnn = True self.use_mkldnn = True
class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册