未验证 提交 ebc5f997 编写于 作者: T tangwei12 提交者: GitHub

add embedding 2.0 (#26649)

* add embedding 2.0

* add embedding support input int32
上级 1f36d3cd
...@@ -15,8 +15,8 @@ limitations under the License. */ ...@@ -15,8 +15,8 @@ limitations under the License. */
#include "paddle/fluid/operators/lookup_table_v2_op.h" #include "paddle/fluid/operators/lookup_table_v2_op.h"
#include <memory> #include <memory>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/framework/var_type_inference.h"
namespace paddle { namespace paddle {
...@@ -196,3 +196,14 @@ REGISTER_OP_CPU_KERNEL(lookup_table_v2, ops::LookupTableV2Kernel<float>, ...@@ -196,3 +196,14 @@ REGISTER_OP_CPU_KERNEL(lookup_table_v2, ops::LookupTableV2Kernel<float>,
REGISTER_OP_CPU_KERNEL(lookup_table_v2_grad, REGISTER_OP_CPU_KERNEL(lookup_table_v2_grad,
ops::LookupTableV2GradKernel<float>, ops::LookupTableV2GradKernel<float>,
ops::LookupTableV2GradKernel<double>); ops::LookupTableV2GradKernel<double>);
/* ========================== register checkpoint ===========================*/
// Adds a version checkpoint for the lookup_table_v2 op. Both the checkpoint
// note and the description string below record the same change: the op's
// supported input type went from `int64` only to `int32/int64`.
REGISTER_OP_VERSION(lookup_table_v2)
.AddCheckpoint(
R"ROC(fix lookup_table_v2, add input type `int32`)ROC",
paddle::framework::compatible::OpVersionDesc()
.BugfixWithBehaviorChanged("lookup_table_v2 support input type "
"`int64`; after support input type "
"`int32/int64`"));
/* ========================================================================== */
...@@ -85,6 +85,14 @@ __global__ void LookupTableV2Grad(T *table, const T *output, const int64_t *ids, ...@@ -85,6 +85,14 @@ __global__ void LookupTableV2Grad(T *table, const T *output, const int64_t *ids,
} }
} }
// Copies K ids from `in_ids` into `out_ids`, widening every element from T to
// int64_t.
//
// Launch layout: any grid/block shape (1-D, 2-D or 3-D) is accepted. The
// launch is flattened into one linear thread id and the elements are divided
// among all launched threads with a stride loop, so each element is converted
// by exactly one thread and a launch smaller than K still covers everything.
// No shared memory is used. Both pointers are dereferenced on the device and
// are indexed in the range [0, K).
template <typename T>
__global__ void InputTypeCovert(const T *in_ids, const int64_t K,
                                int64_t *out_ids) {
  // Linearise the block shape and the grid shape.
  const int64_t threads_per_block =
      static_cast<int64_t>(blockDim.x) * blockDim.y * blockDim.z;
  const int64_t blocks_in_grid =
      static_cast<int64_t>(gridDim.x) * gridDim.y * gridDim.z;

  const int64_t thread_rank =
      (static_cast<int64_t>(threadIdx.z) * blockDim.y + threadIdx.y) *
          blockDim.x +
      threadIdx.x;
  const int64_t block_rank =
      (static_cast<int64_t>(blockIdx.z) * gridDim.y + blockIdx.y) * gridDim.x +
      blockIdx.x;

  const int64_t first = block_rank * threads_per_block + thread_rank;
  const int64_t stride = blocks_in_grid * threads_per_block;

  // 64-bit counter: K is int64_t, so an `int` index could overflow.
  for (int64_t i = first; i < K; i += stride) {
    out_ids[i] = static_cast<int64_t>(in_ids[i]);
  }
}
template <typename T> template <typename T>
class LookupTableV2CUDAKernel : public framework::OpKernel<T> { class LookupTableV2CUDAKernel : public framework::OpKernel<T> {
public: public:
...@@ -101,23 +109,37 @@ class LookupTableV2CUDAKernel : public framework::OpKernel<T> { ...@@ -101,23 +109,37 @@ class LookupTableV2CUDAKernel : public framework::OpKernel<T> {
size_t D = table_t->dims()[1]; size_t D = table_t->dims()[1];
size_t K = ids_t->numel(); size_t K = ids_t->numel();
auto *ids = ids_t->data<int64_t>();
auto *table = table_t->data<T>();
auto *output = output_t->mutable_data<T>(context.GetPlace());
dim3 threads(256, 4); dim3 threads(256, 4);
dim3 grids(80, 1); dim3 grids(80, 1);
// copy GPU memory to CPU pinned memory
framework::Vector<int64_t> ids;
ids.resize(K);
const int64_t *ids_p = nullptr;
if (ids_t->type() == framework::proto::VarType::INT32) {
InputTypeCovert<
int><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
ids_t->data<int>(), K, ids.MutableData(context.GetPlace()));
ids_p = ids.MutableData(context.GetPlace());
} else {
ids_p = ids_t->data<int64_t>();
}
auto *table = table_t->data<T>();
auto *output = output_t->mutable_data<T>(context.GetPlace());
if (padding_idx == -1) if (padding_idx == -1)
LookupTableV2< LookupTableV2<
T, 256, 4, 80, T, 256, 4, 80,
false><<<grids, threads, 0, context.cuda_device_context().stream()>>>( false><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
output, table, ids, N, K, D, padding_idx); output, table, ids_p, N, K, D, padding_idx);
else else
LookupTableV2< LookupTableV2<
T, 256, 4, 80, T, 256, 4, 80,
true><<<grids, threads, 0, context.cuda_device_context().stream()>>>( true><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
output, table, ids, N, K, D, padding_idx); output, table, ids_p, N, K, D, padding_idx);
} }
}; };
...@@ -139,16 +161,24 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel<T> { ...@@ -139,16 +161,24 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel<T> {
auto *ids_data = ids->data<int64_t>(); auto *ids_data = ids->data<int64_t>();
int64_t ids_num = ids->numel(); int64_t ids_num = ids->numel();
dim3 threads(128, 8);
dim3 grids(8, 1);
auto stream = dev_ctx.stream(); auto stream = dev_ctx.stream();
// copy GPU memory to CPU pinned memory // copy GPU memory to CPU pinned memory
framework::Vector<int64_t> new_rows; framework::Vector<int64_t> new_rows;
new_rows.resize(ids_num); new_rows.resize(ids_num);
auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()); auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace());
// TODO(yuyang18): Strange code here. if (ids->type() == framework::proto::VarType::INT32) {
InputTypeCovert<
int><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
ids->data<int>(), ids_num,
new_rows.MutableData(context.GetPlace()));
} else {
memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()), memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()),
gpu_place, ids_data, ids_num * sizeof(int64_t), stream); gpu_place, ids_data, ids_num * sizeof(int64_t), stream);
}
d_table->set_rows(new_rows); d_table->set_rows(new_rows);
auto *d_table_value = d_table->mutable_value(); auto *d_table_value = d_table->mutable_value();
...@@ -177,17 +207,32 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel<T> { ...@@ -177,17 +207,32 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel<T> {
int N = d_table_t->dims()[0]; int N = d_table_t->dims()[0];
int D = d_table_t->dims()[1]; int D = d_table_t->dims()[1];
int K = ids_t->numel(); int K = ids_t->numel();
const int64_t *ids = ids_t->data<int64_t>();
dim3 threads(128, 8);
dim3 grids(8, 1);
// copy GPU memory to CPU pinned memory
framework::Vector<int64_t> ids;
ids.resize(K);
const int64_t *ids_p = nullptr;
if (ids_t->type() == framework::proto::VarType::INT32) {
InputTypeCovert<
int><<<grids, threads, 0, context.cuda_device_context().stream()>>>(
ids_t->data<int>(), K, ids.MutableData(context.GetPlace()));
ids_p = ids.MutableData(context.GetPlace());
} else {
ids_p = ids_t->data<int64_t>();
}
const T *d_output = d_output_t->data<T>(); const T *d_output = d_output_t->data<T>();
T *d_table = d_table_t->mutable_data<T>(context.GetPlace()); T *d_table = d_table_t->mutable_data<T>(context.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*d_table_t); auto t = framework::EigenVector<T>::Flatten(*d_table_t);
t.device(*dev_ctx.eigen_device()) = t.constant(static_cast<T>(0)); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast<T>(0));
dim3 threads(128, 8);
dim3 grids(8, 1);
LookupTableV2Grad<T, 128, 8, 8><<<grids, threads, 0, dev_ctx.stream()>>>( LookupTableV2Grad<T, 128, 8, 8><<<grids, threads, 0, dev_ctx.stream()>>>(
d_table, d_output, ids, N, K, D); d_table, d_output, ids_p, N, K, D);
} }
} }
}; };
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <algorithm>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -45,32 +46,19 @@ class LookupTableV2Kernel : public framework::OpKernel<T> { ...@@ -45,32 +46,19 @@ class LookupTableV2Kernel : public framework::OpKernel<T> {
auto *output_t = context.Output<LoDTensor>("Out"); // float tensor auto *output_t = context.Output<LoDTensor>("Out"); // float tensor
auto *table_var = context.InputVar("W"); auto *table_var = context.InputVar("W");
auto id_name = context.InputNames("Ids").front(); int64_t padding_idx = context.Attr<int64_t>("padding_idx");
auto embedding_name = context.InputNames("W").front(); int64_t ids_numel = ids_t->numel();
auto out_name = context.OutputNames("Out").front();
// for remote prefetch
auto epmap = context.Attr<std::vector<std::string>>("epmap");
auto remote_prefetch = context.Attr<bool>("remote_prefetch");
auto table_names = context.Attr<std::vector<std::string>>("table_names");
if (remote_prefetch && !epmap.empty()) { std::vector<int64_t> ids;
// if epmap is not empty, then the parameter will be fetched from remote ids.reserve(ids_numel);
// parameter server
#ifdef PADDLE_WITH_DISTRIBUTE if (ids_t->type() == framework::proto::VarType::INT32) {
operators::distributed::prefetch(id_name, out_name, embedding_name, false, std::transform(ids_t->data<int>(), ids_t->data<int>() + ids_numel,
table_names, epmap, context, std::back_inserter(ids),
context.scope()); [&](int id) { return static_cast<int64_t>(id); });
#else
PADDLE_THROW(
"paddle is not compiled with distribute support, can not do "
"parameter prefetch!");
#endif
} else { } else {
int64_t padding_idx = context.Attr<int64_t>("padding_idx"); framework::TensorToVector(*ids_t, &ids);
int64_t *ids = const_cast<int64_t *>(ids_t->data<int64_t>()); }
int64_t ids_numel = ids_t->numel();
if (table_var->IsType<LoDTensor>()) { if (table_var->IsType<LoDTensor>()) {
auto *table_t = context.Input<LoDTensor>("W"); auto *table_t = context.Input<LoDTensor>("W");
...@@ -117,8 +105,8 @@ class LookupTableV2Kernel : public framework::OpKernel<T> { ...@@ -117,8 +105,8 @@ class LookupTableV2Kernel : public framework::OpKernel<T> {
"expected >= 0. But received %ld", "expected >= 0. But received %ld",
ids[i]); ids[i]);
auto id_index = table_t.Index(ids[i]); auto id_index = table_t.Index(ids[i]);
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(id_index, 0,
id_index, 0, "the input key should be exists. But received %d.", "the input key should be exists. But received %d.",
id_index); id_index);
blas.VCOPY(row_width, table + id_index * row_width, blas.VCOPY(row_width, table + id_index * row_width,
output + i * row_width); output + i * row_width);
...@@ -126,7 +114,6 @@ class LookupTableV2Kernel : public framework::OpKernel<T> { ...@@ -126,7 +114,6 @@ class LookupTableV2Kernel : public framework::OpKernel<T> {
} }
} }
} }
}
}; };
template <typename T> template <typename T>
...@@ -151,17 +138,23 @@ class LookupTableV2GradKernel : public framework::OpKernel<T> { ...@@ -151,17 +138,23 @@ class LookupTableV2GradKernel : public framework::OpKernel<T> {
// Since paddings are not trainable and fixed in forward, the gradient of // Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward. // paddings makes no sense and we don't deal with it in backward.
if (is_sparse) { if (is_sparse) {
auto *ids = context.Input<LoDTensor>("Ids"); auto *ids_t = context.Input<LoDTensor>("Ids");
auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out")); auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W")); auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W"));
int64_t ids_num = ids_t->numel();
std::vector<int64_t> ids;
ids.reserve(ids_num);
auto *ids_data = ids->data<int64_t>(); if (ids_t->type() == framework::proto::VarType::INT32) {
int64_t ids_num = ids->numel(); std::transform(ids_t->data<int>(), ids_t->data<int>() + ids_num,
std::back_inserter(ids),
[&](int id) { return static_cast<int64_t>(id); });
} else {
framework::TensorToVector(*ids_t, &ids);
}
std::vector<int64_t> new_rows; d_table->set_rows(ids);
new_rows.resize(ids_num);
std::memcpy(&new_rows[0], ids_data, ids_num * sizeof(int64_t));
d_table->set_rows(new_rows);
auto *d_table_value = d_table->mutable_value(); auto *d_table_value = d_table->mutable_value();
d_table_value->Resize({ids_num, table_dim[1]}); d_table_value->Resize({ids_num, table_dim[1]});
...@@ -185,11 +178,23 @@ class LookupTableV2GradKernel : public framework::OpKernel<T> { ...@@ -185,11 +178,23 @@ class LookupTableV2GradKernel : public framework::OpKernel<T> {
memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel()); memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel());
} else { } else {
auto *ids = context.Input<LoDTensor>("Ids"); auto *ids_t = context.Input<LoDTensor>("Ids");
auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out")); auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
auto *d_table = context.Output<LoDTensor>(framework::GradVarName("W")); auto *d_table = context.Output<LoDTensor>(framework::GradVarName("W"));
int64_t ids_num = ids_t->numel();
std::vector<int64_t> ids;
ids.reserve(ids_num);
if (ids_t->type() == framework::proto::VarType::INT32) {
std::transform(ids_t->data<int>(), ids_t->data<int>() + ids_num,
std::back_inserter(ids),
[&](int id) { return static_cast<int64_t>(id); });
} else {
framework::TensorToVector(*ids_t, &ids);
}
auto *ids_data = ids->data<int64_t>(); auto *ids_data = ids.data();
int64_t N = table_dim[0]; int64_t N = table_dim[0];
int64_t D = table_dim[1]; int64_t D = table_dim[1];
...@@ -199,7 +204,7 @@ class LookupTableV2GradKernel : public framework::OpKernel<T> { ...@@ -199,7 +204,7 @@ class LookupTableV2GradKernel : public framework::OpKernel<T> {
memset(d_table_data, 0, d_table->numel() * sizeof(T)); memset(d_table_data, 0, d_table->numel() * sizeof(T));
for (int64_t i = 0; i < ids->numel(); ++i) { for (int64_t i = 0; i < ids_num; ++i) {
if (padding_idx != kNoPadding && ids_data[i] == padding_idx) { if (padding_idx != kNoPadding && ids_data[i] == padding_idx) {
// the gradient of padding_idx should be 0, already done by memset, so // the gradient of padding_idx should be 0, already done by memset, so
// do nothing. // do nothing.
......
...@@ -129,6 +129,7 @@ def one_hot(input, depth, allow_out_of_range=False): ...@@ -129,6 +129,7 @@ def one_hot(input, depth, allow_out_of_range=False):
return one_hot_out return one_hot_out
@deprecated(since='2.0.0', update_to='paddle.nn.functional.embedding')
def embedding(input, def embedding(input,
size, size,
is_sparse=False, is_sparse=False,
......
...@@ -367,6 +367,7 @@ def fc(input, ...@@ -367,6 +367,7 @@ def fc(input,
return helper.append_activation(pre_activation) return helper.append_activation(pre_activation)
@deprecated(since="2.0.0", update_to="paddle.nn.functional.embedding")
def embedding(input, def embedding(input,
size, size,
is_sparse=False, is_sparse=False,
......
...@@ -450,7 +450,7 @@ class TestAdamOpV2(unittest.TestCase): ...@@ -450,7 +450,7 @@ class TestAdamOpV2(unittest.TestCase):
import paddle import paddle
paddle.disable_static() paddle.disable_static()
emb = paddle.nn.Embedding([10, 10]) emb = paddle.nn.Embedding(10, 10)
adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters()) adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters())
state_dict = adam.state_dict() state_dict = adam.state_dict()
......
...@@ -59,7 +59,7 @@ class TestLookupTableOpWithTensorIds(OpTest): ...@@ -59,7 +59,7 @@ class TestLookupTableOpWithTensorIds(OpTest):
def setUp(self): def setUp(self):
self.op_type = "lookup_table_v2" self.op_type = "lookup_table_v2"
table = np.random.random((17, 31)).astype("float64") table = np.random.random((17, 31)).astype("float64")
ids = np.random.randint(low=0, high=17, size=(2, 4, 5)).astype("int64") ids = np.random.randint(low=0, high=17, size=(2, 4, 5)).astype("int32")
self.inputs = {'W': table, 'Ids': ids} self.inputs = {'W': table, 'Ids': ids}
self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))}
...@@ -100,7 +100,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): ...@@ -100,7 +100,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
class TestLookupTableWIsSelectedRows(unittest.TestCase): class TestLookupTableWIsSelectedRows(unittest.TestCase):
def prepare_ids(self, scope, place): def prepare_ids(self, scope, place):
ids_tensor = scope.var('Ids').get_tensor() ids_tensor = scope.var('Ids').get_tensor()
ids_array = np.array([0, 4, 3, 5]).astype("int64") ids_array = np.array([0, 4, 3, 5]).astype("int32")
ids_tensor.set(ids_array, place) ids_tensor.set(ids_array, place)
return ids_array return ids_array
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
class EmbeddingDygraph(unittest.TestCase):
    """Smoke test: ``paddle.nn.Embedding`` can be constructed in dygraph mode."""

    def test_1(self):
        import paddle
        import paddle.nn as nn
        import numpy as np

        # Switch to imperative (dygraph) execution before building the layer.
        paddle.disable_static()

        # example 1
        word_ids = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
        word_ids.shape  # [2, 3]
        vocab_size = 20

        # Only construction is exercised; the layer is never called.
        emb = nn.Embedding(
            vocab_size, 32, sparse=False, weight_attr='emb.w')
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle.nn.functional as functional
class EmbeddingStatic(unittest.TestCase):
    """Builds ``functional.embedding`` inside a static program.

    ``test_1`` feeds int64 ids and ``test_2`` feeds int32 ids; apart from the
    ids dtype both tests construct exactly the same graph.
    """

    def _build_embedding(self, ids_dtype):
        """Create a fresh program and append a sparse embedding lookup to it.

        Args:
            ids_dtype(str): dtype of the ``label`` data layer holding the ids.
        """
        main_prog = fluid.Program()
        with fluid.program_guard(main_prog):
            table_init = fluid.initializer.NumpyArrayInitializer(
                np.random.random(size=(128, 100)))

            table_attr = fluid.ParamAttr(
                name="emb_weight",
                learning_rate=0.5,
                initializer=table_init,
                trainable=True)

            table = main_prog.global_block().create_parameter(
                (128, 100), attr=table_attr, dtype="float32")

            ids = fluid.layers.data(
                name="label",
                shape=[4],
                append_batch_size=False,
                dtype=ids_dtype)

            emb = functional.embedding(
                x=ids, weight=table, sparse=True, name="embedding")

    def test_1(self):
        self._build_embedding("int64")

    def test_2(self):
        self._build_embedding("int32")
if __name__ == '__main__':
unittest.main()
...@@ -233,3 +233,4 @@ from .vision import space_to_depth #DEFINE_ALIAS ...@@ -233,3 +233,4 @@ from .vision import space_to_depth #DEFINE_ALIAS
from .vision import yolo_box #DEFINE_ALIAS from .vision import yolo_box #DEFINE_ALIAS
from .vision import yolov3_loss #DEFINE_ALIAS from .vision import yolov3_loss #DEFINE_ALIAS
from .input import one_hot #DEFINE_ALIAS from .input import one_hot #DEFINE_ALIAS
from .input import embedding #DEFINE_ALIAS
...@@ -19,7 +19,7 @@ from ...fluid.layer_helper import LayerHelper ...@@ -19,7 +19,7 @@ from ...fluid.layer_helper import LayerHelper
from ...fluid.layers import core from ...fluid.layers import core
from ...fluid.data_feeder import check_variable_and_dtype, check_dtype from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
__all__ = ['one_hot'] __all__ = ['one_hot', 'embedding']
def one_hot(x, num_classes, name=None): def one_hot(x, num_classes, name=None):
...@@ -83,6 +83,7 @@ def one_hot(x, num_classes, name=None): ...@@ -83,6 +83,7 @@ def one_hot(x, num_classes, name=None):
# [0., 1., 0., 0.], # [0., 1., 0., 0.],
# [0., 0., 0., 1.], # [0., 0., 0., 1.],
# [1., 0., 0., 0.]] # [1., 0., 0., 0.]]
""" """
if in_dygraph_mode(): if in_dygraph_mode():
...@@ -108,3 +109,115 @@ def one_hot(x, num_classes, name=None): ...@@ -108,3 +109,115 @@ def one_hot(x, num_classes, name=None):
outputs={'Out': one_hot_out}, outputs={'Out': one_hot_out},
stop_gradient=True) stop_gradient=True)
return one_hot_out return one_hot_out
def embedding(x, weight, padding_idx=None, sparse=False, name=None):
    r"""
    The operator is used to lookup embeddings vector of ids provided by :attr:`x` .

    The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
    with embedding size.

    **Note:** The id in :attr:`x` must satisfy :math:`0 =< id < weight.shape[0]` ,
    otherwise the program will throw an exception and exit.

    .. code-block:: text

        Case 1:
        x is a Tensor.
            padding_idx = -1
            x.data = [[1, 3], [2, 4], [4, 127]]
            x.shape = [3, 2]
            weight.shape = [128, 16]
        output is a Tensor:
            out.shape = [3, 2, 16]
            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
                        [0.345421456, 0.524563927, ..., 0.144534654]],
                        [[0.345249859, 0.124939536, ..., 0.194353745],
                        [0.945345345, 0.435394634, ..., 0.435345365]],
                        [[0.945345345, 0.435394634, ..., 0.435345365],
                        [0.0,         0.0,         ..., 0.0        ]]]  # padding data

        The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
        It will pad all-zero data when ids is 127.

    Args:
        x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
            satisfy :math:`0<= id < weight.shape[0]` .
        weight (Tensor): The weight. A Tensor with shape of lookup table parameter. It should have two elements which
            indicates the size of the dictionary of embeddings and the size of each embedding vector respectively.
        padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size).
            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
            to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup
            encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
            If set None, it makes no effect to output. Default: None.
        sparse(bool): The flag indicating whether to use sparse update. This parameter only
            affects the performance of the backwards gradient update. It is recommended to set
            True because sparse update is faster. But some optimizers does not support sparse update,
            such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
            :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
            :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
            In these cases, sparse must be False. Default: False.
        name(str|None): For detailed information, please refer
           to :ref:`api_guide_Name`. Usually name is no need to set and
           None by default.

    Returns:
        Tensor: Embedding Tensor mapped by input. The data type is the same as :attr:`weight`.

    Examples:

        .. code-block:: python

            import numpy as np
            import paddle.fluid as fluid
            import paddle.nn.functional as functional

            prog = fluid.Program()
            with fluid.program_guard(prog):
                initializer = fluid.initializer.NumpyArrayInitializer(
                    np.random.random(size=(128, 100)))

                param_attr = fluid.ParamAttr(
                    name="emb_weight",
                    learning_rate=0.5,
                    initializer=initializer,
                    trainable=True)

                weight = prog.global_block().create_parameter(
                    (128, 100), attr=param_attr, dtype="float32")

                label = fluid.layers.data(
                    name="label",
                    shape=[4],
                    append_batch_size=False,
                    dtype="int64")

                emb = functional.embedding(
                    x=label, weight=weight, sparse=True, name="embedding")
    """
    if in_dygraph_mode():
        # The op attribute must be an integer, so the documented default of
        # None is mapped to -1 (the same value the static branch uses for
        # "no padding") instead of being forwarded as None.
        # NOTE(review): any other negative value is still forwarded unchanged
        # here, unlike the static branch below. nn.Embedding passes -1 to mean
        # "no padding", so confirm callers before normalizing negatives here.
        return core.ops.lookup_table_v2(
            weight, x, 'is_sparse', sparse, 'is_distributed', False,
            'remote_prefetch', False, 'padding_idx', -1
            if padding_idx is None else padding_idx)
    else:
        helper = LayerHelper('embedding', **locals())
        # The output has the dtype of the lookup table. This function has no
        # `input` kwarg, so the parameter name must be given explicitly.
        dtype = helper.input_dtype(input_param_name='weight')

        check_variable_and_dtype(x, 'x', ['int32', 'int64'], 'embedding')

        is_distributed = False
        remote_prefetch = sparse and (not is_distributed)

        tmp = helper.create_variable_for_type_inference(dtype)
        # None -> -1 (no padding); negative values count from the end of the
        # table; non-negative values are used as given.
        padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
            weight.shape[0] + padding_idx)

        helper.append_op(
            type='lookup_table_v2',
            inputs={'Ids': x,
                    'W': weight},
            outputs={'Out': tmp},
            attrs={
                'is_sparse': sparse,
                'is_distributed': is_distributed,
                'remote_prefetch': remote_prefetch,
                'padding_idx': padding_idx
            })
        return tmp
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# TODO: define the common classes to build a neural network # TODO: define the common classes to build a neural network
from ...fluid.dygraph import BilinearTensorProduct #DEFINE_ALIAS from ...fluid.dygraph import BilinearTensorProduct #DEFINE_ALIAS
from ...fluid.dygraph import Pool2D #DEFINE_ALIAS from ...fluid.dygraph import Pool2D #DEFINE_ALIAS
from ...fluid.dygraph import Embedding #DEFINE_ALIAS from ...fluid.dygraph import Linear #DEFINE_ALIAS
from ...fluid.dygraph import Flatten #DEFINE_ALIAS from ...fluid.dygraph import Flatten #DEFINE_ALIAS
from ...fluid.dygraph import layers from ...fluid.dygraph import layers
from .. import functional as F from .. import functional as F
...@@ -1547,3 +1547,131 @@ class CosineSimilarity(layers.Layer): ...@@ -1547,3 +1547,131 @@ class CosineSimilarity(layers.Layer):
def forward(self, x1, x2): def forward(self, x1, x2):
return F.cosine_similarity(x1, x2, axis=self._axis, eps=self._eps) return F.cosine_similarity(x1, x2, axis=self._axis, eps=self._eps)
class Embedding(layers.Layer):
    r"""
    :alias_main: paddle.nn.Embedding
    :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding
    :old_api: paddle.fluid.dygraph.Embedding

    **Embedding Layer**

    This interface is used to construct a callable object of the ``Embedding`` class.
    For specific usage, refer to code examples. It implements the function of the Embedding Layer.
    This layer is used to lookup embeddings vector of ids provided by the input of ``forward`` .
    It automatically constructs a 2D embedding matrix of shape
    (:attr:`num_embeddings`, :attr:`embedding_dim`) with the default dtype.

    The shape of output Tensor is generated by appending an emb_size dimension to the
    last dimension of the input Tensor shape.

    **Note:** The id in the input must satisfy :math:`0 =< id < num\_embeddings` ,
    otherwise the program will throw an exception and exit.

    .. code-block:: text

        Case 1:

        input is a Tensor. padding_idx = -1
            input.data = [[1, 3], [2, 4], [4, 127]]
            input.shape = [3, 2]
        Given num_embeddings = 128, embedding_dim = 16
        output is a Tensor:
            out.shape = [3, 2, 16]
            out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
                        [0.345421456, 0.524563927, ..., 0.144534654]],

                        [[0.345249859, 0.124939536, ..., 0.194353745],
                        [0.945345345, 0.435394634, ..., 0.435345365]],

                        [[0.945345345, 0.435394634, ..., 0.435345365],
                        [0.0,         0.0,         ..., 0.0        ]]]  # padding data
        The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
        It will pad all-zero data when ids is 127.

    Parameters:
        num_embeddings (int): Just one element which indicate the size
            of the dictionary of embeddings.
        embedding_dim (int): Just one element which indicate the size of each embedding vector respectively.
        padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
            to :math:`num\_embeddings + padding\_idx` . It will output all-zero padding data whenever lookup
            encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
            If set None, it makes no effect to output. Default: None.
        sparse(bool): The flag indicating whether to use sparse update. This parameter only
            affects the performance of the backwards gradient update. It is recommended to set
            True because sparse update is faster. But some optimizer does not support sparse update,
            such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
            :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
            :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
            In these case, sparse must be False. Default: False.
        weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
            default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
            user-defined or pre-trained word vectors can be loaded with the :attr:`weight_attr` parameter.
            The local word vector needs to be transformed into numpy format, and the shape of local word
            vector should be consistent with (:attr:`num_embeddings`, :attr:`embedding_dim`) .
            Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
            is used to load custom or pre-trained word vectors.
        name(str|None): For detailed information, please refer
               to :ref:`api_guide_Name`. Usually name is no need to set and
               None by default.

    Attribute:
        **_weight** (Parameter): the learnable weights of this layer.

    Returns:
        None

    Raises:
        ValueError: If :attr:`padding_idx` is not None and lies outside
            [-num_embeddings, num_embeddings).

    Examples:

        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np
            paddle.disable_static()

            # example 1
            inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
            inp_word.shape  # [2, 3]
            dict_size = 20

            emb = nn.Embedding(
                        dict_size,
                        32,
                        sparse=False)
    """

    def __init__(self,
                 num_embeddings,
                 embedding_dim,
                 padding_idx=None,
                 sparse=False,
                 weight_attr=None,
                 name=None):
        super(Embedding, self).__init__()
        self._num_embeddings = num_embeddings
        self._embedding_dim = embedding_dim
        self._sparse = sparse
        self._is_distributed = False

        # Enforce the documented interval up front; an out-of-range value
        # would otherwise be accepted and produce a meaningless index.
        if padding_idx is not None and not (
                -num_embeddings <= padding_idx < num_embeddings):
            raise ValueError("padding_idx must be within [-{}, {})".format(
                num_embeddings, num_embeddings))

        # None -> -1 (no padding); negative values count from the end of the
        # table; non-negative values are used as given.
        self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
            num_embeddings + padding_idx)

        self._dtype = self._helper.get_default_dtype()
        self._size = [self._num_embeddings, self._embedding_dim]

        self._weight_attr = weight_attr
        self._remote_prefetch = False
        self._name = name
        self._weight = self.create_parameter(
            attr=self._weight_attr,
            shape=self._size,
            dtype=self._dtype,
            is_bias=False)

    def forward(self, x):
        """Look up the embedding vectors for the ids in ``x``.

        Args:
            x(Tensor): the ids to look up.

        Returns:
            The result of ``F.embedding`` for ``x`` and this layer's weight.
        """
        from ...fluid.framework import in_dygraph_mode

        padding_idx = self._padding_idx
        # -1 is this layer's "no padding" marker. Outside dygraph mode
        # F.embedding treats a negative padding_idx as an offset from the end
        # of the table, so forwarding -1 would turn the last row into padding.
        # Pass None there instead. In dygraph mode F.embedding hands the value
        # to the op as-is, so the integer -1 is kept.
        if padding_idx < 0 and not in_dygraph_mode():
            padding_idx = None

        return F.embedding(
            x,
            weight=self._weight,
            padding_idx=padding_idx,
            sparse=self._sparse,
            name=self._name)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册