提交 e81773c9 编写于 作者: P phlrain

move reset impl to phi; test=develop

上级 ec0e8391
......@@ -114,12 +114,95 @@ void EmbeddingGradKernel(const Context& ctx,
paddle::framework::TransToProtoVarType(input.dtype()), functor);
}
template <typename T, typename Context>
struct LookupTableV2SparseGradCPUFunctor {
LookupTableV2SparseGradCPUFunctor(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad)
: dev_ctx_(dev_ctx),
input_(input),
weight_(weight),
out_grad_(out_grad),
weight_grad_(weight_grad),
padding_idx_(padding_idx) {}
template <typename IdT>
void apply() {
DDim table_dim = weight_.dims();
auto ids = CopyIdsToVector<IdT, int64_t>(input_);
auto ids_num = static_cast<int64_t>(ids.size());
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
auto* d_table = weight_grad_;
auto* d_output = &out_grad_;
d_table->set_rows(ids);
auto* d_table_value = d_table->mutable_value();
d_table_value->Resize({ids_num, table_dim[1]});
d_table_value->template mutable_data<T>(dev_ctx_.GetPlace());
d_table->set_height(table_dim[0]);
auto* d_output_data = d_output->template data<T>();
auto* d_table_data = d_table_value->template data<T>();
auto d_output_dims = d_output->dims();
auto d_output_dims_2d =
flatten_to_2d(d_output_dims, d_output_dims.size() - 1);
PADDLE_ENFORCE_EQ(d_table_value->dims(),
d_output_dims_2d,
phi::errors::InvalidArgument(
"ShapeError: The shape of lookup_table@Grad and "
"output@Grad should be same. "
"But received lookup_table@Grad's shape = [%s], "
"output@Grad's shape = [%s].",
d_table_value->dims(),
d_output_dims_2d));
memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel());
}
private:
const Context& dev_ctx_;
const DenseTensor& input_;
const DenseTensor& weight_;
const DenseTensor& out_grad_;
SelectedRows* weight_grad_;
int64_t padding_idx_;
};
template <typename T, typename Context>
void EmbeddingSparseGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad) {
LookupTableV2SparseGradCPUFunctor<T, Context> functor(
ctx, input, weight, out_grad, padding_idx, weight_grad);
paddle::framework::VisitIntDataType(
paddle::framework::TransToProtoVarType(input.dtype()), functor);
}
} // namespace phi
// Registers phi::EmbeddingGradKernel under the name `embedding_grad` for the
// CPU backend, all layouts, with float / double / float16 element types.
// The registration macro prefix was renamed from PT_ to PD_; only the PD_
// spelling is kept so the invocation is well-formed.
PD_REGISTER_KERNEL(embedding_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::EmbeddingGradKernel,
                   float,
                   double,
                   phi::dtype::float16) {}
// Registers phi::EmbeddingSparseGradKernel under the name
// `embedding_sparse_grad` with the arguments CPU, ALL_LAYOUT and the element
// types float, double and phi::dtype::float16.
PD_REGISTER_KERNEL(embedding_sparse_grad,
CPU,
ALL_LAYOUT,
phi::EmbeddingSparseGradKernel,
float,
double,
phi::dtype::float16) {}
......@@ -99,7 +99,7 @@ void EmbeddingKernel(const Context& ctx,
} // namespace phi
PT_REGISTER_KERNEL(embedding,
PD_REGISTER_KERNEL(embedding,
CPU,
ALL_LAYOUT,
phi::EmbeddingKernel,
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/embedding_grad_kernel.h"
#include "paddle/phi/kernels/sparse_weight_embedding_grad_kernel.h"
#include "paddle/phi/kernels/funcs/embedding_util.h"
#include "paddle/fluid/framework/convert_utils.h"
......@@ -23,13 +23,13 @@
namespace phi {
template <typename T, typename Context>
struct LookupTableV2GradCPUFunctor {
LookupTableV2GradCPUFunctor(const Context& dev_ctx,
const DenseTensor& input,
const SelectedRows& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
DenseTensor* weight_grad)
struct SparseWeightLookupTableV2GradCPUFunctor {
SparseWeightLookupTableV2GradCPUFunctor(const Context& dev_ctx,
const DenseTensor& input,
const SelectedRows& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
DenseTensor* weight_grad)
: dev_ctx_(dev_ctx),
input_(input),
weight_(weight),
......@@ -101,6 +101,68 @@ struct LookupTableV2GradCPUFunctor {
int64_t padding_idx_;
};
template <typename T, typename Context>
struct SparseWeightLookupTableV2SparseGradCPUFunctor {
SparseWeightLookupTableV2SparseGradCPUFunctor(const Context& dev_ctx,
const DenseTensor& input,
const SelectedRows& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad)
: dev_ctx_(dev_ctx),
input_(input),
weight_(weight),
out_grad_(out_grad),
weight_grad_(weight_grad),
padding_idx_(padding_idx) {}
template <typename IdT>
void apply() {
DDim table_dim = weight_.dims();
auto ids = CopyIdsToVector<IdT, int64_t>(input_);
auto ids_num = static_cast<int64_t>(ids.size());
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
auto* d_table = weight_grad_;
auto* d_output = &out_grad_;
d_table->set_rows(ids);
auto* d_table_value = d_table->mutable_value();
d_table_value->Resize({ids_num, table_dim[1]});
d_table_value->template mutable_data<T>(dev_ctx_.GetPlace());
d_table->set_height(table_dim[0]);
auto* d_output_data = d_output->template data<T>();
auto* d_table_data = d_table_value->template data<T>();
auto d_output_dims = d_output->dims();
auto d_output_dims_2d =
phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1);
PADDLE_ENFORCE_EQ(d_table_value->dims(),
d_output_dims_2d,
phi::errors::InvalidArgument(
"ShapeError: The shape of lookup_table@Grad and "
"output@Grad should be same. "
"But received lookup_table@Grad's shape = [%s], "
"output@Grad's shape = [%s].",
d_table_value->dims(),
d_output_dims_2d));
memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel());
}
private:
const Context& dev_ctx_;
const DenseTensor& input_;
const SelectedRows& weight_;
const DenseTensor& out_grad_;
SelectedRows* weight_grad_;
int64_t padding_idx_;
};
template <typename T, typename Context>
void SparseWeightEmbeddingGradKernel(const Context& ctx,
const DenseTensor& input,
......@@ -108,7 +170,20 @@ void SparseWeightEmbeddingGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int64_t padding_idx,
DenseTensor* weight_grad) {
LookupTableV2GradCPUFunctor<T, Context> functor(
SparseWeightLookupTableV2GradCPUFunctor<T, Context> functor(
ctx, input, weight, out_grad, padding_idx, weight_grad);
paddle::framework::VisitIntDataType(
paddle::framework::TransToProtoVarType(input.dtype()), functor);
}
template <typename T, typename Context>
void SparseWeightEmbeddingSparseGradKernel(const Context& ctx,
const DenseTensor& input,
const SelectedRows& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad) {
SparseWeightLookupTableV2SparseGradCPUFunctor<T, Context> functor(
ctx, input, weight, out_grad, padding_idx, weight_grad);
paddle::framework::VisitIntDataType(
paddle::framework::TransToProtoVarType(input.dtype()), functor);
......@@ -116,10 +191,18 @@ void SparseWeightEmbeddingGradKernel(const Context& ctx,
} // namespace phi
// Registers phi::SparseWeightEmbeddingGradKernel under the name
// `sparse_weight_embedding_grad` for the CPU backend, all layouts, with
// float / double / float16 element types.
// The registration macro prefix was renamed from PT_ to PD_; only the PD_
// spelling is kept so the invocation is well-formed.
PD_REGISTER_KERNEL(sparse_weight_embedding_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::SparseWeightEmbeddingGradKernel,
                   float,
                   double,
                   phi::dtype::float16) {}
// Registers phi::SparseWeightEmbeddingSparseGradKernel under the name
// `sparse_weight_embedding_sparse_grad` with the arguments CPU, ALL_LAYOUT
// and the element types float, double and phi::dtype::float16.
PD_REGISTER_KERNEL(sparse_weight_embedding_sparse_grad,
CPU,
ALL_LAYOUT,
phi::SparseWeightEmbeddingSparseGradKernel,
float,
double,
phi::dtype::float16) {}
......@@ -24,12 +24,12 @@
namespace phi {
template <typename T, typename Context>
struct LookupTableV2CPUFunctor {
LookupTableV2CPUFunctor(const Context& dev_ctx,
const DenseTensor& input,
const SelectedRows& weight,
int64_t padding_idx,
DenseTensor* out)
struct LookupTableV2CPUSparseFunctor {
LookupTableV2CPUSparseFunctor(const Context& dev_ctx,
const DenseTensor& input,
const SelectedRows& weight,
int64_t padding_idx,
DenseTensor* out)
: dev_ctx_(dev_ctx),
input_(input),
weight_(weight),
......@@ -94,7 +94,7 @@ void SparseWeightEmbeddingKernel(const Context& ctx,
const SelectedRows& weight,
int64_t padding_idx,
DenseTensor* out) {
LookupTableV2CPUFunctor<T, Context> functor(
LookupTableV2CPUSparseFunctor<T, Context> functor(
ctx, input, weight, padding_idx, out);
paddle::framework::VisitIntDataType(
paddle::framework::TransToProtoVarType(input.dtype()), functor);
......@@ -102,7 +102,7 @@ void SparseWeightEmbeddingKernel(const Context& ctx,
} // namespace phi
PT_REGISTER_KERNEL(sparse_weight_embedding,
PD_REGISTER_KERNEL(sparse_weight_embedding,
CPU,
ALL_LAYOUT,
phi::SparseWeightEmbeddingKernel,
......
......@@ -15,6 +15,7 @@
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
namespace phi {
......@@ -26,4 +27,12 @@ void EmbeddingGradKernel(const Context& ctx,
int64_t padding_idx,
DenseTensor* weight_grad);
// Embedding backward kernel that emits the weight gradient as SelectedRows
// instead of a dense tensor.
//
// ctx          device context used to allocate the gradient value tensor.
// input        integer ids that were looked up in forward.
// weight       dense embedding table; the visible implementations read only
//              its dims.
// out_grad     gradient of the forward output.
// padding_idx  part of the signature; the visible CPU/GPU functors store it
//              but do not read it.
// weight_grad  output; its rows are set to the ids and its value tensor
//              receives a copy of out_grad.
template <typename T, typename Context>
void EmbeddingSparseGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad);
} // namespace phi
......@@ -21,7 +21,9 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
namespace phi {
template <typename InT, typename OutT>
......@@ -120,12 +122,117 @@ void EmbeddingGradKernel(const Context& ctx,
paddle::framework::VisitIntDataType(
paddle::framework::TransToProtoVarType(input.dtype()), functor);
}
// Builds the SelectedRows gradient of a dense embedding table on GPU.
//
// The row index list of weight_grad becomes the ids (as int64_t) and its
// value tensor becomes a device-side copy of out_grad. T is the element type
// of the gradient; apply<IdT>() is invoked with the integer type of the ids.
template <typename T, typename Context>
struct LookupTableV2SparseGradCUDAFunctor {
LookupTableV2SparseGradCUDAFunctor(const Context& dev_ctx,
const DenseTensor& input,
const DenseTensor& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad)
: dev_ctx_(dev_ctx),
input_(input),
weight_(weight),
out_grad_(out_grad),
padding_idx_(padding_idx),
weight_grad_(weight_grad) {}
template <typename IdT>
void apply() {
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
const auto* ids_data = input_.template data<IdT>();
auto* d_table = weight_grad_;
auto* table = &weight_;
auto* d_output = &out_grad_;
int64_t ids_num = input_.numel();
// Launch configuration used only for the InputTypeConvert kernel below.
dim3 threads(128, 8);
dim3 grids(8, 1);
auto stream = dev_ctx_.stream();
// Row index buffer: one int64_t slot per id.
paddle::framework::Vector<int64_t> new_rows;
new_rows.resize(ids_num);
auto gpu_place = dev_ctx_.GetPlace();
paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
// Fill the device side of the row buffer: convert the ids with a kernel
// when IdT is not int64_t, otherwise copy them as-is (source and destination
// are both gpu_place).
if (!std::is_same<IdT, int64_t>::value) {
InputTypeConvert<<<grids, threads, 0, stream>>>(
ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
} else {
paddle::memory::Copy(gpu_place,
mixv_new_rows.CUDAMutableData(gpu_place),
gpu_place,
ids_data,
ids_num * sizeof(int64_t),
stream);
}
// NOTE(review): presumably brings the device-side rows back into new_rows
// before it is handed to set_rows — confirm against MixVector.
mixv_new_rows.CopyToCPU();
d_table->set_rows(new_rows);
// NOTE(review): unlike the CPU functor, no set_height() call is made on
// d_table here — confirm the height is established elsewhere.
auto* d_table_value = d_table->mutable_value();
d_table_value->Resize({ids_num, table->dims()[1]});
d_table_value->template mutable_data<T>(gpu_place);
auto* d_table_data = d_table_value->template data<T>();
auto* d_output_data = d_output->template data<T>();
// Collapse every leading axis of out_grad so it can be compared with the
// 2-D gradient value tensor.
auto d_output_dims = d_output->dims();
auto d_output_dims_2d =
phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1);
PADDLE_ENFORCE_EQ(d_table_value->dims(),
d_output_dims_2d,
phi::errors::InvalidArgument(
"ShapeError: The shape of lookup_table@Grad and "
"output@Grad should be same. "
"But received lookup_table@Grad's shape = [%s], "
"output@Grad's shape = [%s].",
d_table_value->dims(),
d_output_dims_2d));
// Copy out_grad into the gradient value tensor on the same stream.
paddle::memory::Copy(gpu_place,
d_table_data,
gpu_place,
d_output_data,
d_output->numel() * sizeof(T),
stream);
}
private:
const phi::GPUContext& dev_ctx_;
const DenseTensor& input_;
const DenseTensor& weight_;
const DenseTensor& out_grad_;
// Stored for interface parity; apply() does not read it.
int64_t padding_idx_;
SelectedRows* weight_grad_;
};
template <typename T, typename Context>
void EmbeddingSparseGradKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad) {
LookupTableV2SparseGradCUDAFunctor<T, Context> functor(
ctx, input, weight, out_grad, padding_idx, weight_grad);
paddle::framework::VisitIntDataType(
paddle::framework::TransToProtoVarType(input.dtype()), functor);
}
} // namespace phi
// Registers phi::EmbeddingGradKernel under the name `embedding_grad` for the
// GPU backend, all layouts, with float / double / float16 element types.
// The registration macro prefix was renamed from PT_ to PD_; only the PD_
// spelling is kept so the invocation is well-formed.
PD_REGISTER_KERNEL(embedding_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::EmbeddingGradKernel,
                   float,
                   double,
                   phi::dtype::float16) {}
// Registers phi::EmbeddingSparseGradKernel under the name
// `embedding_sparse_grad` with the arguments GPU, ALL_LAYOUT and the element
// types float, double and phi::dtype::float16.
PD_REGISTER_KERNEL(embedding_sparse_grad,
GPU,
ALL_LAYOUT,
phi::EmbeddingSparseGradKernel,
float,
double,
phi::dtype::float16) {}
......@@ -115,7 +115,7 @@ void EmbeddingKernel(const Context &ctx,
} // namespace phi
PT_REGISTER_KERNEL(embedding,
PD_REGISTER_KERNEL(embedding,
GPU,
ALL_LAYOUT,
phi::EmbeddingKernel,
......
......@@ -27,4 +27,12 @@ void SparseWeightEmbeddingGradKernel(const Context& ctx,
int64_t padding_idx,
DenseTensor* weight_grad);
// Embedding backward kernel for a SelectedRows weight that emits the weight
// gradient as SelectedRows as well.
//
// ctx          device context used to allocate the gradient value tensor.
// input        integer ids that were looked up in forward.
// weight       embedding table stored as SelectedRows; the visible CPU
//              functor reads only its dims.
// out_grad     gradient of the forward output.
// padding_idx  part of the signature; the visible CPU functor stores it but
//              does not read it.
// weight_grad  output; its rows are set to the ids and its value tensor
//              receives a copy of out_grad.
template <typename T, typename Context>
void SparseWeightEmbeddingSparseGradKernel(const Context& ctx,
const DenseTensor& input,
const SelectedRows& weight,
const DenseTensor& out_grad,
int64_t padding_idx,
SelectedRows* weight_grad);
} // namespace phi
......@@ -18,10 +18,8 @@ namespace phi {
KernelSignature EmbeddingOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.IsDenseTensorInput("W")) {
LOG(ERROR) << "is dense here";
return KernelSignature("embedding", {"Ids", "W"}, {"padding_idx"}, {"Out"});
} else {
LOG(ERROR) << "is selcted rows";
return KernelSignature(
"sparse_weight_embedding", {"Ids", "W"}, {"padding_idx"}, {"Out"});
}
......@@ -30,23 +28,37 @@ KernelSignature EmbeddingOpArgumentMapping(const ArgumentMappingContext& ctx) {
// Chooses the phi kernel signature for the lookup_table_v2_grad operator.
//
// The kernel name depends on whether input "W" is a dense tensor and on the
// boolean "is_sparse" attribute:
//
//   W dense,     is_sparse  -> embedding_sparse_grad
//   W dense,     !is_sparse -> embedding_grad
//   W not dense, is_sparse  -> sparse_weight_embedding_sparse_grad
//   W not dense, !is_sparse -> sparse_weight_embedding_grad
//
// All four signatures share the same inputs {Ids, W, Out@GRAD}, attribute
// {padding_idx} and output {W@GRAD}. No unconditional return may precede the
// is_sparse check, otherwise the *_sparse_grad kernels become unreachable.
KernelSignature EmbeddingGradOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  const bool dense_weight = ctx.IsDenseTensorInput("W");
  const bool sparse_grad = paddle::any_cast<bool>(ctx.Attr("is_sparse"));

  const char* kernel_name = nullptr;
  if (dense_weight) {
    kernel_name = sparse_grad ? "embedding_sparse_grad" : "embedding_grad";
  } else {
    kernel_name = sparse_grad ? "sparse_weight_embedding_sparse_grad"
                              : "sparse_weight_embedding_grad";
  }

  return KernelSignature(kernel_name,
                         {"Ids", "W", GradVarName("Out")},
                         {"padding_idx"},
                         {GradVarName("W")});
}
} // namespace phi
// Map the fluid operator names onto their phi base kernel names.
// The registration macro prefix was renamed from PT_ to PD_; only the PD_
// spellings are kept so every invocation is well-formed and appears once.
PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2, embedding);
PD_REGISTER_BASE_KERNEL_NAME(lookup_table_v2_grad, embedding_grad);

// Attach the argument-mapping functions that pick the concrete kernel.
PD_REGISTER_ARG_MAPPING_FN(lookup_table_v2, phi::EmbeddingOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(lookup_table_v2_grad,
                           phi::EmbeddingGradOpArgumentMapping);
......@@ -25,23 +25,24 @@ import paddle.compat as cpt
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
# class TestStaticGraphSupportMultipleInt(unittest.TestCase):
# def test_main(self):
# dtypes = ['uint8', 'int8', 'int16', 'int32', 'int64']
# if paddle.in_dynamic_mode():
# paddle.enable_static()
# disable_static = True
# else:
# disable_static = False
# for i, dtype in enumerate(dtypes):
# with paddle.static.program_guard(paddle.static.Program(),
# paddle.static.Program()):
# x = paddle.static.data(name='x', shape=[-1, 7, 30], dtype=dtype)
# emb = paddle.nn.Embedding(10, 20)
# y = emb(x)
# if disable_static:
# paddle.disable_static()
class TestStaticGraphSupportMultipleInt(unittest.TestCase):
    """Builds a static-graph Embedding once for each integer id dtype."""

    def test_main(self):
        dtypes = ['uint8', 'int8', 'int16', 'int32', 'int64']

        # Switch to static mode only when needed, and remember to switch back.
        restore_dynamic = paddle.in_dynamic_mode()
        if restore_dynamic:
            paddle.enable_static()

        try:
            for dtype in dtypes:
                # Fresh main/startup programs for every dtype.
                with paddle.static.program_guard(paddle.static.Program(),
                                                 paddle.static.Program()):
                    x = paddle.static.data(
                        name='x', shape=[-1, 7, 30], dtype=dtype)
                    emb = paddle.nn.Embedding(10, 20)
                    emb(x)
        finally:
            # Restore the global mode even if graph construction raised, so a
            # failure here does not leak static mode into later tests.
            if restore_dynamic:
                paddle.disable_static()
class TestLookupTableOp(OpTest):
......@@ -62,17 +63,19 @@ class TestLookupTableOp(OpTest):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
# class TestLookupTableOpInt16(OpTest):
# def id_dtype(self):
# return "int16"
class TestLookupTableOpInt16(OpTest):
    """Variant that reports int16 as the id dtype.

    NOTE(review): this derives from OpTest directly, so nothing visible in
    this class calls id_dtype() -- confirm whether TestLookupTableOp was the
    intended base.
    """

    def id_dtype(self):
        return "int16"
# class TestLookupTableOpInt8(OpTest):
# def id_dtype(self):
# return "int8"
# class TestLookupTableOpUInt8(OpTest):
# def id_dtype(self):
# return "uint8"
class TestLookupTableOpInt8(OpTest):
    """Variant that reports int8 as the id dtype.

    NOTE(review): this derives from OpTest directly, so nothing visible in
    this class calls id_dtype() -- confirm whether TestLookupTableOp was the
    intended base.
    """

    def id_dtype(self):
        return "int8"


class TestLookupTableOpUInt8(OpTest):
    """Variant that reports uint8 as the id dtype.

    NOTE(review): this derives from OpTest directly, so nothing visible in
    this class calls id_dtype() -- confirm whether TestLookupTableOp was the
    intended base.
    """

    def id_dtype(self):
        return "uint8"
class TestLookupTableOpWithTensorIds(OpTest):
......@@ -90,183 +93,190 @@ class TestLookupTableOpWithTensorIds(OpTest):
self.check_grad(['W'], 'Out', no_grad_set=set('Ids'))
# @skip_check_grad_ci(
# reason="Since paddings are not trainable and fixed in forward,"
# "the gradient of paddings makes no sense and we don't "
# "test the gradient here.")
# class TestLookupTableOpWithPadding(TestLookupTableOp):
# def test_check_output(self):
# ids = np.squeeze(self.inputs['Ids'])
# padding_idx = np.random.choice(ids, 1)[0]
# self.outputs['Out'][ids == padding_idx] = np.zeros(31)
# self.attrs = {'padding_idx': int(padding_idx)}
# self.check_output()
# @skip_check_grad_ci(
# reason="Since paddings are not trainable and fixed in forward,"
# "the gradient of paddings makes no sense and we don't "
# "test the gradient here.")
# class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
# def test_check_output(self):
# ids = self.inputs['Ids']
# flatten_idx = ids.flatten()
# padding_idx = np.random.choice(flatten_idx, 1)[0]
# self.outputs['Out'][np.squeeze(ids == padding_idx)] = np.zeros(31)
# self.attrs = {'padding_idx': cpt.long_type(padding_idx)}
# self.check_output()
# class TestLookupTableWIsSelectedRows(unittest.TestCase):
# def prepare_ids(self, scope, place):
# ids_tensor = scope.var('Ids').get_tensor()
# ids_array = np.array([0, 4, 3, 5]).astype("int32")
# ids_tensor.set(ids_array, place)
# return ids_array
# def prepare_w(self, scope, place):
# rows = [0, 1, 2, 3, 4, 5, 6]
# row_numel = 12
# w_selected_rows = scope.var('W').get_selected_rows()
# w_selected_rows.set_height(len(rows))
# w_selected_rows.set_rows(rows)
# w_array = np.ones((len(rows), row_numel)).astype("float32")
# for i in range(len(rows)):
# w_array[i] *= i
# w_tensor = w_selected_rows.get_tensor()
# w_tensor.set(w_array, place)
# def create_out_tensor(self, scope, place):
# return scope.var('Out').get_tensor()
# def check_result(self, ids_array, result_array):
# # all(): return True if all elements of the iterable are true (or if the iterable is empty)
# for idx, row in enumerate(ids_array):
# assert (row == result_array[idx]).all()
# def check_with_place(self, place):
# scope = core.Scope()
# ids_array = self.prepare_ids(scope, place)
# self.prepare_w(scope, place)
# out_tensor = self.create_out_tensor(scope, place)
# # create and run lookup_table operator
# lookup_table = Operator("lookup_table_v2", W='W', Ids='Ids', Out='Out')
# lookup_table.run(scope, place)
# # get result from Out
# result_array = np.array(out_tensor)
# self.check_result(ids_array, result_array)
# def test_w_is_selected_rows(self):
# places = [core.CPUPlace()]
# # currently only support CPU
# for place in places:
# self.check_with_place(place)
# class TestLookupTableWithTensorIdsWIsSelectedRows(
# TestLookupTableWIsSelectedRows):
# def prepare_ids(self, scope, place):
# ids_tensor = scope.var('Ids').get_tensor()
# ids_array = np.random.randint(
# low=0, high=6, size=(2, 4, 3)).astype("int64")
# ids_tensor.set(ids_array, place)
# return ids_array
# def check_result(self, ids_array, result_array):
# for idx, row in np.ndenumerate(ids_array):
# assert (row == result_array[idx]).all()
# class TestLookupTableIsSparse(unittest.TestCase):
# def init_data(self):
# self.x_data = np.array([[1, 3, 0, 4, 7]]).astype("int64")
# self.y_data = np.array([[0.1, 0.3, 0, 0.4, 0.7]]).astype("float32")
# def get_w_grad(self, is_sparse):
# self.init_data()
# main_program = fluid.Program()
# with fluid.program_guard(main_program, fluid.Program()):
# x = fluid.layers.data(name='x', shape=[5], dtype='int64')
# y_ = fluid.layers.data(name='y_', shape=[5], dtype='float32')
# emb = fluid.input.embedding(
# input=x,
# size=[10, 16],
# param_attr=fluid.ParamAttr(
# name="emb_weight",
# learning_rate=10,
# initializer=fluid.initializer.NumpyArrayInitializer(
# self.w_data)),
# is_sparse=is_sparse)
# y = fluid.layers.reduce_sum(emb, dim=-1)
# loss = fluid.layers.square_error_cost(input=y, label=y_)
# loss = fluid.layers.mean(loss)
# sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
# sgd_optimizer.minimize(loss)
# place = fluid.CPUPlace()
# exe = fluid.Executor(place)
# exe.run(fluid.default_startup_program())
# ret = exe.run(feed={'x': self.x_data,
# 'y_': self.y_data},
# fetch_list=['emb_weight'],
# return_numpy=False)
# return np.array(ret[0])
# def test_w_grad(self):
# self.w_data = np.random.random(size=(10, 16)).astype("float32")
# w_grad = self.get_w_grad(False)
# w_grad_with_sparse = self.get_w_grad(True)
# self.check_grad(w_grad, w_grad_with_sparse)
# def check_grad(self, w_grad1, w_grad2, tolerance=1e-6):
# np.testing.assert_allclose(
# w_grad1, w_grad2, rtol=tolerance, atol=tolerance)
# class TestLookupTableApi(unittest.TestCase):
# def test_api(self):
# x = fluid.layers.data(name='x', shape=[20], dtype='int64')
# emb = fluid.embedding(input=x, size=[128, 64])
# place = fluid.CPUPlace()
# x_data = np.random.randint(0, 127, [2, 20]).astype("int64")
# exe = fluid.Executor(place)
# exe.run(fluid.default_startup_program())
# ret = exe.run(feed={'x': x_data, },
# fetch_list=[emb],
# return_numpy=False)
# class TestEmbedOpError(unittest.TestCase):
# def test_errors(self):
# with program_guard(Program(), Program()):
# input_data = np.random.randint(0, 10, (4, 6)).astype("int64")
# def test_Variable():
# # the input type must be Variable
# fluid.embedding(input=input_data, size=(10, 64))
# self.assertRaises(TypeError, test_Variable)
# def test_input_dtype():
# # the input dtype must be int64
# input = fluid.data(name='x1', shape=[4, 6], dtype='float32')
# fluid.embedding(input=input, size=(10, 64))
# self.assertRaises(TypeError, test_input_dtype)
# def test_param_dtype():
# # dtype must be float32 or float64
# input2 = fluid.data(name='x2', shape=[4, 6], dtype='int64')
# fluid.embedding(input=input2, size=(10, 64), dtype='int64')
# self.assertRaises(TypeError, test_param_dtype)
# input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64')
# fluid.embedding(input=input3, size=(10, 64), dtype='float16')
@skip_check_grad_ci(
    reason="Since paddings are not trainable and fixed in forward,"
    "the gradient of paddings makes no sense and we don't "
    "test the gradient here.")
class TestLookupTableOpWithPadding(TestLookupTableOp):
    """Forward check with a randomly chosen id acting as padding_idx."""

    def test_check_output(self):
        flat_ids = np.squeeze(self.inputs['Ids'])
        # Pick one of the ids that is actually present as the padding index.
        pad_id = np.random.choice(flat_ids, 1)[0]
        # Rows looked up with the padding id are expected to be all zeros.
        pad_mask = flat_ids == pad_id
        self.outputs['Out'][pad_mask] = np.zeros(31)
        self.attrs = {'padding_idx': int(pad_id)}
        self.check_output()
@skip_check_grad_ci(
    reason="Since paddings are not trainable and fixed in forward,"
    "the gradient of paddings makes no sense and we don't "
    "test the gradient here.")
class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds):
    """Forward check with tensor-shaped ids and a random padding_idx."""

    def test_check_output(self):
        ids = self.inputs['Ids']
        # Pick one of the ids that is actually present as the padding index.
        pad_id = np.random.choice(ids.flatten(), 1)[0]
        # Rows looked up with the padding id are expected to be all zeros.
        pad_mask = np.squeeze(ids == pad_id)
        self.outputs['Out'][pad_mask] = np.zeros(31)
        self.attrs = {'padding_idx': cpt.long_type(pad_id)}
        self.check_output()
class TestLookupTableWIsSelectedRows(unittest.TestCase):
    """Runs lookup_table_v2 directly with W stored as SelectedRows."""

    def prepare_ids(self, scope, place):
        """Creates the 'Ids' tensor in `scope` and returns the ids array."""
        ids_array = np.array([0, 4, 3, 5]).astype("int32")
        scope.var('Ids').get_tensor().set(ids_array, place)
        return ids_array

    def prepare_w(self, scope, place):
        """Creates 'W' as SelectedRows where row i is filled with value i."""
        rows = [0, 1, 2, 3, 4, 5, 6]
        row_numel = 12
        row_count = len(rows)

        w_selected_rows = scope.var('W').get_selected_rows()
        w_selected_rows.set_height(row_count)
        w_selected_rows.set_rows(rows)

        w_array = np.ones((row_count, row_numel)).astype("float32")
        for row_idx in range(row_count):
            w_array[row_idx] *= row_idx
        w_selected_rows.get_tensor().set(w_array, place)

    def create_out_tensor(self, scope, place):
        return scope.var('Out').get_tensor()

    def check_result(self, ids_array, result_array):
        # Row i of W holds the value i everywhere, so every element of the
        # looked-up row must equal the id itself.
        for pos, expected in enumerate(ids_array):
            assert (expected == result_array[pos]).all()

    def check_with_place(self, place):
        scope = core.Scope()
        ids_array = self.prepare_ids(scope, place)
        self.prepare_w(scope, place)
        out_tensor = self.create_out_tensor(scope, place)

        # create and run lookup_table operator
        op = Operator("lookup_table_v2", W='W', Ids='Ids', Out='Out')
        op.run(scope, place)

        # get result from Out
        self.check_result(ids_array, np.array(out_tensor))

    def test_w_is_selected_rows(self):
        # currently only support CPU
        for place in [core.CPUPlace()]:
            self.check_with_place(place)
class TestLookupTableWithTensorIdsWIsSelectedRows(
        TestLookupTableWIsSelectedRows):
    """Same scenario as the base class, with random 3-D int64 ids."""

    def prepare_ids(self, scope, place):
        """Creates a random (2, 4, 3) 'Ids' tensor and returns its array."""
        ids_array = np.random.randint(
            low=0, high=6, size=(2, 4, 3)).astype("int64")
        scope.var('Ids').get_tensor().set(ids_array, place)
        return ids_array

    def check_result(self, ids_array, result_array):
        # ndenumerate yields an index tuple per id, which also addresses the
        # matching looked-up row in the result.
        for index, expected in np.ndenumerate(ids_array):
            assert (expected == result_array[index]).all()
class TestLookupTableIsSparse(unittest.TestCase):
    """Compares the embedding weight after one SGD step with and without
    is_sparse."""

    def init_data(self):
        self.x_data = np.array([[1, 3, 0, 4, 7]]).astype("int64")
        self.y_data = np.array([[0.1, 0.3, 0, 0.4, 0.7]]).astype("float32")

    def get_w_grad(self, is_sparse):
        """Runs one training step and returns the fetched 'emb_weight'."""
        self.init_data()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, fluid.Program()):
            x = fluid.layers.data(name='x', shape=[5], dtype='int64')
            y_ = fluid.layers.data(name='y_', shape=[5], dtype='float32')

            weight_attr = fluid.ParamAttr(
                name="emb_weight",
                learning_rate=10,
                initializer=fluid.initializer.NumpyArrayInitializer(
                    self.w_data))
            emb = fluid.input.embedding(
                input=x,
                size=[10, 16],
                param_attr=weight_attr,
                is_sparse=is_sparse)
            y = fluid.layers.reduce_sum(emb, dim=-1)

            loss = fluid.layers.square_error_cost(input=y, label=y_)
            loss = fluid.layers.mean(loss)

            sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
            sgd_optimizer.minimize(loss)

            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(fluid.default_startup_program())
            feed = {'x': self.x_data, 'y_': self.y_data}
            ret = exe.run(feed=feed,
                          fetch_list=['emb_weight'],
                          return_numpy=False)
            return np.array(ret[0])

    def test_w_grad(self):
        self.w_data = np.random.random(size=(10, 16)).astype("float32")
        dense_result = self.get_w_grad(False)
        sparse_result = self.get_w_grad(True)
        self.check_grad(dense_result, sparse_result)

    def check_grad(self, w_grad1, w_grad2, tolerance=1e-6):
        np.testing.assert_allclose(
            w_grad1, w_grad2, rtol=tolerance, atol=tolerance)
class TestLookupTableApi(unittest.TestCase):
    """Smoke test: fluid.embedding builds and runs on CPU."""

    def test_api(self):
        x = fluid.layers.data(name='x', shape=[20], dtype='int64')
        emb = fluid.embedding(input=x, size=[128, 64])

        x_data = np.random.randint(0, 127, [2, 20]).astype("int64")

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(fluid.default_startup_program())
        # Only successful execution is checked; the fetched value is unused.
        ret = exe.run(feed={'x': x_data},
                      fetch_list=[emb],
                      return_numpy=False)
class TestEmbedOpError(unittest.TestCase):
    """Argument validation of fluid.embedding."""

    def test_errors(self):
        with program_guard(Program(), Program()):
            input_data = np.random.randint(0, 10, (4, 6)).astype("int64")

            def test_Variable():
                # the input type must be Variable
                fluid.embedding(input=input_data, size=(10, 64))

            def test_input_dtype():
                # the input dtype must be int64
                float_ids = fluid.data(
                    name='x1', shape=[4, 6], dtype='float32')
                fluid.embedding(input=float_ids, size=(10, 64))

            def test_param_dtype():
                # dtype must be float32 or float64
                int_ids = fluid.data(name='x2', shape=[4, 6], dtype='int64')
                fluid.embedding(input=int_ids, size=(10, 64), dtype='int64')

            self.assertRaises(TypeError, test_Variable)
            self.assertRaises(TypeError, test_input_dtype)
            self.assertRaises(TypeError, test_param_dtype)

            # Not wrapped in assertRaises: building this float16 embedding is
            # simply executed.
            input3 = fluid.data(name='x3', shape=[4, 6], dtype='int64')
            fluid.embedding(input=input3, size=(10, 64), dtype='float16')
if __name__ == "__main__":
paddle.enable_static()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册