Unverified · Commit 0e74bf36 authored by zhaoyingli, committed by GitHub

[NewIR]support c_allreduce_sum/c_identity/c_embedding/c_embedding_grad (#56836)

* [NewIR]add c_allreduce_sum/c_identity/c_reduce_sum/c_embedding/c_embedding_grad

* rm VLOG

* rm c_identity from LegacyOpList

* rm VLOG

* rm c_reduce_sum
Parent 10d60b73
@@ -27,7 +27,9 @@ const std::unordered_set<std::string> LegacyOpList = {
     "pd.c_sync_calc_stream_",
     "pd.c_sync_comm_stream_",
     "pd.send_v2",
-    "pd.recv_v2"};
+    "pd.recv_v2",
+    "pd.c_allreduce_sum",
+    "pd.c_allreduce_sum_"};

 enum class AttrType {
   UNDEFINED = 0,
...
@@ -102,6 +102,17 @@
     composite: batch_norm_grad(x, scale, bias, mean_out, variance_out, saved_mean, saved_variance, reserve_space, out_grad, momentum, epsilon, data_layout, is_test, use_global_stats, trainable_statistics)
   backward : batch_norm_double_grad

+- backward_op : c_embedding_grad
+  forward : c_embedding (Tensor weight, Tensor x, int64_t start_index=0) -> Tensor(out)
+  args : (Tensor weight, Tensor x, Tensor out_grad, int64_t start_index=0)
+  output : Tensor(weight_grad)
+  infer_meta :
+    func : EmbeddingGradInferMeta
+    param : [x, weight]
+  kernel :
+    func : c_embedding_grad
+  no_need_buffer : weight
+
 - backward_op : cast_grad
   forward : cast (Tensor x, DataType dtype) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
...
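As a reading aid for the hunk above: c_embedding/c_embedding_grad are the vocab-parallel embedding pair, where each rank holds a [num_rows, width] shard of the table beginning at start_index and ids outside the shard produce zero rows. Below is a minimal single-process NumPy sketch of those semantics; the helper names are illustrative only and are not part of the Paddle API.

import numpy as np

def c_embedding(weight, ids, start_index=0):
    # Forward: look up rows of the local weight shard. Ids outside
    # [start_index, start_index + num_rows) belong to another rank's
    # shard and yield zero rows here.
    num_rows = weight.shape[0]
    local = ids - start_index
    in_range = (local >= 0) & (local < num_rows)
    out = np.zeros(ids.shape + (weight.shape[1],), dtype=weight.dtype)
    out[in_range] = weight[local[in_range]]
    return out

def c_embedding_grad(weight, ids, out_grad, start_index=0):
    # Backward: scatter-add the gradient rows of in-range ids back into
    # the local shard's weight gradient; out-of-range ids contribute nothing.
    num_rows = weight.shape[0]
    local = ids - start_index
    in_range = (local >= 0) & (local < num_rows)
    weight_grad = np.zeros_like(weight)
    np.add.at(weight_grad, local[in_range], out_grad[in_range])
    return weight_grad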
@@ -123,6 +123,16 @@
   backward : batch_norm_grad
   optional : reserve_space

+- op : c_allreduce_sum
+  args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel)
+  output : Tensor(out)
+  infer_meta :
+    func : AllReduceInferMeta
+    param : [x]
+  kernel :
+    func : c_allreduce_sum
+  inplace : (x -> out)
+
 - op : c_broadcast
   args : (Tensor x, int ring_id=0, int root=0, bool use_calc_stream=false)
   output : Tensor(out)
@@ -142,6 +152,27 @@
   kernel :
     func : c_concat

+- op : c_embedding
+  args : (Tensor weight, Tensor x, int64_t start_index=0)
+  output : Tensor(out)
+  infer_meta :
+    func : CEmbeddingInferMeta
+    param : [weight, x, start_index]
+  kernel :
+    func : c_embedding
+    param : [weight, x, start_index]
+    data_type : weight
+  backward : c_embedding_grad
+
+- op : c_identity
+  args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel)
+  output : Tensor(out)
+  infer_meta :
+    func : CIdentityInferMeta
+  kernel :
+    func : c_identity
+  inplace : (x -> out)
+
 - op : c_sync_calc_stream
   args : (Tensor x)
   output : Tensor(out)
...
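For orientation, c_allreduce_sum and c_identity are the usual communication pair in tensor parallelism: c_allreduce_sum sums partial results across ranks in the forward pass (its backward is a pass-through), while c_identity passes the tensor through in the forward pass and, in model-parallel use, all-reduces the incoming gradient in the backward pass. A minimal single-process NumPy sketch, with ranks simulated as list entries (illustrative only, no real communication):

import numpy as np

def c_allreduce_sum(partials):
    # Every simulated rank receives the elementwise sum of all partials.
    total = np.sum(partials, axis=0)
    return [total.copy() for _ in partials]

def c_identity(x):
    # Forward pass-through; in real model parallelism the backward of
    # this op all-reduces the incoming gradient across ranks.
    return x

# Row-parallel matmul: each rank holds a slice of w and computes a partial
# product; the all-reduce restores the full result on every rank.
x = np.random.rand(4, 6)
w = np.random.rand(6, 3)
partials = [x[:, :3] @ w[:3], x[:, 3:] @ w[3:]]
full = c_allreduce_sum(partials)[0]
assert np.allclose(full, x @ w)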
@@ -435,6 +435,13 @@
   outputs :
     out : Out

+- op : c_embedding
+  backward : c_embedding_grad
+  inputs :
+    {weight : W, x : Ids}
+  outputs :
+    out : Out
+
 - op : cast
   inputs :
     x : X
@@ -3032,12 +3039,24 @@
   yolo_loss : GetYoloLossExpectedKernelType
   yolo_loss_grad : GetYoloLossExpectedKernelType

+- op: c_allreduce_sum
+  inputs :
+    x : X
+  outputs :
+    out: Out
+
 - op: c_broadcast
   inputs :
     x : X
   outputs :
     out : Out

+- op: c_identity
+  inputs :
+    x : X
+  outputs :
+    out: Out
+
 - op: c_sync_calc_stream
   inputs :
     x : X
...
@@ -1274,6 +1274,43 @@ void EmbeddingInferMeta(const MetaTensor& x,
   out->share_lod(x);
 }

+void CEmbeddingInferMeta(const MetaTensor& weight,
+                         const MetaTensor& x,
+                         int64_t start_index,
+                         MetaTensor* out) {
+  const auto& table_dims = weight.dims();
+  const auto& ids_dims = x.dims();
+  int ids_rank = ids_dims.size();
+  VLOG(5) << "ids rank is " << ids_rank << std::endl;
+
+  PADDLE_ENFORCE_EQ(
+      table_dims.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "ShapeError: The dimensions of the 'c_embedding' must be 2. "
+          "But received c_embedding's dimensions = %d, "
+          "c_embedding's shape = [%s].",
+          table_dims.size(),
+          table_dims));
+
+  auto output_dims = phi::vectorize(ids_dims);
+  output_dims.push_back(table_dims[1]);
+  out->set_dims(phi::make_ddim(output_dims));
+  out->set_dtype(weight.dtype());
+  out->share_lod(x);
+
+  const auto height = table_dims[0];
+  const auto width = table_dims[1];
+  PADDLE_ENFORCE_EQ(
+      (height > 0 && width > 0 && start_index >= 0),
+      true,
+      phi::errors::InvalidArgument(
+          "height:%ld width:%ld start_index:%ld must not have negative values",
+          height,
+          width,
+          start_index));
+}
+
 void ExpandAsInferMeta(const MetaTensor& x,
                        const MetaTensor& y,
                        const std::vector<int>& target_shape,
...
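The shape rule in CEmbeddingInferMeta above is simple to state: the output shape is the ids shape with the embedding width appended, the dtype follows the weight, and the weight must be a rank-2 table with positive extents. A hypothetical Python mirror of that rule, for illustration only:

def c_embedding_infer_shape(weight_shape, ids_shape, start_index=0):
    # Mirrors CEmbeddingInferMeta: weight must be rank-2 with positive
    # extents, and start_index must be non-negative.
    assert len(weight_shape) == 2
    height, width = weight_shape
    assert height > 0 and width > 0 and start_index >= 0
    # The output appends the embedding width to the ids shape.
    return tuple(ids_shape) + (width,)

# An [8, 512] weight shard looked up with ids of shape [4, 16]
# yields an output of shape [4, 16, 512].
assert c_embedding_infer_shape((8, 512), (4, 16)) == (4, 16, 512)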
@@ -211,6 +211,11 @@ void EmbeddingInferMeta(const MetaTensor& x,
                         int64_t padding_idx,
                         MetaTensor* out);

+void CEmbeddingInferMeta(const MetaTensor& weight,
+                         const MetaTensor& x,
+                         int64_t start_index,
+                         MetaTensor* out);
+
 void ExpandAsInferMeta(const MetaTensor& x,
                        const MetaTensor& y,
                        const std::vector<int>& target_shape,
...