diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index bbc42d97146f24e69d2f2337967e129af013fb6c..68a52a79e4ce33311780fdf1993397b717a718b2 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -233,7 +233,8 @@ REGISTER_OP_CPU_KERNEL( ops::ConcatKernel, ops::ConcatKernel, - ops::ConcatKernel); + ops::ConcatKernel, + ops::ConcatKernel); REGISTER_OP_CPU_KERNEL( concat_grad, ops::ConcatGradKernel, @@ -242,4 +243,5 @@ REGISTER_OP_CPU_KERNEL( ops::ConcatGradKernel, ops::ConcatGradKernel, - ops::ConcatGradKernel); + ops::ConcatGradKernel, + ops::ConcatGradKernel); diff --git a/paddle/fluid/operators/concat_op.cu.cc b/paddle/fluid/operators/concat_op.cu.cc index 8c30703f2576b35deb419238de08c5f2fa7b42d2..8732556acb9fdee6e6d83fb34f1bcadf7d8b4bb6 100644 --- a/paddle/fluid/operators/concat_op.cu.cc +++ b/paddle/fluid/operators/concat_op.cu.cc @@ -23,7 +23,8 @@ REGISTER_OP_CUDA_KERNEL( ops::ConcatKernel, ops::ConcatKernel, ops::ConcatKernel, - ops::ConcatKernel); + ops::ConcatKernel, + ops::ConcatKernel); REGISTER_OP_CUDA_KERNEL( concat_grad, ops::ConcatGradKernel, @@ -31,4 +32,5 @@ REGISTER_OP_CUDA_KERNEL( ops::ConcatGradKernel, ops::ConcatGradKernel, ops::ConcatGradKernel, - ops::ConcatGradKernel); + ops::ConcatGradKernel, + ops::ConcatGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc index fdb2c57385b2bc1068c618f206bfeb6513d3d8c4..c8d568c8c2cf73041549a138085b72b41c0c297a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc @@ -100,6 +100,8 @@ REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp, ops::ReduceMeanDoubleGradOpBaseMaker, ops::ReduceMeanGradNoNeedBufferVarInferer); REGISTER_OP_CPU_KERNEL(reduce_mean, + ops::ReduceKernel, ops::ReduceKernel, ops::ReduceKernel; -REGISTER_OP_CPU_KERNEL(reduce_mean_grad, CPUReduceMeanGradKernel, 
+REGISTER_OP_CPU_KERNEL(reduce_mean_grad, CPUReduceMeanGradKernel, + CPUReduceMeanGradKernel, CPUReduceMeanGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu index cc3653fcb43a4c000d0c61c9d854965fafd59a9c..50d2fcdee23bd9e830f32e0cff4d367c3ad5ba66 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cu @@ -65,5 +65,6 @@ class ReduceMeanKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel, +REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel, + ops::ReduceMeanKernel, ops::ReduceMeanKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu index 289f574719ff03b1b09f313d05bab152f5c5d651..0e133d5447f93b8891c6de4cb5ad40ac7825493b 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.part.cu @@ -20,5 +20,6 @@ using CUDAReduceMeanGradKernel = ops::ReduceGradKernel; -REGISTER_OP_CUDA_KERNEL(reduce_mean_grad, CUDAReduceMeanGradKernel, +REGISTER_OP_CUDA_KERNEL(reduce_mean_grad, CUDAReduceMeanGradKernel, + CUDAReduceMeanGradKernel, CUDAReduceMeanGradKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc index 5a8e8894e1c5da8e0d34f15f2e402b7ecbbea364..a085e851eea77f7bb94076b0f8f83a873b894f3b 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc @@ -109,8 +109,10 @@ REGISTER_OPERATOR(reduce_sum_grad, ops::ReduceGradOp, ops::ReduceSumGradNoNeedBufferVarInferer); REGISTER_OP_CPU_KERNEL( - reduce_sum, ops::ReduceKernel, + ops::ReduceKernel, ops::ReduceKernel, ops::ReduceKernel, @@ -128,7 +130,8 @@ using CPUReduceSumGradKernel = ops::ReduceSumGradKernel; 
-REGISTER_OP_CPU_KERNEL(reduce_sum_grad, CPUReduceSumGradKernel, +REGISTER_OP_CPU_KERNEL(reduce_sum_grad, CPUReduceSumGradKernel, + CPUReduceSumGradKernel, CPUReduceSumGradKernel, CPUReduceSumGradKernel, CPUReduceSumGradKernel, diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cu b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cu index 219cc231a1ea7a0786026d6dcc6d63ce78e24025..dbd020514b2088a336184c4f1ca4f367dd3a14a3 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cu +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cu @@ -70,7 +70,8 @@ class ReduceSumKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_CUDA_KERNEL(reduce_sum, ops::ReduceSumKernel, +REGISTER_OP_CUDA_KERNEL(reduce_sum, ops::ReduceSumKernel, + ops::ReduceSumKernel, ops::ReduceSumKernel, ops::ReduceSumKernel, ops::ReduceSumKernel, ops::ReduceSumKernel, diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.part.cu b/paddle/fluid/operators/reduce_ops/reduce_sum_op.part.cu index f2bee6dddc39ec965966e4964c954e5fb1441bf5..67de8bb9a0c1ab4ae917b7e267fc2748087d900e 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.part.cu +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.part.cu @@ -20,7 +20,8 @@ using CUDAReduceSumGradKernel = ops::ReduceGradKernel; -REGISTER_OP_CUDA_KERNEL(reduce_sum_grad, CUDAReduceSumGradKernel, +REGISTER_OP_CUDA_KERNEL(reduce_sum_grad, CUDAReduceSumGradKernel, + CUDAReduceSumGradKernel, CUDAReduceSumGradKernel, CUDAReduceSumGradKernel, CUDAReduceSumGradKernel, diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 97826f7d5f81d9da8df2c97833f1dcd84a11923a..67e6c7f8e44d740f179961c7e183efdced9ff805 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -80,7 +80,7 @@ def concat(x, axis=0, name=None): Args: x(list|tuple): ``x`` is a Tensor list or Tensor tuple which is with data type bool, float16, - float32, 
float64, int32, int64. All the Tensors in ``x`` must have same data type. + float32, float64, int32, int64, uint8. All the Tensors in ``x`` must have the same data type. axis(int|Tensor, optional): Specify the axis to operate on the input Tensors. It's a scalar with data type int or a Tensor with shape [1] and data type int32 or int64. The effective range is [-R, R), where R is Rank(x). When ``axis < 0``,