diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 3f6af507aed2fbf4dde49a472767c43bc0f8fd72..3199582ce29a5a5c7cb13f1c74c31a58afd8241d 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1797,6 +1797,15 @@ static std::pair GenerateForwardFunctionContents( generated_function_body += amp_context; generated_function_body += "\n"; } + + if (!forward_inplace_map.empty()) { + generated_function_body += + " auto current_level = egr::Controller::Instance().GetAMPLevel();\n"; + generated_function_body += + " " + "egr::Controller::Instance().SetAMPLevel(paddle::imperative::AmpLevel::" + "O0);\n"; + } // forward ins insert const char* FWD_INS_MAP_TEMPLATE = " std::map GenerateForwardFunctionContents( } trace_op_body_str += out_tensor_str; } + if (!forward_inplace_map.empty()) { + trace_op_body_str += + " egr::Controller::Instance().SetAMPLevel(current_level);\n"; + } trace_op_body_str += "\n"; VLOG(6) << "Converted Output VarBase to EagerVariable(s)"; /* ------ END Generate TraceOp ----- */ diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 151d1ad211424eeab7d0a08f923b523de74da26b..b384544bb6012933d43edd0c91192fbfd20367b6 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -47,7 +47,9 @@ typedef SSIZE_T ssize_t; #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#include "paddle/fluid/eager/amp_utils.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/eager/eager_amp_auto_cast.h" #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/memory/allocation/mmap_allocator.h" #include "paddle/fluid/pybind/tensor_py.h" @@ -1171,6 +1173,17 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self, // Release gil and do tracing py::gil_scoped_release release; // use inplace set_value_ operator + if (value_tensor.initialized() && + (self->tensor.dtype() != value_tensor.dtype())) { + paddle::small_vector, + egr::kSlotSmallVectorSize> + tmps = {{self->tensor}, {value_tensor}}; + auto amp_dtype = egr::GetAmpDestDtype("set_value", tmps); + self->tensor = egr::EagerAmpAutoCast( + self->tensor.name(), self->tensor, amp_dtype, "set_value"); + value_tensor = egr::EagerAmpAutoCast( + value_tensor.name(), value_tensor, amp_dtype, "set_value"); + } self->tensor = set_value__dygraph_function( self->tensor, value_tensor, {}, {}, {}, attrs); } diff --git a/paddle/phi/kernels/cpu/scale_kernel.cc b/paddle/phi/kernels/cpu/scale_kernel.cc index 421aae270ee591c13dd205dd6909a8eb7bc3cef5..358d89197edb2ee0e096ec5b270f9fe33e73502e 100644 --- a/paddle/phi/kernels/cpu/scale_kernel.cc +++ b/paddle/phi/kernels/cpu/scale_kernel.cc @@ -58,6 +58,7 @@ PD_REGISTER_KERNEL(scale, float, double, phi::dtype::bfloat16, + phi::dtype::float16, uint8_t, int8_t, int16_t, diff --git a/paddle/phi/kernels/cpu/set_value_grad_kernel.cc b/paddle/phi/kernels/cpu/set_value_grad_kernel.cc index 44df36bb9fd87320db8548815b68a431e46bbcac..882648e8c346a62f0f2b5ad3f63da29944c03cd8 100644 --- a/paddle/phi/kernels/cpu/set_value_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/set_value_grad_kernel.cc @@ -26,4 +26,5 @@ PD_REGISTER_KERNEL(set_value_grad, double, int, int64_t, - bool) {} + bool, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/cpu/set_value_kernel.cc b/paddle/phi/kernels/cpu/set_value_kernel.cc index dcf278cd94e65189cd891124c4aa3ab81fa4397d..be5affb4ccfbfcd53f3b0e0e85181e9131a341a8 100644 --- a/paddle/phi/kernels/cpu/set_value_kernel.cc +++ b/paddle/phi/kernels/cpu/set_value_kernel.cc @@ -26,7 +26,8 @@ PD_REGISTER_KERNEL(set_value, double, int, int64_t, - bool) {} + bool, + phi::dtype::float16) {} PD_REGISTER_KERNEL(set_value_with_tensor, CPU, ALL_LAYOUT, @@ -35,4 +36,5 @@ PD_REGISTER_KERNEL(set_value_with_tensor, double, int, int64_t, - bool) {} + bool, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/funcs/eigen/scale.cc b/paddle/phi/kernels/funcs/eigen/scale.cc index 341bf52f547fedfd957aba2a95643a8037b32830..7e2d463a9fab13ed3551cce89c67e5ca69d5ff5e 100644 --- a/paddle/phi/kernels/funcs/eigen/scale.cc +++ b/paddle/phi/kernels/funcs/eigen/scale.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" namespace phi { @@ -41,6 +42,7 @@ struct EigenScale { template struct EigenScale; template struct EigenScale; template struct EigenScale; +template struct EigenScale; template struct EigenScale; template struct EigenScale; template struct EigenScale; diff --git a/paddle/phi/kernels/gpu/set_value_grad_kernel.cu b/paddle/phi/kernels/gpu/set_value_grad_kernel.cu index 7eed96699e720870577c3d5246ce07c12c37335c..49a57b944187215a113ed256b48c6793425cdf9a 100644 --- a/paddle/phi/kernels/gpu/set_value_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/set_value_grad_kernel.cu @@ -26,4 +26,5 @@ PD_REGISTER_KERNEL(set_value_grad, double, int, int64_t, - bool) {} + bool, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/gpu/set_value_kernel.cu b/paddle/phi/kernels/gpu/set_value_kernel.cu index f788da010b6827d18ea455bad57d775da4049acf..0e6c5734852b787153ff583c961f05e275ec9839 100644 --- a/paddle/phi/kernels/gpu/set_value_kernel.cu +++ b/paddle/phi/kernels/gpu/set_value_kernel.cu @@ -26,7 +26,8 @@ PD_REGISTER_KERNEL(set_value, double, int, int64_t, - bool) {} + bool, + paddle::platform::float16) {} PD_REGISTER_KERNEL(set_value_with_tensor, GPU, ALL_LAYOUT, @@ -35,4 +36,5 @@ PD_REGISTER_KERNEL(set_value_with_tensor, double, int, int64_t, - bool) {} + bool, + paddle::platform::float16) {}