Unverified commit 7b828f71 authored by taixiurong, committed by GitHub

remove old XDNN implementation test=kunlun (#42404)

Parent: a1abb7c9
@@ -9,7 +9,7 @@ SET(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220425")
+  SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220510")
 else()
   SET(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
@@ -17,7 +17,7 @@ endif()
 # ubuntu and centos: use output by XDNN API team
 if(NOT DEFINED XPU_XDNN_BASE_URL)
   SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
-  SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220425")
+  SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220510")
 else()
   SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
 endif()
...
@@ -18,6 +18,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows_utils.h"
 #include "paddle/fluid/platform/transform.h"

+#if defined(PADDLE_WITH_XPU)
+#include "paddle/fluid/platform/device/device_wrapper.h"
+#endif
+
 namespace paddle {
 namespace framework {
@@ -28,6 +32,49 @@ struct CastDataTypeFunctor {
   }
 };
+#if defined(PADDLE_WITH_XPU)
+template <typename InType, typename OutType>
+static void XPUCastData(const framework::Tensor& in, framework::Tensor* out,
+                        const platform::XPUDeviceContext* dev_ctx) {
+  using XPUInTDType = typename XPUTypeTrait<InType>::Type;
+  using XPUOutTDType = typename XPUTypeTrait<OutType>::Type;
+  int r = xpu::cast_v2<XPUInTDType, XPUOutTDType>(
+      dev_ctx->x_context(),
+      reinterpret_cast<const XPUInTDType*>(in.data<InType>()),
+      reinterpret_cast<XPUOutTDType*>(out->mutable_data<OutType>(in.place())),
+      in.numel());
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+  dev_ctx->Wait();
+}
+
+template <typename InType>
+static void XPUTransDataType(
+    const framework::Tensor& in, framework::Tensor* out,
+    const paddle::framework::proto::VarType::Type& dst_type,
+    const platform::DeviceContext* ctx) {
+  auto* context = static_cast<const platform::XPUDeviceContext*>(ctx);
+
+#define XPUCastCallback(cpp_type, proto_type)          \
+  do {                                                 \
+    if (dst_type == proto_type) {                      \
+      XPUCastData<InType, cpp_type>(in, out, context); \
+    }                                                  \
+  } while (0)
+
+  // Note: this must be a disjunction; with && the condition can never hold
+  // and every XPU cast would throw Unimplemented.
+  if (dst_type == proto::VarType::FP32 || dst_type == proto::VarType::FP16 ||
+      dst_type == proto::VarType::BOOL || dst_type == proto::VarType::INT16 ||
+      dst_type == proto::VarType::INT32 || dst_type == proto::VarType::INT64) {
+    _ForEachDataType_(XPUCastCallback);
+  } else {
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "Data type (%s) is not supported in XPU when casting data type.",
+        DataTypeToString(dst_type)));
+  }
+}
+#endif
 template <typename InType>
 struct CastDataType {
   CastDataType(const framework::Tensor& in, framework::Tensor* out,
@@ -88,6 +135,34 @@ void TransDataType(const Tensor& in,
   auto dst_type = type;
   auto ctx = pool.Get(in.place());

+#if defined(PADDLE_WITH_XPU)
+  switch (src_type) {
+    case proto::VarType::FP16:
+      XPUTransDataType<platform::float16>(in, out, dst_type, ctx);
+      break;
+    case proto::VarType::FP32:
+      XPUTransDataType<float>(in, out, dst_type, ctx);
+      break;
+    case proto::VarType::BOOL:
+      XPUTransDataType<bool>(in, out, dst_type, ctx);
+      break;
+    case proto::VarType::INT16:
+      XPUTransDataType<int16_t>(in, out, dst_type, ctx);
+      break;
+    case proto::VarType::INT32:
+      XPUTransDataType<int>(in, out, dst_type, ctx);
+      break;
+    case proto::VarType::INT64:
+      XPUTransDataType<int64_t>(in, out, dst_type, ctx);
+      break;
+    default:
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Data type (%s) is not supported in XPU when casting data type.",
+          DataTypeToString(src_type)));
+  }
+#else
   switch (src_type) {
     case proto::VarType::FP16:
       framework::VisitDataType(dst_type,
@@ -123,6 +198,7 @@ void TransDataType(const Tensor& in,
         "Data type (%s) is not supported when casting data type.",
         DataTypeToString(src_type)));
   }
+#endif
 }

 void TransComplexToReal(const proto::VarType::Type& dst_type,
@@ -131,7 +207,6 @@ void TransComplexToReal(const proto::VarType::Type& dst_type,
   auto& pool = platform::DeviceContextPool::Instance();
   auto* ctx = pool.Get(in.place());
   out->Resize(in.dims());
-
   // complex -> real
   switch (src_type) {
     case proto::VarType::COMPLEX64:
...
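In the new XPU branch, dispatch happens in two stages: the switch in TransDataType picks the source type, and the XPUCastCallback macro expanded by _ForEachDataType_ picks the destination type, with xpu::cast_v2 performing the element-wise conversion. The numpy stand-in below sketches that supported-pair check and cast; xpu_trans_data_type and XPU_CAST_TYPES are hypothetical names for illustration, not Paddle or XDNN APIs.

import numpy as np

# Dtypes accepted by the XPU cast path above (a hypothetical numpy mirror).
XPU_CAST_TYPES = {np.float32, np.float16, np.bool_, np.int16, np.int32, np.int64}

def xpu_trans_data_type(src, dst_dtype):
    """Reject unsupported dtype pairs, then cast; astype stands in for xpu::cast_v2."""
    for dt in (src.dtype.type, np.dtype(dst_dtype).type):
        if dt not in XPU_CAST_TYPES:
            raise NotImplementedError("dtype %s is not supported in XPU cast" % dt)
    return src.astype(dst_dtype)

print(xpu_trans_data_type(np.ones(4, dtype=np.float32), np.float16).dtype)  # float16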
@@ -21,58 +21,67 @@ template <typename DeviceContext, typename T, typename AttrType = T>
 class LogLossXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
+    /*** TODO wait XDNN new interface
     auto* predict = ctx.Input<Tensor>("Predicted");
     auto* labels = ctx.Input<Tensor>("Labels");
     auto* loss = ctx.Output<Tensor>("Loss");
     auto epsilon = static_cast<T>(ctx.Attr<AttrType>("epsilon"));
     loss->mutable_data<T>(ctx.GetPlace());
     int n = predict->numel();
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     int r =
         xpu::log_loss_fwd(dev_ctx.x_context(), n, epsilon, predict->data<T>(),
                           labels->data<T>(), loss->data<T>());
     PADDLE_ENFORCE_EQ(
         r, xpu::Error_t::SUCCESS,
         platform::errors::External(
             "XPU log_loss kernel return wrong value[%d], please check whether "
             "Baidu Kunlun Card is properly installed.",
             r));
+    ***/
   }
 };

 template <typename DeviceContext, typename T, typename AttrType = T>
 class LogLossGradXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
+    /*** TODO wait XDNN new interface
     auto* predict = ctx.Input<Tensor>("Predicted");
     auto* labels = ctx.Input<Tensor>("Labels");
     auto* dloss = ctx.Input<Tensor>(framework::GradVarName("Loss"));
     auto* dpred = ctx.Output<Tensor>(framework::GradVarName("Predicted"));
     if (!dpred) {
       return;
     }
     auto epsilon = static_cast<T>(ctx.Attr<AttrType>("epsilon"));
     dpred->mutable_data<T>(ctx.GetPlace());
     int n = predict->numel();
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     int r = xpu::log_loss_bwd(dev_ctx.x_context(), n, epsilon,
                               predict->data<T>(), labels->data<T>(),
                               dloss->data<T>(), dpred->data<T>());
     PADDLE_ENFORCE_EQ(
         r, xpu::Error_t::SUCCESS,
         platform::errors::External(
             "XPU log_loss kernel return wrong value[%d], please check whether "
             "Baidu Kunlun Card is properly installed.",
             r));
+    ***/
   }
 };

 }  // namespace operators
 }  // namespace paddle

-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    log_loss, ops::LogLossXPUKernel<paddle::platform::XPUDeviceContext, float>);
-REGISTER_OP_XPU_KERNEL(
-    log_loss_grad,
-    ops::LogLossGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
+// namespace ops = paddle::operators;
+// REGISTER_OP_XPU_KERNEL(
+//     log_loss, ops::LogLossXPUKernel<paddle::platform::XPUDeviceContext,
+//     float>);
+// REGISTER_OP_XPU_KERNEL(
+//     log_loss_grad,
+//     ops::LogLossGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
 #endif
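The kernels disabled above wrapped XDNN's log_loss_fwd/log_loss_bwd. As background (this is the usual definition of Paddle's log_loss op, not something shown in the diff), the forward pass computes the epsilon-guarded log loss

\[ \mathcal{L}_i = -y_i \log(p_i + \epsilon) - (1 - y_i)\,\log(1 - p_i + \epsilon) \]

with \(p_i\) the Predicted values and \(y_i\) the Labels; the backward pass is its derivative with respect to \(p_i\).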
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/platform/device/xpu/xpu_header.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"

 namespace paddle {
 namespace operators {
@@ -42,68 +42,26 @@ class AccuracyXPUKernel : public framework::OpKernel<T> {
     if (num_samples == 0) {
       return;
     }
-    size_t indices_int32_size = num_samples * class_dim * sizeof(int);
-    size_t indices_int64_size = num_samples * class_dim * sizeof(int64_t);
-    size_t label_int32_size = num_samples * sizeof(int);
-    size_t label_int64_size = num_samples * sizeof(int64_t);
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    int* indices_int32_device = NULL;
-    PADDLE_ENFORCE_EQ(
-        xpu_malloc(reinterpret_cast<void**>(&indices_int32_device),
-                   indices_int32_size),
-        XPU_SUCCESS,
-        platform::errors::ResourceExhausted(
-            "\n\nOut of memory error on XPU, Cannot allocate %s memory"
-            " on XPU. \n\nPlease check whether there is any other process "
-            "using XPU.\n",
-            string::HumanReadableSize(indices_int32_size)));
-    int* label_int32_device = NULL;
-    PADDLE_ENFORCE_EQ(
-        xpu_malloc(reinterpret_cast<void**>(&label_int32_device),
-                   label_int32_size),
-        XPU_SUCCESS,
-        platform::errors::ResourceExhausted(
-            "\n\nOut of memory error on XPU, Cannot allocate %s memory"
-            " on XPU. \n\nPlease check whether there is any other process "
-            "using XPU.\n",
-            string::HumanReadableSize(label_int32_size)));
-    int* indices_int32_host =
-        reinterpret_cast<int*>(std::malloc(indices_int32_size));
-    int64_t* indices_int64_host =
-        reinterpret_cast<int64_t*>(std::malloc(indices_int64_size));
-    int* label_int32_host =
-        reinterpret_cast<int*>(std::malloc(label_int32_size));
-    int64_t* label_int64_host =
-        reinterpret_cast<int64_t*>(std::malloc(label_int64_size));
-    dev_ctx.Wait();
-    memory::Copy(platform::CPUPlace(), indices_int64_host, ctx.GetPlace(),
-                 indices_data, indices_int64_size);
-    memory::Copy(platform::CPUPlace(), label_int64_host, ctx.GetPlace(),
-                 label_data, label_int64_size);
-    for (size_t i = 0; i < num_samples; ++i) {
-      label_int32_host[i] = label_int64_host[i];
-      for (size_t j = 0; j < class_dim; ++j) {
-        indices_int32_host[i * class_dim + j] =
-            indices_int64_host[i * class_dim + j];
-      }
-    }
-    memory::Copy(ctx.GetPlace(), indices_int32_device, platform::CPUPlace(),
-                 indices_int32_host, indices_int32_size);
-    memory::Copy(ctx.GetPlace(), label_int32_device, platform::CPUPlace(),
-                 label_int32_host, label_int32_size);
-    int r = xpu::accuracy(dev_ctx.x_context(), indices_int32_device,
-                          label_int32_device, num_samples, class_dim,
-                          correct_data, total_data, accuracy_data);
-    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                      platform::errors::Fatal("XPU accuracy kernel error!"));
-    dev_ctx.Wait();
-    xpu_free(indices_int32_device);
-    xpu_free(label_int32_device);
-    std::free(indices_int32_host);
-    std::free(indices_int64_host);
-    std::free(label_int32_host);
-    std::free(label_int64_host);
+    xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+    int size = num_samples * class_dim;
+    int* indices_int32_ptr = RAII_GUARD.alloc_l3_or_gm<int>(size);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(indices_int32_ptr);
+    int* label_int32_ptr = RAII_GUARD.alloc_l3_or_gm<int>(num_samples);
+    PADDLE_ENFORCE_XDNN_NOT_NULL(label_int32_ptr);
+
+    int r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(), indices_data,
+                                           indices_int32_ptr, size);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+
+    // The label tensor holds num_samples elements (not num_samples *
+    // class_dim), so cast exactly that many.
+    r = xpu::cast_v2<int64_t, int32_t>(dev_ctx.x_context(), label_data,
+                                       label_int32_ptr, num_samples);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+
+    r = xpu::accuracy(dev_ctx.x_context(), indices_int32_ptr, label_int32_ptr,
+                      num_samples, class_dim, correct_data, total_data,
+                      accuracy_data);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "accuracy");
   }
 };
...
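The rewrite above changes the plumbing (a device-side cast_v2 plus ctx_guard scratch allocation instead of host round-trips), not the metric: a sample counts as correct when its label appears among that row's top-k Indices. A numpy sketch of the metric, mirroring the counting loop in the Python test at the end of this diff (accuracy_ref is an illustrative name, not a Paddle function):

import numpy as np

def accuracy_ref(indices, label):
    """indices: (num_samples, class_dim) top-k predictions; label: (num_samples, 1)."""
    num_samples = indices.shape[0]
    correct = sum(int(label[i, 0] in indices[i]) for i in range(num_samples))
    return correct / float(num_samples), correct, num_samples

indices = np.random.randint(0, 5, (8, 3)).astype('int64')
label = np.random.randint(0, 5, (8, 1)).astype('int64')
print(accuracy_ref(indices, label))  # (accuracy, correct, total)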
@@ -25,101 +25,111 @@ template <typename DeviceContext, typename T>
 class LambOpXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
+    /*** TODO wait XDNN new interface
     using paddle::framework::LoDTensor;
     const auto* param_var = ctx.InputVar("Param");
     PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
                       platform::errors::InvalidArgument(
                           "The Var(%s)'s type should be LoDTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           framework::ToTypeName(param_var->Type())));

     // inputs
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
     T weight_decay = static_cast<T>(ctx.Attr<float>("weight_decay"));
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     auto& param = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Param"), "Input",
                                   "Param", "Lamb");
     auto* grad_var = ctx.InputVar("Grad");
     auto& mom1 = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment1"), "Input",
                                  "Moment1", "Lamb");
     auto& mom2 = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment2"), "Input",
                                  "Moment2", "Lamb");
     auto& lr = GET_DATA_SAFELY(ctx.Input<LoDTensor>("LearningRate"), "Input",
                                "LearningRate", "Lamb");
     auto& beta1_pow = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Beta1Pow"),
                                       "Input", "Beta1Pow", "Lamb");
     auto& beta2_pow = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Beta2Pow"),
                                       "Input", "Beta2Pow", "Lamb");

     auto& param_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("ParamOut"),
                                       "Output", "ParamOut", "Lamb");
     auto& mom1_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Moment1Out"),
                                      "Output", "Moment1Out", "Lamb");
     auto& mom2_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Moment2Out"),
                                      "Output", "Moment2Out", "Lamb");
     auto& beta1_pow_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Beta1PowOut"),
                                           "Output", "Beta1PowOut", "Lamb");
     auto& beta2_pow_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("Beta2PowOut"),
                                           "Output", "Beta2PowOut", "Lamb");
     auto& dev_ctx = ctx.template device_context<DeviceContext>();

     if (grad_var->IsType<framework::LoDTensor>()) {
       auto& grad = *ctx.Input<LoDTensor>("Grad");
       int r = xpu::lamb(dev_ctx.x_context(), grad.template data<T>(),
                         mom1.template data<T>(), mom2.template data<T>(),
                         param.template data<T>(), beta1_pow.template data<T>(),
                         beta2_pow.template data<T>(), beta1, beta2, epsilon,
                         weight_decay, lr.template data<T>(),
                         mom1_out.template mutable_data<T>(ctx.GetPlace()),
                         mom2_out.template mutable_data<T>(ctx.GetPlace()),
                         param_out.template mutable_data<T>(ctx.GetPlace()),
                         beta1_pow_out.template mutable_data<T>(ctx.GetPlace()),
                         beta2_pow_out.template mutable_data<T>(ctx.GetPlace()),
                         param.numel());

       if (r == xpu::Error_t::INVALID_PARAM) {
         PADDLE_ENFORCE_EQ(
             r, xpu::Error_t::SUCCESS,
             platform::errors::InvalidArgument(
                 "XPU kernel error of LambOp, error message: INVALID_PARAM, "
                 "please check your input & output."));
       } else if (r == xpu::Error_t::RUNTIME_ERROR) {
         PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                           platform::errors::Unavailable(
                               "XPU kernel error of LambOp, error message: "
                               "RUNTIME_ERROR, please check whether Baidu "
                               "Kunlun Card is properly installed."));
       } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
         PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                           platform::errors::ResourceExhausted(
                               "XPU kernel error of LambOp, error "
                               "message: NO_ENOUGH_WORKSPACE, XPU "
                               "has no enough memory."));
       } else {
         PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                           platform::errors::ResourceExhausted(
                               "XPU kernel error of LambOp, error "
                               "message: OTHER "
                               "XPU API returns error code: %d.",
                               r));
       }
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "Variable type not supported by lamb_op. Expect LoDTensor, "
           "but got %s",
           framework::ToTypeName(param_var->Type())));
     }
+    **/
   }
 };

 }  // namespace operators
 }  // namespace paddle

-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    lamb, ops::LambOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
+// namespace ops = paddle::operators;
+// REGISTER_OP_XPU_KERNEL(
+//     lamb, ops::LambOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
 #endif
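The disabled code hands the entire update to one xpu::lamb call. As background only (the diff shows just the call signature; the equations below follow the LAMB paper, with \(\lambda\) = weight_decay and the Beta1Pow/Beta2Pow tensors carrying the bias-correction terms \(\beta_1^t, \beta_2^t\)):

\[ m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2 \]
\[ u_t = \frac{m_t/(1-\beta_1^t)}{\sqrt{v_t/(1-\beta_2^t)} + \epsilon} + \lambda\,w_{t-1}, \qquad w_t = w_{t-1} - \eta\,\frac{\lVert w_{t-1}\rVert}{\lVert u_t\rVert}\,u_t \]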
@@ -40,113 +40,122 @@ template <typename DeviceContext, typename T>
 class RmspropOpXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
+    /*** TODO wait XDNN new interface
     using paddle::framework::LoDTensor;

     // check Param & Grad tensor type
     const auto* param_var = ctx.InputVar("Param");
     PADDLE_ENFORCE_EQ(param_var->IsType<LoDTensor>(), true,
                       platform::errors::InvalidArgument(
                           "Tensor holds the wrong type. Expected Var(%s)'s "
                           "type is LoDTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           framework::ToTypeName(param_var->Type())));

     const auto* grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE_EQ(grad_var->IsType<LoDTensor>(), true,
                       platform::errors::InvalidArgument(
                           "Tensor holds the wrong type. Expected Var(%s)'s "
                           "type is LoDTensor, "
                           "but the received is %s",
                           ctx.InputNames("Grad").front(),
                           framework::ToTypeName(grad_var->Type())));

     // inputs
     auto& param = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Param"), "Input",
                                   "Param", "Rmsprop");
     auto& meanSquare = GET_DATA_SAFELY(ctx.Input<LoDTensor>("MeanSquare"),
                                        "Input", "MeanSquare", "Rmsprop");
     auto& grad = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Grad"), "Input", "Grad",
                                  "Rmsprop");
     auto& mom = GET_DATA_SAFELY(ctx.Input<LoDTensor>("Moment"), "Input",
                                 "Moment", "Rmsprop");

     auto* learning_rate = ctx.Input<Tensor>("LearningRate");
     PADDLE_ENFORCE_EQ(learning_rate->dims().size(), 1,
                       platform::errors::InvalidArgument(
                           "learning rate should have dimension = 1."
                           " But received learning rate dim [%s] ",
                           learning_rate->dims().size()));
     T lr = static_cast<T>(GetAttrFromTensor(learning_rate));

     // constants
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
     T decay = static_cast<T>(ctx.Attr<float>("decay"));
     T momentum = static_cast<T>(ctx.Attr<float>("momentum"));

     // outputs
     auto& param_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("ParamOut"),
                                       "Output", "ParamOut", "Rmsprop");
     auto& mom_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("MomentOut"),
                                     "Output", "MomentOut", "Rmsprop");
     auto& mom_sqrt_out = GET_DATA_SAFELY(ctx.Output<LoDTensor>("MeanSquareOut"),
                                          "Output", "MeanSquareOut", "Rmsprop");
     auto& dev_ctx = ctx.template device_context<DeviceContext>();

     ///// The RMSProp update rule:
     ///
     /// ms_out[i] = rho * ms[i] + (1 - rho) * (g[i] * g[i]);
     ///
     /// mom_out[i] = momentum * mom[i] + lr *
     ///              (g[i] / ((float)sqrt(ms_out[i] + epsilon)));
     ///
     /// p_out[i] = p[i] - mom_out[i];
     ///
     /// DLL_EXPORT int rmsprop(Context* ctx, const float* p,
     ///     const float* ms, const float* g, const float* mom,
     ///     float epsilon, float rho, float momentum, float lr,
     ///     float *ms_out, float *mom_out, float *p_out, int n)
     int r = xpu::rmsprop(dev_ctx.x_context(), param.template data<T>(),
                          meanSquare.template data<T>(), grad.template data<T>(),
                          mom.template data<T>(), epsilon, decay, momentum, lr,
                          mom_sqrt_out.template mutable_data<T>(ctx.GetPlace()),
                          mom_out.template mutable_data<T>(ctx.GetPlace()),
                          param_out.template mutable_data<T>(ctx.GetPlace()),
                          param.numel());

     if (r == xpu::Error_t::INVALID_PARAM) {
       PADDLE_ENFORCE_EQ(
           r, xpu::Error_t::SUCCESS,
           platform::errors::InvalidArgument(
               "XPU kernel error of RmspropOp, error message: INVALID_PARAM, "
               "please check your input & output."));
     } else if (r == xpu::Error_t::RUNTIME_ERROR) {
       PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                         platform::errors::Unavailable(
                             "XPU kernel error of RmspropOp, error message: "
                             "RUNTIME_ERROR, please check whether Baidu "
                             "Kunlun Card is properly installed."));
     } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
       PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                         platform::errors::ResourceExhausted(
                             "XPU kernel error of RmspropOp, error "
                             "message: NO_ENOUGH_WORKSPACE, XPU "
                             "has no enough memory."));
     } else {
       PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
                         platform::errors::ResourceExhausted(
                             "XPU kernel error of RmspropOp, error "
                             "message: OTHER "
                             "XPU API returns error code: %d.",
                             r));
     }
+    ***/
   }
 };

 }  // namespace operators
 }  // namespace paddle

-namespace ops = paddle::operators;
-REGISTER_OP_XPU_KERNEL(
-    rmsprop,
-    ops::RmspropOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
+// namespace ops = paddle::operators;
+// REGISTER_OP_XPU_KERNEL(
+//     rmsprop,
+//     ops::RmspropOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
 #endif
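In equation form, the update spelled out in the kernel's comment block (with \(\rho\) = decay, \(\mu\) = momentum, \(\eta\) = lr):

\[ ms_t = \rho\,ms_{t-1} + (1-\rho)\,g_t^2, \qquad mom_t = \mu\,mom_{t-1} + \eta\,\frac{g_t}{\sqrt{ms_t + \epsilon}}, \qquad p_t = p_{t-1} - mom_t \]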
@@ -14,11 +14,15 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU

 #include "paddle/fluid/operators/optimizers/sgd_op.h"
 #include <string>

+#include "paddle/fluid/platform/device/device_wrapper.h"
+
 namespace paddle {
 namespace operators {

 template <typename DeviceContext, typename T>
 class SGDOpXPUKernel : public framework::OpKernel<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
@@ -48,40 +52,31 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
                               "numel = [%s], ParamOut's numel = [%s]",
                               grad->numel(), sz));

-      const T *lr = learning_rate->data<T>();
+      const T *lr_t = learning_rate->data<T>();
+      auto &dev_ctx = ctx.template device_context<DeviceContext>();
+      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
+      const float *lr = nullptr;
+      if (std::is_same<T, paddle::platform::float16>::value) {
+        float *lr_float =
+            RAII_GUARD.alloc_l3_or_gm<float>(learning_rate->numel());
+        int r = xpu::cast_v2<XPUType, float>(
+            dev_ctx.x_context(), reinterpret_cast<const XPUType *>(lr_t),
+            lr_float, learning_rate->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
+        lr = lr_float;
+      } else {
+        lr = reinterpret_cast<const float *>(lr_t);
+      }
+
       const T *param_data = param->data<T>();
       const T *grad_data = grad->data<T>();
       T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
-      auto &dev_ctx = ctx.template device_context<DeviceContext>();
-      int r = xpu::sgd(dev_ctx.x_context(), sz, grad_data, param_data, lr,
-                       out_data);
-      if (r == xpu::Error_t::INVALID_PARAM) {
-        PADDLE_ENFORCE_EQ(
-            r, xpu::Error_t::SUCCESS,
-            platform::errors::InvalidArgument(
-                "XPU kernel error of SgdOp, error message: INVALID_PARAM, "
-                "please check your input & output."));
-      } else if (r == xpu::Error_t::RUNTIME_ERROR) {
-        PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                          platform::errors::Unavailable(
-                              "XPU kernel error of SgdOp, error message: "
-                              "RUNTIME_ERROR, please check whether Baidu "
-                              "Kunlun Card is properly installed."));
-      } else if (r == xpu::Error_t::NO_ENOUGH_WORKSPACE) {
-        PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
-                          platform::errors::ResourceExhausted(
-                              "XPU kernel error of SgdOp, error "
-                              "message: NO_ENOUGH_WORKSPACE, XPU "
-                              "has no enough memory."));
-      }
+      int r = xpu::sgd(dev_ctx.x_context(),
+                       reinterpret_cast<const XPUType *>(grad_data),
+                       reinterpret_cast<const XPUType *>(param_data), lr,
+                       reinterpret_cast<XPUType *>(out_data), sz);
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "sgd");
     } else {
       PADDLE_ENFORCE_EQ(false, true,
                         platform::errors::PermissionDenied(
                             "Unsupported Variable Type of Param & Grad in "
                             "SgdOp-XPU. Expected "
                             "LoDTensor, but received [%s] and [%s]",
                             paddle::framework::ToTypeName(param_var->Type())));
     }
   }
 };
@@ -90,6 +85,8 @@ class SGDOpXPUKernel : public framework::OpKernel<T> {
 }  // namespace paddle

 namespace ops = paddle::operators;
+namespace plat = paddle::platform;
 REGISTER_OP_XPU_KERNEL(
-    sgd, ops::SGDOpXPUKernel<paddle::platform::XPUDeviceContext, float>);
+    sgd, ops::SGDOpXPUKernel<paddle::platform::XPUDeviceContext, float>,
+    ops::SGDOpXPUKernel<paddle::platform::XPUDeviceContext, plat::float16>);
 #endif
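The functional change in this file: xpu::sgd consumes a float32 learning rate, so the new float16 path first widens the learning-rate tensor with xpu::cast_v2 while parameters and gradients stay in XPUType. A numpy sketch of the resulting update (sgd_ref is a hypothetical helper; fp32 accumulation inside XDNN is an assumption, not something this diff documents):

import numpy as np

def sgd_ref(param, grad, lr):
    """w_out = w - lr * g; an fp16 learning rate is widened to fp32 first."""
    lr32 = lr.astype(np.float32) if lr.dtype == np.float16 else lr
    # Compute in fp32 and cast back to the parameter dtype (assumed precision).
    return (param.astype(np.float32) - lr32 * grad.astype(np.float32)).astype(param.dtype)

w = np.random.random((10, 64)).astype(np.float16)
g = np.random.random((10, 64)).astype(np.float16)
print(sgd_ref(w, g, np.array([0.1], dtype=np.float16)).dtype)  # float16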
@@ -145,7 +145,6 @@ XPUOpMap& get_kl1_ops() {
       {"hard_switch", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"iou_similarity",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
-      {"lamb", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"layer_norm_grad",
        XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
@@ -175,9 +174,6 @@ XPUOpMap& get_kl1_ops() {
                      pOpKernelType(vartype::INT32, XPUPlace()),
                      pOpKernelType(vartype::INT64, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace())})},
-      {"log_loss_grad",
-       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
-      {"log_loss", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"logsumexp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"lookup_table_v2_grad",
@@ -236,7 +232,6 @@ XPUOpMap& get_kl1_ops() {
                      pOpKernelType(vartype::INT32, XPUPlace()),
                      pOpKernelType(vartype::BOOL, XPUPlace()),
                      pOpKernelType(vartype::FP32, XPUPlace())})},
-      {"rmsprop", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
       {"roi_align_grad",
...
@@ -328,6 +328,8 @@ XPUOpMap& get_kl2_ops() {
                     pOpKernelType(vartype::INT64, XPUPlace())})},
      {"scatter", XPUKernelSet({pOpKernelType(vartype::INT64, XPUPlace()),
                                pOpKernelType(vartype::FP32, XPUPlace())})},
+     {"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                           pOpKernelType(vartype::FP16, XPUPlace())})},
      {"sigmoid_cross_entropy_with_logits_grad",
       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
      {"sigmoid_cross_entropy_with_logits",
...
@@ -23,41 +23,52 @@ import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard
 import paddle
+from op_test_xpu import XPUOpTest
+from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper

 paddle.enable_static()


-@unittest.skipIf(not paddle.is_compiled_with_xpu(),
-                 "core is not compiled with XPU")
-class TestXPUAccuracyOp(OpTest):
-    def setUp(self):
-        self.op_type = "accuracy"
-        self.init_dtype()
-        n = 8192
-        infer = np.random.random((n, 1)).astype(self.dtype)
-        indices = np.random.randint(0, 2, (n, 1)).astype('int64')
-        label = np.random.randint(0, 2, (n, 1)).astype('int64')
-        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
-        num_correct = 0
-        for rowid in range(n):
-            for ele in indices[rowid]:
-                if ele == label[rowid]:
-                    num_correct += 1
-                    break
-        self.outputs = {
-            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
-            'Correct': np.array([num_correct]).astype("int32"),
-            'Total': np.array([n]).astype("int32")
-        }
-        self.attrs = {'use_xpu': True}
-
-    def init_dtype(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        if paddle.is_compiled_with_xpu():
-            place = paddle.XPUPlace(0)
-            self.check_output_with_place(place)
+class XPUTestAccuracyOp(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'accuracy'
+        self.use_dynamic_create_class = False
+
+    class TestXPUAccuracyOp(XPUOpTest):
+        def setUp(self):
+            self.op_type = "accuracy"
+            self.init_dtype()
+            n = 8192
+            infer = np.random.random((n, 1)).astype(self.dtype)
+            indices = np.random.randint(0, 2, (n, 1)).astype('int64')
+            label = np.random.randint(0, 2, (n, 1)).astype('int64')
+            self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
+            num_correct = 0
+            for rowid in range(n):
+                for ele in indices[rowid]:
+                    if ele == label[rowid]:
+                        num_correct += 1
+                        break
+            self.outputs = {
+                'Accuracy':
+                np.array([num_correct / float(n)]).astype(self.dtype),
+                'Correct': np.array([num_correct]).astype("int32"),
+                'Total': np.array([n]).astype("int32")
+            }
+            self.attrs = {'use_xpu': True}
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place)
+
+
+support_types = get_xpu_op_support_types('accuracy')
+for stype in support_types:
+    create_test_class(globals(), XPUTestAccuracyOp, stype)


 if __name__ == '__main__':
     unittest.main()
...
@@ -25,30 +25,43 @@ import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.op import Operator
+from op_test_xpu import XPUOpTest
+from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper


-class TestSGDOp(OpTest):
-    def setUp(self):
-        self.op_type = "sgd"
-        self.conf()
-        w = np.random.random((self.h, self.w)).astype("float32")
-        g = np.random.random((self.h, self.w)).astype("float32")
-        lr = np.array([0.1]).astype("float32")
-
-        self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
-        self.outputs = {'ParamOut': w - lr * g}
-
-    def conf(self):
-        self.h = 102
-        self.w = 105
-
-    def test_check_output_with_place(self):
-        self.check_output_with_place(paddle.XPUPlace(0))
-
-
-class TestSGDOpCase8X(TestSGDOp):
-    def conf(self):
-        self.h = 10
-        self.w = 64
+class XPUTestSgdOp(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = 'sgd'
+        self.use_dynamic_create_class = False
+
+    class TestSGDOp(XPUOpTest):
+        def setUp(self):
+            self.op_type = "sgd"
+            self.dtype = self.in_type
+            self.conf()
+            w = np.random.random((self.h, self.w)).astype(self.dtype)
+            g = np.random.random((self.h, self.w)).astype(self.dtype)
+            lr = np.array([0.1]).astype(self.dtype)
+
+            self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
+            self.outputs = {'ParamOut': w - lr * g}
+
+        def conf(self):
+            self.h = 102
+            self.w = 105
+
+        def test_check_output_with_place(self):
+            self.check_output_with_place(paddle.XPUPlace(0))
+
+    class TestSGDOpCase8X(TestSGDOp):
+        def conf(self):
+            self.h = 10
+            self.w = 64
+
+
+support_types = get_xpu_op_support_types('sgd')
+for stype in support_types:
+    create_test_class(globals(), XPUTestSgdOp, stype)


 class TestSGDOpWithLargeInput(unittest.TestCase):
...