提交 92890ac2 编写于 作者: T tensor-tang

Merge remote-tracking branch 'ups/develop' into feature/op/fusion_lstm

......@@ -128,7 +128,8 @@ struct ExtractAttribute {
attr_value = &boost::get<T>(attr);
} catch (boost::bad_get& bad_get) {
PADDLE_THROW("Cannot get attribute %s by type %s, its type is %s",
attr_name_, typeid(T).name(), attr.type().name());
attr_name_, paddle::platform::demangle(typeid(T).name()),
paddle::platform::demangle(attr.type().name()));
}
return attr_value;
}
......@@ -160,7 +161,7 @@ struct ExtractAttribute<bool> {
attr_value = &boost::get<bool>(attr);
} catch (boost::bad_get& bad_get) {
PADDLE_THROW("Cannot get attribute %s by type bool, its type is %s",
attr_name_, attr.type().name());
attr_name_, paddle::platform::demangle(attr.type().name()));
}
return attr_value;
}
......@@ -186,7 +187,7 @@ struct ExtractAttribute<int64_t> {
attr_value = &boost::get<int64_t>(attr);
} catch (boost::bad_get& bad_get) {
PADDLE_THROW("Cannot get attribute %s by type int64_t, its type is %s",
attr_name_, attr.type().name());
attr_name_, paddle::platform::demangle(attr.type().name()));
}
return attr_value;
}
......
......@@ -14,6 +14,11 @@ limitations under the License. */
#include "paddle/fluid/operators/gru_op.h"
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/detail/gru_cpu_kernel.h"
#include "paddle/fluid/operators/math/detail/gru_kernel.h"
DECLARE_int32(paddle_num_threads);
namespace paddle {
namespace operators {
......@@ -211,6 +216,158 @@ class GRUGradOp : public framework::OperatorWithKernel {
}
};
template <typename T>
class GRUCPUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
using DeviceContext = paddle::platform::CPUDeviceContext;
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(dev_ctx, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
// according to their length. The initialized cell state also needs
// to reorder.
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), *h0, order,
&ordered_h0, true);
gru_value.prev_out_value = ordered_h0.data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t seq_len = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
#ifdef PADDLE_WITH_MKLML
// use MKL packed to speedup GEMM
if (FLAGS_paddle_num_threads >= 4) {
auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix, 1 /*height of C*/,
frame_size * 2 /*width of weight*/,
frame_size /*height of height*/);
PADDLE_ENFORCE(packed_gate);
blas.GEMM_PACK(CblasBMatrix, CblasNoTrans, 1 /*cur bs?*/, frame_size * 2,
frame_size, T(1.0), gru_value.gate_weight, frame_size * 2,
packed_gate);
T* packed_state = blas.GEMM_ALLOC(CblasBMatrix, 1 /*height of C*/,
frame_size /*width of weight*/,
frame_size /*height of height*/);
PADDLE_ENFORCE(packed_state);
blas.GEMM_PACK(CblasBMatrix, CblasNoTrans, 1 /*cur bs?*/, frame_size,
frame_size, T(1.0), gru_value.state_weight, frame_size,
packed_state);
for (size_t n = 0; n < seq_len; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t =
batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
if (gru_value.prev_out_value) {
blas.GEMM_COMPUTE(
CblasNoTrans, CblasPacked, cur_batch_size, frame_size * 2,
frame_size, gru_value.prev_out_value, frame_size, packed_gate,
frame_size * 2, T(1), gru_value.gate_value, frame_size * 3);
}
math::detail::forward_reset_output(
math::detail::forward::gru_resetOutput<T>(), gru_value, frame_size,
cur_batch_size, active_gate);
if (gru_value.prev_out_value) {
blas.GEMM_COMPUTE(
CblasNoTrans, CblasPacked, cur_batch_size, frame_size, frame_size,
gru_value.reset_output_value, frame_size, packed_state,
frame_size, T(1), gru_value.gate_value + frame_size * 2,
frame_size * 3);
}
math::detail::forward_final_output(
math::detail::forward::gru_finalOutput<T>(), gru_value, frame_size,
cur_batch_size, active_node);
gru_value.prev_out_value = gru_value.output_value;
}
blas.GEMM_FREE(packed_gate);
blas.GEMM_FREE(packed_state);
} else {
#endif
for (size_t n = 0; n < seq_len; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t =
batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate);
gru_value.prev_out_value = gru_value.output_value;
}
#ifdef PADDLE_WITH_MKLML
}
#endif
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
......@@ -218,9 +375,8 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(gru, ops::GRUOp, ops::GRUOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(gru_grad, ops::GRUGradOp);
REGISTER_OP_CPU_KERNEL(
gru, ops::GRUKernel<paddle::platform::CPUDeviceContext, float>,
ops::GRUKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(gru, ops::GRUCPUKernel<float>,
ops::GRUCPUKernel<double>);
REGISTER_OP_CPU_KERNEL(
gru_grad, ops::GRUGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GRUGradKernel<paddle::platform::CPUDeviceContext, double>);
......@@ -14,6 +14,96 @@ limitations under the License. */
#include "paddle/fluid/operators/gru_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(dev_ctx, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
// according to their length. The initialized cell state also needs
// to reorder.
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), *h0, order,
&ordered_h0, true);
gru_value.prev_out_value = ordered_h0.data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate);
gru_value.prev_out_value = gru_value.output_value;
}
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
gru, ops::GRUKernel<paddle::platform::CUDADeviceContext, float>,
......
......@@ -37,90 +37,6 @@ inline void ReorderInitState(const DeviceContext& ctx,
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template <typename DeviceContext, typename T>
class GRUKernel : public framework::OpKernel<T> {
public:
void BatchCompute(const framework::ExecutionContext& context) const {
auto* input = context.Input<LoDTensor>("Input");
auto* h0 = context.Input<Tensor>("H0");
auto* weight = context.Input<Tensor>("Weight");
const T* weight_data = weight->data<T>();
auto* bias = context.Input<Tensor>("Bias");
auto* batch_gate = context.Output<LoDTensor>("BatchGate");
batch_gate->mutable_data<T>(context.GetPlace());
auto* batch_reset_hidden_prev =
context.Output<LoDTensor>("BatchResetHiddenPrev");
batch_reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* batch_hidden = context.Output<LoDTensor>("BatchHidden");
batch_hidden->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<LoDTensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
auto hidden_dims = hidden->dims();
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
add_bias(dev_ctx, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
framework::Vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computing for GRU reorders the input sequences
// according to their length. The initialized cell state also needs
// to reorder.
ReorderInitState<DeviceContext, T>(
context.template device_context<DeviceContext>(), *h0, order,
&ordered_h0, true);
gru_value.prev_out_value = ordered_h0.data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t num_batch = batch_starts.size() - 1;
auto active_node = math::detail::GetActivationType(
context.Attr<std::string>("activation"));
auto active_gate = math::detail::GetActivationType(
context.Attr<std::string>("gate_activation"));
for (size_t n = 0; n < num_batch; n++) {
int bstart = static_cast<int>(batch_starts[n]);
int bend = static_cast<int>(batch_starts[n + 1]);
int cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice(bstart, bend);
Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend);
Tensor hidden_t = batch_hidden->Slice(bstart, bend);
gru_value.output_value = hidden_t.data<T>();
gru_value.gate_value = gate_t.data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.data<T>();
math::GRUUnitFunctor<DeviceContext, T>::compute(
dev_ctx, gru_value, frame_size, cur_batch_size, active_node,
active_gate);
gru_value.prev_out_value = gru_value.output_value;
}
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
BatchCompute(context);
}
};
template <typename DeviceContext, typename T>
class GRUGradKernel : public framework::OpKernel<T> {
public:
......
......@@ -90,6 +90,25 @@ class Blas {
void GEMM(bool transA, bool transB, int M, int N, int K, T alpha, const T* A,
int lda, const T* B, int ldb, T beta, T* C, int ldc) const;
#ifdef PADDLE_WITH_MKLML
template <typename T>
T* GEMM_ALLOC(const CBLAS_IDENTIFIER id, const int M, const int N,
const int K) const;
template <typename T>
void GEMM_PACK(const CBLAS_IDENTIFIER id, const CBLAS_TRANSPOSE trans, int M,
int N, int K, const T alpha, const T* src, const int ld,
T* dst) const;
template <typename T>
void GEMM_COMPUTE(int transA, int transB, int M, int N, int K, const T* A,
const int lda, const T* B, const int ldb, T beta, T* C,
const int ldc) const;
template <typename T>
void GEMM_FREE(T* data) const;
#endif
template <typename T>
void MatMul(const framework::Tensor& mat_a, bool trans_a,
const framework::Tensor& mat_b, bool trans_b, T alpha,
......@@ -146,6 +165,28 @@ class BlasT : private Blas<DeviceContext> {
Base()->template GEMM<T>(args...);
}
#ifdef PADDLE_WITH_MKLML
template <typename... ARGS>
T* GEMM_ALLOC(ARGS... args) const {
return Base()->template GEMM_ALLOC<T>(args...);
}
template <typename... ARGS>
void GEMM_PACK(ARGS... args) const {
Base()->template GEMM_PACK<T>(args...);
}
template <typename... ARGS>
void GEMM_COMPUTE(ARGS... args) const {
Base()->template GEMM_COMPUTE<T>(args...);
}
template <typename... ARGS>
void GEMM_FREE(ARGS... args) const {
Base()->template GEMM_FREE<T>(args...);
}
#endif
template <typename... ARGS>
void MatMul(ARGS... args) const {
Base()->template MatMul<T>(args...);
......
......@@ -31,6 +31,26 @@ struct CBlas<float> {
platform::dynload::cblas_sgemm(args...);
}
template <typename... ARGS>
static float *GEMM_ALLOC(ARGS... args) {
return platform::dynload::cblas_sgemm_alloc(args...);
}
template <typename... ARGS>
static void GEMM_PACK(ARGS... args) {
platform::dynload::cblas_sgemm_pack(args...);
}
template <typename... ARGS>
static void GEMM_COMPUTE(ARGS... args) {
platform::dynload::cblas_sgemm_compute(args...);
}
template <typename... ARGS>
static void GEMM_FREE(ARGS... args) {
platform::dynload::cblas_sgemm_free(args...);
}
#ifdef PADDLE_WITH_LIBXSMM
template <typename... ARGS>
static void SMM_GEMM(ARGS... args) {
......@@ -71,6 +91,26 @@ struct CBlas<double> {
platform::dynload::cblas_dgemm(args...);
}
template <typename... ARGS>
static double *GEMM_ALLOC(ARGS... args) {
return platform::dynload::cblas_dgemm_alloc(args...);
}
template <typename... ARGS>
static void GEMM_PACK(ARGS... args) {
platform::dynload::cblas_dgemm_pack(args...);
}
template <typename... ARGS>
static void GEMM_COMPUTE(ARGS... args) {
platform::dynload::cblas_dgemm_compute(args...);
}
template <typename... ARGS>
static void GEMM_FREE(ARGS... args) {
platform::dynload::cblas_dgemm_free(args...);
}
#ifdef PADDLE_WITH_LIBXSMM
template <typename... ARGS>
static void SMM_GEMM(ARGS... args) {
......@@ -224,6 +264,41 @@ inline void GEMM_WARP(CBLAS_ORDER order, CBLAS_TRANSPOSE transA,
beta, C, ldc);
}
#ifdef PADDLE_WITH_MKLML
template <>
template <typename T>
T *Blas<platform::CPUDeviceContext>::GEMM_ALLOC(const CBLAS_IDENTIFIER id,
const int M, const int N,
const int K) const {
return CBlas<T>::GEMM_ALLOC(id, M, N, K);
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM_PACK(const CBLAS_IDENTIFIER id,
const CBLAS_TRANSPOSE trans,
int M, int N, int K,
const T alpha, const T *src,
const int ld, T *dst) const {
CBlas<T>::GEMM_PACK(CblasRowMajor, id, trans, M, N, K, alpha, src, ld, dst);
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM_COMPUTE(
int transA, int transB, int M, int N, int K, const T *A, const int lda,
const T *B, const int ldb, T beta, T *C, const int ldc) const {
CBlas<T>::GEMM_COMPUTE(CblasRowMajor, transA, transB, M, N, K, A, lda, B, ldb,
beta, C, ldc);
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM_FREE(T *data) const {
CBlas<T>::GEMM_FREE(data);
}
#endif
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::GEMM(CBLAS_TRANSPOSE transA,
......
......@@ -60,6 +60,14 @@ extern void* mklml_dso_handle;
__macro(cblas_dgemm_batch); \
__macro(vsAdd); \
__macro(vdAdd); \
__macro(cblas_sgemm_alloc); \
__macro(cblas_sgemm_pack); \
__macro(cblas_sgemm_compute); \
__macro(cblas_sgemm_free); \
__macro(cblas_dgemm_alloc); \
__macro(cblas_dgemm_pack); \
__macro(cblas_dgemm_compute); \
__macro(cblas_dgemm_free); \
__macro(MKL_Set_Num_Threads)
MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
......
......@@ -263,7 +263,8 @@ inline void throw_on_error(T e) {
* PADDLE_ENFORCE_EQ(a, b);
*
* will raise an expression described as follows:
* "enforce a == b failed, 1 != 2" with detailed stack information.
* "Enforce failed. Expected input a == b, but received a(1) != b(2)."
* with detailed stack information.
*
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
......@@ -292,9 +293,10 @@ inline void throw_on_error(T e) {
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (UNLIKELY(!((__VAL0)__CMP(__VAL1)))) { \
PADDLE_THROW("enforce %s " #__CMP " %s failed, %s " #__INV_CMP \
" %s\n%s", \
#__VAL0, #__VAL1, paddle::string::to_string(__VAL0), \
PADDLE_THROW("Enforce failed. Expected %s " #__CMP \
" %s, but received %s:%s " #__INV_CMP " %s:%s.\n%s", \
#__VAL0, #__VAL1, #__VAL0, \
paddle::string::to_string(__VAL0), #__VAL1, \
paddle::string::to_string(__VAL1), \
paddle::string::Sprintf("" __VA_ARGS__)); \
} \
......
......@@ -54,7 +54,9 @@ TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) {
PADDLE_ENFORCE_EQ(a, 1 + 3);
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4");
HasPrefix(
StringPiece(error.what()),
"Enforce failed. Expected a == 1 + 3, but received a:2 != 1 + 3:4.");
}
EXPECT_TRUE(caught_exception);
}
......@@ -67,7 +69,8 @@ TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) {
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
HasPrefix(StringPiece(error.what()),
"enforce a == 1 + 3 failed, 2 != 4\ntheir size not match");
"Enforce failed. Expected a == 1 + 3, but received a:2 != 1 + "
"3:4.\ntheir size not match");
}
EXPECT_TRUE(caught_exception);
}
......@@ -84,8 +87,9 @@ TEST(ENFORCE_NE, FAIL) {
PADDLE_ENFORCE_NE(1.0, 1UL);
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
"enforce 1.0 != 1UL failed, 1 == 1"))
EXPECT_TRUE(HasPrefix(
StringPiece(error.what()),
"Enforce failed. Expected 1.0 != 1UL, but received 1.0:1 == 1UL:1."))
<< error.what() << " does not have expected prefix";
}
EXPECT_TRUE(caught_exception);
......@@ -98,8 +102,9 @@ TEST(ENFORCE_GT, FAIL) {
PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(
HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2"));
EXPECT_TRUE(HasPrefix(
StringPiece(error.what()),
"Enforce failed. Expected 1 > 2UL, but received 1:1 <= 2UL:2."));
}
EXPECT_TRUE(caught_exception);
}
......@@ -116,8 +121,9 @@ TEST(ENFORCE_GE, FAIL) {
PADDLE_ENFORCE_GE(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(
HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2"));
EXPECT_TRUE(HasPrefix(
StringPiece(error.what()),
"Enforce failed. Expected 1 >= 2UL, but received 1:1 < 2UL:2."));
}
EXPECT_TRUE(caught_exception);
}
......@@ -135,8 +141,9 @@ TEST(ENFORCE_LE, FAIL) {
PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(
HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2"));
EXPECT_TRUE(HasPrefix(
StringPiece(error.what()),
"Enforce failed. Expected 1 > 2UL, but received 1:1 <= 2UL:2."));
}
EXPECT_TRUE(caught_exception);
}
......@@ -153,7 +160,8 @@ TEST(ENFORCE_LT, FAIL) {
} catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
"enforce 1UL < 0.12 failed, 1 >= 0.12"));
"Enforce failed. Expected 1UL < 0.12, but "
"received 1UL:1 >= 0.12:0.12."));
}
EXPECT_TRUE(caught_exception);
}
......
......@@ -116,7 +116,8 @@ size_t GpuMaxChunkSize() {
size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use *
(total - reserving));
PADDLE_ENFORCE_LE(allocating, available);
PADDLE_ENFORCE_LE(allocating, available,
"Insufficient GPU memory to allocation.");
return allocating;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册