Commit 98ffde41 authored by shippingwang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

...@@ -25,5 +25,6 @@ third_party/ ...@@ -25,5 +25,6 @@ third_party/
bazel-* bazel-*
third_party/ third_party/
build_*
# clion workspace. # clion workspace.
cmake-build-* cmake-build-*
...@@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON) ...@@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF) option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_FAST_MATH "Make use of fast math library" OFF) option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
# PY_VERSION # PY_VERSION
if(NOT PY_VERSION) if(NOT PY_VERSION)
......
...@@ -198,6 +198,9 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None ...@@ -198,6 +198,9 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
......
...@@ -17,7 +17,6 @@ limitations under the License. */ ...@@ -17,7 +17,6 @@ limitations under the License. */
#include <typeindex> #include <typeindex>
#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
namespace paddle { namespace paddle {
......
...@@ -50,6 +50,27 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -50,6 +50,27 @@ class CompileTimeInferShapeContext : public InferShapeContext {
const std::vector<std::string> &Outputs( const std::vector<std::string> &Outputs(
const std::string &name) const override; const std::string &name) const override;
void ShareDim(const std::string &in, const std::string &out, size_t i = 0,
size_t j = 0) override {
PADDLE_ENFORCE_LT(i, Inputs(in).size());
PADDLE_ENFORCE_LT(j, Outputs(out).size());
const std::string &input_n = Inputs(in)[i];
const std::string &output_n = Outputs(out)[j];
PADDLE_ENFORCE(input_n != framework::kEmptyVarName, "The %s[%d] is @EMPTY@",
in, i);
PADDLE_ENFORCE(output_n != framework::kEmptyVarName,
"The %s[%d] is @EMPTY@", out, j);
auto *in_var = block_.FindVarRecursive(input_n);
auto *out_var = block_.FindVarRecursive(output_n);
PADDLE_ENFORCE(in_var->GetType() == out_var->GetType(),
"The type of %s and %s is not the same.", input_n, output_n);
SetDim(output_n, GetDim(input_n));
}
void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, void ShareLoD(const std::string &in, const std::string &out, size_t i = 0,
size_t j = 0) const override { size_t j = 0) const override {
PADDLE_ENFORCE_LT(i, Inputs(in).size()); PADDLE_ENFORCE_LT(i, Inputs(in).size());
......
...@@ -542,6 +542,36 @@ class RuntimeInferShapeContext : public InferShapeContext { ...@@ -542,6 +542,36 @@ class RuntimeInferShapeContext : public InferShapeContext {
return op_.Outputs(name); return op_.Outputs(name);
} }
void ShareDim(const std::string& in, const std::string& out, size_t i = 0,
size_t j = 0) override {
PADDLE_ENFORCE_LT(i, Inputs(in).size());
PADDLE_ENFORCE_LT(j, Outputs(out).size());
const std::string& input_n = Inputs(in)[i];
const std::string& output_n = Outputs(out)[j];
Variable* in_var = scope_.FindVar(input_n);
Variable* out_var = scope_.FindVar(output_n);
PADDLE_ENFORCE(in_var->Type() == out_var->Type(),
"The type of %s and %s is not the same.", output_n,
input_n);
if (in_var->IsType<framework::SelectedRows>()) {
auto& in_sele_rows = in_var->Get<framework::SelectedRows>();
auto out_sele_rows = out_var->GetMutable<framework::SelectedRows>();
out_sele_rows->mutable_value()->Resize(in_sele_rows.value().dims());
out_sele_rows->set_rows(in_sele_rows.rows());
out_sele_rows->set_height(in_sele_rows.height());
} else if (in_var->IsType<framework::LoDTensor>()) {
auto& in_lod_tensor = in_var->Get<framework::LoDTensor>();
auto* out_lod_tensor = out_var->GetMutable<framework::LoDTensor>();
out_lod_tensor->Resize(in_lod_tensor.dims());
} else {
PADDLE_THROW(
"Currently, the input type of ShareDim only can be LoDTensor "
"or SelectedRows.");
}
}
void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, void ShareLoD(const std::string& in, const std::string& out, size_t i = 0,
size_t j = 0) const override { size_t j = 0) const override {
const std::vector<std::string>& inputs = Inputs(in); const std::vector<std::string>& inputs = Inputs(in);
......
...@@ -56,6 +56,9 @@ class InferShapeContext { ...@@ -56,6 +56,9 @@ class InferShapeContext {
virtual const std::vector<std::string> &Outputs( virtual const std::vector<std::string> &Outputs(
const std::string &name) const = 0; const std::string &name) const = 0;
virtual void ShareDim(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) = 0;
virtual void ShareLoD(const std::string &in, const std::string &out, virtual void ShareLoD(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) const = 0; size_t i = 0, size_t j = 0) const = 0;
......
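For orientation, a minimal sketch (hypothetical MyOp, not part of this commit) of how an operator's InferShape can use the new ShareDim interface alongside ShareLoD; the operator changes later in this diff follow the same pattern:

class MyOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
    // Propagate the shape (and, at runtime, the SelectedRows metadata) and
    // the LoD from X to Out instead of calling SetOutputDim manually.
    ctx->ShareDim("X", /*->*/ "Out");
    ctx->ShareLoD("X", /*->*/ "Out");
  }
};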
...@@ -165,10 +165,12 @@ inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, ...@@ -165,10 +165,12 @@ inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
} }
template <typename Predicate> template <typename Predicate>
struct AnyVisitor : public boost::static_visitor<bool> { class AnyVisitor : public boost::static_visitor<bool> {
private:
const framework::Tensor& tensor_; const framework::Tensor& tensor_;
Predicate predicate_; Predicate predicate_;
public:
AnyVisitor(const framework::Tensor& tensor, Predicate predicate) AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
: tensor_(tensor), predicate_(std::move(predicate)) {} : tensor_(tensor), predicate_(std::move(predicate)) {}
...@@ -206,6 +208,27 @@ struct AnyVisitor : public boost::static_visitor<bool> { ...@@ -206,6 +208,27 @@ struct AnyVisitor : public boost::static_visitor<bool> {
} }
}; };
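// AnyOutVisitor mirrors AnyVisitor, but instead of returning the reduced bool
// to the host it writes the result into a 1-element `out` tensor on the same
// place, which keeps the check asynchronous on the device.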
template <typename Predicate>
class AnyOutVisitor : public boost::static_visitor<> {
private:
const framework::Tensor& tensor_;
mutable framework::Tensor* out_;
Predicate predicate_;
public:
AnyOutVisitor(const framework::Tensor& tensor, Predicate predicate,
framework::Tensor* out)
: tensor_(tensor), out_(out), predicate_(std::move(predicate)) {}
template <typename Place>
void operator()(const Place& place) const {
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
out_->Resize({1});
out_->mutable_data<bool>(place);
AnyImpl(predicate_, tensor_, *ctx, out_);
}
};
template <typename Predicate> template <typename Predicate>
inline bool Any(const framework::Tensor& tensor, Predicate predicate) { inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
AnyVisitor<Predicate> visitor(tensor, predicate); AnyVisitor<Predicate> visitor(tensor, predicate);
...@@ -213,6 +236,14 @@ inline bool Any(const framework::Tensor& tensor, Predicate predicate) { ...@@ -213,6 +236,14 @@ inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
return platform::VisitPlace(place, visitor); return platform::VisitPlace(place, visitor);
} }
template <typename Predicate>
inline void Any(const framework::Tensor& tensor, Predicate predicate,
framework::Tensor* out) {
AnyOutVisitor<Predicate> visitor(tensor, predicate, out);
auto place = tensor.place();
platform::VisitPlace(place, visitor);
}
struct ContainsNANPredicate { struct ContainsNANPredicate {
template <typename T> template <typename T>
auto operator()(const T& eigen_vec) const auto operator()(const T& eigen_vec) const
...@@ -227,6 +258,12 @@ bool TensorContainsNAN(const framework::Tensor& tensor) { ...@@ -227,6 +258,12 @@ bool TensorContainsNAN(const framework::Tensor& tensor) {
return Any(tensor, predicate); return Any(tensor, predicate);
} }
void TensorContainsNAN(const framework::Tensor& tensor,
framework::Tensor* out) {
ContainsNANPredicate predicate;
Any(tensor, predicate, out);
}
struct ContainsInfPredicate { struct ContainsInfPredicate {
template <typename T> template <typename T>
auto operator()(const T& eigen_vec) const auto operator()(const T& eigen_vec) const
...@@ -241,6 +278,71 @@ bool TensorContainsInf(const framework::Tensor& tensor) { ...@@ -241,6 +278,71 @@ bool TensorContainsInf(const framework::Tensor& tensor) {
return Any(tensor, predicate); return Any(tensor, predicate);
} }
void TensorContainsInf(const framework::Tensor& tensor,
framework::Tensor* out) {
ContainsInfPredicate predicate;
Any(tensor, predicate, out);
}
// NOTE(dzhwinter):
// Isfinite needs an AllVisitor to loop through all the elements.
// We use two CUDA calls instead of one AllVisitor. The AllVisitor
// should be implemented if performance becomes a problem.
bool TensorIsfinite(const framework::Tensor& tensor) {
ContainsInfPredicate pred_inf;
ContainsNANPredicate pred_nan;
return !Any(tensor, pred_inf) && !Any(tensor, pred_nan);
}
#ifdef PADDLE_WITH_CUDA
template <typename T>
static inline void __global__ BothFalse(const T* cmp, T* out) {
out[0] = (!cmp[0]) && (!out[0]);
}
#endif
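// BothFalseVisitor combines the two partial results: `in_` holds the
// contains-Inf flag and `out_` already holds the contains-NAN flag; after the
// visit, out_[0] == !in_[0] && !out_[0], i.e. true only when the tensor is
// finite.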
struct BothFalseVisitor : public boost::static_visitor<> {
const framework::Tensor& in_;
mutable framework::Tensor* out_;
BothFalseVisitor(const framework::Tensor& in, framework::Tensor* out)
: in_(in), out_(out) {}
template <typename Place>
void operator()(const Place& place) const {
VisitorImpl(place);
}
void VisitorImpl(const platform::CUDAPlace& gpu) const {
#ifdef PADDLE_WITH_CUDA
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu);
BothFalse<bool><<<1, 1, 0, ctx->stream()>>>(in_.data<bool>(),
out_->mutable_data<bool>(gpu));
#endif
}
void VisitorImpl(const platform::CPUPlace& cpu) const {
bool lhs = !in_.data<bool>()[0];
bool rhs = !out_->mutable_data<bool>(cpu)[0];
out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
}
void VisitorImpl(
const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const {
bool lhs = !in_.data<bool>()[0];
bool rhs = !out_->mutable_data<bool>(cpu)[0];
out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
}
};
void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) {
framework::Tensor tmp;
TensorContainsInf(tensor, &tmp);
TensorContainsNAN(tensor, out);
BothFalseVisitor visitor(tmp, out);
auto place = tensor.place();
platform::VisitPlace(place, visitor);
}
void TensorToStream(std::ostream& os, const Tensor& tensor, void TensorToStream(std::ostream& os, const Tensor& tensor,
const platform::DeviceContext& dev_ctx) { const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version { // the 1st field, uint32_t version
......
...@@ -57,8 +57,15 @@ void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx, ...@@ -57,8 +57,15 @@ void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx,
template <typename T> template <typename T>
void TesnorToVector(const Tensor& src, std::vector<T>* dst); void TesnorToVector(const Tensor& src, std::vector<T>* dst);
// copy the result bool to cpu
bool TensorContainsNAN(const framework::Tensor& tensor); bool TensorContainsNAN(const framework::Tensor& tensor);
bool TensorContainsInf(const framework::Tensor& tensor); bool TensorContainsInf(const framework::Tensor& tensor);
bool TensorIsfinite(const framework::Tensor& tensor);
// store the result bool in a gpu tensor; async operation, faster than the synchronous versions above.
void TensorContainsNAN(const framework::Tensor& tensor, framework::Tensor* out);
void TensorContainsInf(const framework::Tensor& tensor, framework::Tensor* out);
void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out);
void TensorToStream(std::ostream& os, const Tensor& tensor, void TensorToStream(std::ostream& os, const Tensor& tensor,
const platform::DeviceContext& dev_ctx); const platform::DeviceContext& dev_ctx);
......
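As a quick illustration of the two flavours declared above, a minimal sketch assuming a GPU tensor t and a CUDADeviceContext* cuda_ctx (hypothetical names; the GPUWithoutWait tests later in this diff use the same pattern):

// Synchronous: blocks and returns the answer on the host.
bool finite = paddle::framework::TensorIsfinite(t);

// Asynchronous: the bool result stays in a GPU tensor; copy it back and
// Wait() before reading it on the host.
paddle::framework::Tensor flag, flag_cpu;
paddle::framework::TensorIsfinite(t, &flag);
paddle::framework::TensorCopy(flag, paddle::platform::CPUPlace(), *cuda_ctx, &flag_cpu);
cuda_ctx->Wait();
bool finite_async = flag_cpu.data<bool>()[0];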
...@@ -36,7 +36,7 @@ TEST(TensorCopy, Tensor) { ...@@ -36,7 +36,7 @@ TEST(TensorCopy, Tensor) {
TensorCopy(src_tensor, *cpu_place, &dst_tensor); TensorCopy(src_tensor, *cpu_place, &dst_tensor);
const int* dst_ptr = dst_tensor.data<int>(); const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr); EXPECT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]); EXPECT_EQ(src_ptr[i], dst_ptr[i]);
} }
...@@ -47,7 +47,7 @@ TEST(TensorCopy, Tensor) { ...@@ -47,7 +47,7 @@ TEST(TensorCopy, Tensor) {
TensorCopy(slice_tensor, *cpu_place, &dst_tensor); TensorCopy(slice_tensor, *cpu_place, &dst_tensor);
const int* slice_ptr = slice_tensor.data<int>(); const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr); EXPECT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]); EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
} }
...@@ -77,7 +77,7 @@ TEST(TensorCopy, Tensor) { ...@@ -77,7 +77,7 @@ TEST(TensorCopy, Tensor) {
// Sync before Compare Tensors // Sync before Compare Tensors
gpu_ctx.Wait(); gpu_ctx.Wait();
const int* dst_ptr = dst_tensor.data<int>(); const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr); EXPECT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]); EXPECT_EQ(src_ptr[i], dst_ptr[i]);
} }
...@@ -94,7 +94,7 @@ TEST(TensorCopy, Tensor) { ...@@ -94,7 +94,7 @@ TEST(TensorCopy, Tensor) {
gpu_ctx.Wait(); gpu_ctx.Wait();
const int* slice_ptr = slice_tensor.data<int>(); const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr); EXPECT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) { for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]); EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
} }
...@@ -117,7 +117,7 @@ TEST(TensorFromVector, Tensor) { ...@@ -117,7 +117,7 @@ TEST(TensorFromVector, Tensor) {
// Compare Tensors // Compare Tensors
const int* cpu_ptr = cpu_tensor.data<int>(); const int* cpu_ptr = cpu_tensor.data<int>();
const int* src_ptr = src_vec.data(); const int* src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr); EXPECT_NE(src_ptr, cpu_ptr);
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]); EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
} }
...@@ -127,7 +127,7 @@ TEST(TensorFromVector, Tensor) { ...@@ -127,7 +127,7 @@ TEST(TensorFromVector, Tensor) {
paddle::framework::TensorFromVector<int>(src_vec, &cpu_tensor); paddle::framework::TensorFromVector<int>(src_vec, &cpu_tensor);
cpu_ptr = cpu_tensor.data<int>(); cpu_ptr = cpu_tensor.data<int>();
src_ptr = src_vec.data(); src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr); EXPECT_NE(src_ptr, cpu_ptr);
for (size_t i = 0; i < 5; ++i) { for (size_t i = 0; i < 5; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]); EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
} }
...@@ -161,8 +161,8 @@ TEST(TensorFromVector, Tensor) { ...@@ -161,8 +161,8 @@ TEST(TensorFromVector, Tensor) {
const int* src_ptr = src_vec.data(); const int* src_ptr = src_vec.data();
const int* cpu_ptr = cpu_tensor.data<int>(); const int* cpu_ptr = cpu_tensor.data<int>();
const int* dst_ptr = dst_tensor.data<int>(); const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, cpu_ptr); EXPECT_NE(src_ptr, cpu_ptr);
ASSERT_NE(src_ptr, dst_ptr); EXPECT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]); EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
EXPECT_EQ(src_ptr[i], dst_ptr[i]); EXPECT_EQ(src_ptr[i], dst_ptr[i]);
...@@ -181,8 +181,8 @@ TEST(TensorFromVector, Tensor) { ...@@ -181,8 +181,8 @@ TEST(TensorFromVector, Tensor) {
src_ptr = src_vec.data(); src_ptr = src_vec.data();
cpu_ptr = cpu_tensor.data<int>(); cpu_ptr = cpu_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, cpu_ptr); EXPECT_NE(src_ptr, cpu_ptr);
ASSERT_NE(src_ptr, dst_ptr); EXPECT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 5; ++i) { for (size_t i = 0; i < 5; ++i) {
EXPECT_EQ(src_ptr[i], cpu_ptr[i]); EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
EXPECT_EQ(src_ptr[i], dst_ptr[i]); EXPECT_EQ(src_ptr[i], dst_ptr[i]);
...@@ -235,9 +235,9 @@ TEST(TensorContainsNAN, CPU) { ...@@ -235,9 +235,9 @@ TEST(TensorContainsNAN, CPU) {
buf[0] = 0.0; buf[0] = 0.0;
buf[1] = NAN; buf[1] = NAN;
buf[2] = 0.0; buf[2] = 0.0;
ASSERT_TRUE(paddle::framework::TensorContainsNAN(src)); EXPECT_TRUE(paddle::framework::TensorContainsNAN(src));
buf[1] = 0.0; buf[1] = 0.0;
ASSERT_FALSE(paddle::framework::TensorContainsNAN(src)); EXPECT_FALSE(paddle::framework::TensorContainsNAN(src));
} }
{ {
...@@ -248,9 +248,9 @@ TEST(TensorContainsNAN, CPU) { ...@@ -248,9 +248,9 @@ TEST(TensorContainsNAN, CPU) {
buf[0] = 0.0; buf[0] = 0.0;
buf[1].x = 0x7fff; buf[1].x = 0x7fff;
buf[2] = 0.0; buf[2] = 0.0;
ASSERT_TRUE(paddle::framework::TensorContainsNAN(src)); EXPECT_TRUE(paddle::framework::TensorContainsNAN(src));
buf[1] = 0.0; buf[1] = 0.0;
ASSERT_FALSE(paddle::framework::TensorContainsNAN(src)); EXPECT_FALSE(paddle::framework::TensorContainsNAN(src));
} }
} }
...@@ -261,9 +261,9 @@ TEST(TensorContainsInf, CPU) { ...@@ -261,9 +261,9 @@ TEST(TensorContainsInf, CPU) {
buf[0] = 1.0; buf[0] = 1.0;
buf[1] = INFINITY; buf[1] = INFINITY;
buf[2] = 0.0; buf[2] = 0.0;
ASSERT_TRUE(paddle::framework::TensorContainsInf(src)); EXPECT_TRUE(paddle::framework::TensorContainsInf(src));
buf[1] = 1.0; buf[1] = 1.0;
ASSERT_FALSE(paddle::framework::TensorContainsInf(src)); EXPECT_FALSE(paddle::framework::TensorContainsInf(src));
} }
{ {
...@@ -274,9 +274,55 @@ TEST(TensorContainsInf, CPU) { ...@@ -274,9 +274,55 @@ TEST(TensorContainsInf, CPU) {
buf[0] = 1.0; buf[0] = 1.0;
buf[1].x = 0x7c00; buf[1].x = 0x7c00;
buf[2] = 0.0; buf[2] = 0.0;
ASSERT_TRUE(paddle::framework::TensorContainsInf(src)); EXPECT_TRUE(paddle::framework::TensorContainsInf(src));
buf[1] = 1.0; buf[1] = 1.0;
ASSERT_FALSE(paddle::framework::TensorContainsInf(src)); EXPECT_FALSE(paddle::framework::TensorContainsInf(src));
}
}
TEST(TensorIsfinite, CPU) {
{
paddle::framework::Tensor src, out;
double* buf = src.mutable_data<double>({3}, paddle::platform::CPUPlace());
buf[0] = 1.0;
buf[1] = INFINITY;
buf[2] = 0.0;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], false);
buf[1] = 1.0;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], true);
}
{
paddle::framework::Tensor src, out;
double* buf = src.mutable_data<double>({3}, paddle::platform::CPUPlace());
buf[0] = 1.0;
buf[1] = NAN;
buf[2] = 0.0;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], false);
buf[1] = 1.0;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], true);
}
{
paddle::framework::Tensor src, out;
paddle::platform::float16* buf =
src.mutable_data<paddle::platform::float16>(
{3}, paddle::platform::CPUPlace());
buf[0] = 1.0;
buf[1].x = 0x7c00;
buf[2] = 0.0;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], false);
buf[1] = 1.0;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], true);
buf[1].x = 0x7fff;
paddle::framework::TensorIsfinite(src, &out);
EXPECT_EQ(out.data<bool>()[0], false);
} }
} }
...@@ -299,9 +345,9 @@ TEST(Tensor, FromAndToStream) { ...@@ -299,9 +345,9 @@ TEST(Tensor, FromAndToStream) {
TensorFromStream(iss, &dst_tensor, cpu_ctx); TensorFromStream(iss, &dst_tensor, cpu_ctx);
int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace()); int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
ASSERT_EQ(dst_ptr[i], array[i]); EXPECT_EQ(dst_ptr[i], array[i]);
} }
ASSERT_EQ(dst_tensor.dims(), src_tensor.dims()); EXPECT_EQ(dst_tensor.dims(), src_tensor.dims());
delete place; delete place;
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -323,7 +369,7 @@ TEST(Tensor, FromAndToStream) { ...@@ -323,7 +369,7 @@ TEST(Tensor, FromAndToStream) {
int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace()); int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < 6; ++i) { for (int i = 0; i < 6; ++i) {
ASSERT_EQ(dst_ptr[i], array[i]); EXPECT_EQ(dst_ptr[i], array[i]);
} }
delete gpu_place; delete gpu_place;
} }
......
...@@ -27,9 +27,9 @@ static __global__ void FillNAN(float* buf) { ...@@ -27,9 +27,9 @@ static __global__ void FillNAN(float* buf) {
} }
static __global__ void FillInf(float* buf) { static __global__ void FillInf(float* buf) {
buf[0] = 0.0; buf[0] = INFINITY;
buf[1] = INFINITY; buf[1] = 0.1;
buf[2] = 0.5; buf[2] = 0.2;
} }
static __global__ void FillNAN(platform::float16* buf) { static __global__ void FillNAN(platform::float16* buf) {
...@@ -44,6 +44,18 @@ static __global__ void FillInf(platform::float16* buf) { ...@@ -44,6 +44,18 @@ static __global__ void FillInf(platform::float16* buf) {
buf[2] = 0.5; buf[2] = 0.5;
} }
static __global__ void FillFinite(float* buf) {
buf[0] = 0.0;
buf[1] = 0.1;
buf[2] = 0.2;
}
static __global__ void FillFinite(platform::float16* buf) {
buf[0] = 0.0;
buf[1] = 0.1;
buf[2] = 0.2;
}
TEST(TensorContainsNAN, GPU) { TEST(TensorContainsNAN, GPU) {
paddle::platform::CUDAPlace gpu(0); paddle::platform::CUDAPlace gpu(0);
auto& pool = paddle::platform::DeviceContextPool::Instance(); auto& pool = paddle::platform::DeviceContextPool::Instance();
...@@ -86,5 +98,163 @@ TEST(TensorContainsInf, GPU) { ...@@ -86,5 +98,163 @@ TEST(TensorContainsInf, GPU) {
} }
} }
TEST(TensorIsfinite, GPU) {
paddle::platform::CUDAPlace gpu(0);
using paddle::platform::float16;
auto& pool = paddle::platform::DeviceContextPool::Instance();
auto* cuda_ctx = pool.GetByPlace(gpu);
// contains inf
{
Tensor tensor;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
EXPECT_TRUE(!TensorIsfinite(tensor));
}
{
Tensor tensor;
float16* buf = tensor.mutable_data<float16>({3}, gpu);
FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
EXPECT_TRUE(!TensorIsfinite(tensor));
}
// contains nan
{
Tensor tensor;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
EXPECT_TRUE(!TensorIsfinite(tensor));
}
{
Tensor tensor;
float16* buf = tensor.mutable_data<float16>({3}, gpu);
FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
EXPECT_TRUE(!TensorIsfinite(tensor));
}
// all element are finite
{
Tensor tensor;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillFinite<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
EXPECT_TRUE(TensorIsfinite(tensor));
}
{
Tensor tensor;
float16* buf = tensor.mutable_data<float16>({3}, gpu);
FillFinite<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
EXPECT_TRUE(TensorIsfinite(tensor));
}
}
TEST(TensorContainsInf, GPUWithoutWait) {
paddle::platform::CUDAPlace gpu(0);
auto& pool = paddle::platform::DeviceContextPool::Instance();
auto* cuda_ctx = pool.GetByPlace(gpu);
{
Tensor tensor, out;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorContainsInf(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
ASSERT_EQ(tmp.data<bool>()[0], true);
}
{
Tensor tensor, out;
paddle::platform::float16* buf =
tensor.mutable_data<paddle::platform::float16>({3}, gpu);
FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorContainsInf(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
ASSERT_EQ(tmp.data<bool>()[0], true);
}
}
TEST(TensorContainsNAN, GPUWithoutWait) {
paddle::platform::CUDAPlace gpu(0);
auto& pool = paddle::platform::DeviceContextPool::Instance();
auto* cuda_ctx = pool.GetByPlace(gpu);
{
Tensor tensor, out;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorContainsNAN(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
ASSERT_EQ(tmp.data<bool>()[0], true);
}
{
Tensor tensor, out;
paddle::platform::float16* buf =
tensor.mutable_data<paddle::platform::float16>({3}, gpu);
FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorContainsNAN(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
ASSERT_EQ(tmp.data<bool>()[0], true);
}
}
TEST(TensorIsfinite, GPUWithoutWait) {
paddle::platform::CUDAPlace gpu(0);
auto& pool = paddle::platform::DeviceContextPool::Instance();
auto* cuda_ctx = pool.GetByPlace(gpu);
{
Tensor tensor, out;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorIsfinite(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
EXPECT_EQ(tmp.data<bool>()[0], false);
}
{
Tensor tensor, out;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorIsfinite(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
EXPECT_EQ(tmp.data<bool>()[0], false);
}
{
Tensor tensor, out;
float* buf = tensor.mutable_data<float>({3}, gpu);
FillFinite<<<1, 1, 0, cuda_ctx->stream()>>>(buf);
cuda_ctx->Wait();
TensorIsfinite(tensor, &out);
platform::CPUPlace cpu;
Tensor tmp;
TensorCopy(out, cpu, *cuda_ctx, &tmp);
cuda_ctx->Wait();
EXPECT_EQ(tmp.data<bool>()[0], true);
}
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -22,8 +22,8 @@ limitations under the License. */ ...@@ -22,8 +22,8 @@ limitations under the License. */
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <thread> //NOLINT #include <thread> //NOLINT
#include "paddle/fluid/inference/paddle_inference_api.h" #include "paddle/fluid/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string(dirname, "", "Directory of the inference model."); DEFINE_string(dirname, "", "Directory of the inference model.");
DEFINE_bool(use_gpu, false, "Whether use gpu."); DEFINE_bool(use_gpu, false, "Whether use gpu.");
...@@ -62,17 +62,17 @@ void Main(bool use_gpu) { ...@@ -62,17 +62,17 @@ void Main(bool use_gpu) {
CHECK(predictor->Run(slots, &outputs)); CHECK(predictor->Run(slots, &outputs));
//# 4. Get output. //# 4. Get output.
PADDLE_ENFORCE(outputs.size(), 1UL); CHECK_EQ(outputs.size(), 1UL);
// Check the output buffer size and result of each tid. // Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); CHECK_EQ(outputs.front().data.length(), 33168UL);
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements); for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) { i++) {
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], CHECK_NEAR(static_cast<float*>(outputs.front().data.data())[i], result[i],
result[i]); 0.001);
} }
} }
} }
...@@ -108,9 +108,9 @@ void MainThreads(int num_threads, bool use_gpu) { ...@@ -108,9 +108,9 @@ void MainThreads(int num_threads, bool use_gpu) {
CHECK(predictor->Run(inputs, &outputs)); CHECK(predictor->Run(inputs, &outputs));
// 4. Get output. // 4. Get output.
PADDLE_ENFORCE(outputs.size(), 1UL); CHECK_EQ(outputs.size(), 1UL);
// Check the output buffer size and result of each tid. // Check the output buffer size and result of each tid.
PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); CHECK_EQ(outputs.front().data.length(), 33168UL);
float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815,
0.000932706}; 0.000932706};
const size_t num_elements = const size_t num_elements =
...@@ -118,8 +118,8 @@ void MainThreads(int num_threads, bool use_gpu) { ...@@ -118,8 +118,8 @@ void MainThreads(int num_threads, bool use_gpu) {
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements); for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) { i++) {
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], CHECK_NEAR(static_cast<float*>(outputs.front().data.data())[i],
result[i]); result[i], 0.001);
} }
} }
}); });
......
...@@ -17,11 +17,12 @@ limitations under the License. */ ...@@ -17,11 +17,12 @@ limitations under the License. */
*/ */
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. #include <glog/logging.h> // use glog instead of CHECK to avoid importing other paddle header files.
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
// #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/inference/demo_ci/utils.h" #include "paddle/fluid/inference/demo_ci/utils.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_gpu_memory_to_use);
...@@ -78,18 +79,17 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) { ...@@ -78,18 +79,17 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
VLOG(3) << "predictor output numel " << numel; VLOG(3) << "predictor output numel " << numel;
VLOG(3) << "reference output numel " << refer.data.size(); VLOG(3) << "reference output numel " << refer.data.size();
PADDLE_ENFORCE_EQ(numel, refer.data.size()); CHECK_EQ(numel, refer.data.size());
switch (output.dtype) { switch (output.dtype) {
case PaddleDType::INT64: { case PaddleDType::INT64: {
for (size_t i = 0; i < numel; ++i) { for (size_t i = 0; i < numel; ++i) {
PADDLE_ENFORCE_EQ(static_cast<int64_t*>(output.data.data())[i], CHECK_EQ(static_cast<int64_t*>(output.data.data())[i], refer.data[i]);
refer.data[i]);
} }
break; break;
} }
case PaddleDType::FLOAT32: case PaddleDType::FLOAT32:
for (size_t i = 0; i < numel; ++i) { for (size_t i = 0; i < numel; ++i) {
PADDLE_ENFORCE_LT( CHECK_LT(
fabs(static_cast<float*>(output.data.data())[i] - refer.data[i]), fabs(static_cast<float*>(output.data.data())[i] - refer.data[i]),
1e-5); 1e-5);
} }
......
...@@ -27,9 +27,6 @@ void SetConfig(AnalysisConfig *cfg) { ...@@ -27,9 +27,6 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->device = 0; cfg->device = 0;
cfg->enable_ir_optim = true; cfg->enable_ir_optim = true;
cfg->specify_input_name = true; cfg->specify_input_name = true;
#ifdef PADDLE_WITH_MKLDNN
cfg->_use_mkldnn = true;
#endif
} }
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
......
...@@ -80,7 +80,7 @@ class ActivationOp : public framework::OperatorWithKernel { ...@@ -80,7 +80,7 @@ class ActivationOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
...@@ -91,12 +91,26 @@ class ActivationOp : public framework::OperatorWithKernel { ...@@ -91,12 +91,26 @@ class ActivationOp : public framework::OperatorWithKernel {
} }
}; };
class ActivationOpInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDesc& op_desc,
framework::BlockDesc* block) const override {
auto x_name = op_desc.Input("X")[0];
auto out_name = op_desc.Output("Out")[0];
auto& x = block->FindRecursiveOrCreateVar(x_name);
auto& out = block->FindRecursiveOrCreateVar(out_name);
out.SetType(x.GetType());
out.SetDataType(x.GetDataType());
}
};
class ActivationOpGrad : public framework::OperatorWithKernel { class ActivationOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out")); ctx->ShareDim("Out", framework::GradVarName("X"));
ctx->ShareLoD("Out", framework::GradVarName("X"));
} }
protected: protected:
...@@ -525,12 +539,14 @@ namespace ops = paddle::operators; ...@@ -525,12 +539,14 @@ namespace ops = paddle::operators;
#define REGISTER_INPLACE_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \ #define REGISTER_INPLACE_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \
REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \ REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
::paddle::operators::OP_NAME##OpMaker, \ ::paddle::operators::OP_NAME##OpMaker, \
::paddle::operators::ActivationOpInferVarType, \
::paddle::operators::OP_NAME##GradMaker); \ ::paddle::operators::OP_NAME##GradMaker); \
REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad) REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad)
#define REGISTER_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \ #define REGISTER_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \
REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \ REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
::paddle::operators::OP_NAME##OpMaker, \ ::paddle::operators::OP_NAME##OpMaker, \
::paddle::operators::ActivationOpInferVarType, \
::paddle::framework::DefaultGradOpDescMaker<true>); \ ::paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad) REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad)
......
...@@ -42,8 +42,8 @@ class ArgsortOp : public framework::OperatorWithKernel { ...@@ -42,8 +42,8 @@ class ArgsortOp : public framework::OperatorWithKernel {
"-rank(Input(X)) (%d).", "-rank(Input(X)) (%d).",
axis, num_dims); axis, num_dims);
ctx->SetOutputDim("Out", in_dims); ctx->ShareDim("X", "Out");
ctx->SetOutputDim("Indices", in_dims); ctx->ShareDim("X", "Indices");
ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Out");
ctx->ShareLoD("X", "Indices"); ctx->ShareLoD("X", "Indices");
} }
......
...@@ -44,7 +44,7 @@ class ConvShiftOp : public framework::OperatorWithKernel { ...@@ -44,7 +44,7 @@ class ConvShiftOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_LE(y_dims[1], x_dims[1], PADDLE_ENFORCE_LE(y_dims[1], x_dims[1],
"The 2nd dimension of Input(Y) should be less than or " "The 2nd dimension of Input(Y) should be less than or "
"equal to the 2nd dimension of Input(X)."); "equal to the 2nd dimension of Input(X).");
ctx->SetOutputDim("Out", x_dims); ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <cub/cub.cuh> // NOLINT #include <cub/cub.cuh> // NOLINT
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -293,7 +294,12 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y, ...@@ -293,7 +294,12 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
} }
auto x_data = x.data<Tx>(); auto x_data = x.data<Tx>();
auto y_data = y->mutable_data<Ty>(x.place()); auto y_data = y->mutable_data<Ty>(x.place());
if (reduce_num == 1) return; if (reduce_num == 1) {
auto out_dims = y->dims();
framework::TensorCopy(x, y->place(), y);
y->Resize(out_dims);
return;
}
#define CUB_BLOCK_DIM_CASE(block_dim) \ #define CUB_BLOCK_DIM_CASE(block_dim) \
case block_dim: { \ case block_dim: { \
......
...@@ -41,7 +41,8 @@ class ElementwiseOp : public framework::OperatorWithKernel { ...@@ -41,7 +41,8 @@ class ElementwiseOp : public framework::OperatorWithKernel {
auto y_dim = ctx->GetInputDim("Y"); auto y_dim = ctx->GetInputDim("Y");
PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(), PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(),
"Rank of first input must >= rank of second input."); "Rank of first input must >= rank of second input.");
ctx->SetOutputDim("Out", x_dim);
ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
...@@ -70,6 +71,7 @@ class ElementwiseOpInferVarType : public framework::VarTypeInference { ...@@ -70,6 +71,7 @@ class ElementwiseOpInferVarType : public framework::VarTypeInference {
auto& x = block->FindRecursiveOrCreateVar(x_name); auto& x = block->FindRecursiveOrCreateVar(x_name);
auto& out = block->FindRecursiveOrCreateVar(out_name); auto& out = block->FindRecursiveOrCreateVar(out_name);
out.SetType(x.GetType()); out.SetType(x.GetType());
out.SetDataType(x.GetDataType());
} }
}; };
...@@ -157,10 +159,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { ...@@ -157,10 +159,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
auto x_grad_name = framework::GradVarName("X"); auto x_grad_name = framework::GradVarName("X");
auto y_grad_name = framework::GradVarName("Y"); auto y_grad_name = framework::GradVarName("Y");
if (ctx->HasOutput(x_grad_name)) { if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims); ctx->ShareDim("X", /*->*/ x_grad_name);
ctx->ShareLoD("X", /*->*/ x_grad_name);
} }
if (ctx->HasOutput(y_grad_name)) { if (ctx->HasOutput(y_grad_name)) {
ctx->SetOutputDim(y_grad_name, y_dims); ctx->ShareDim("Y", /*->*/ y_grad_name);
ctx->ShareLoD("Y", /*->*/ y_grad_name);
} }
} }
...@@ -193,14 +197,15 @@ class ElementwiseOpExplicitGrad : public ElementwiseOpGrad { ...@@ -193,14 +197,15 @@ class ElementwiseOpExplicitGrad : public ElementwiseOpGrad {
auto x_grad_name = framework::GradVarName("X"); auto x_grad_name = framework::GradVarName("X");
if (ctx->HasOutput(x_grad_name)) { if (ctx->HasOutput(x_grad_name)) {
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); ctx->ShareDim(framework::GradVarName("Out"), /*->*/ x_grad_name);
ctx->SetOutputDim(x_grad_name, out_dims); ctx->ShareLoD(framework::GradVarName("Out"), /*->*/ x_grad_name);
} }
auto y_grad_name = framework::GradVarName("Y"); auto y_grad_name = framework::GradVarName("Y");
if (ctx->HasOutput(y_grad_name)) { if (ctx->HasOutput(y_grad_name)) {
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null"); PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
auto y_dims = ctx->GetInputDim("Y");
ctx->SetOutputDim(y_grad_name, y_dims); ctx->ShareDim("Y", /*->*/ y_grad_name);
ctx->ShareLoD("Y", /*->*/ y_grad_name);
} }
} }
}; };
......
...@@ -48,7 +48,8 @@ class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel { ...@@ -48,7 +48,8 @@ class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel {
"Input(X) of FakeDequantizeMaxAbsOp should not be null."); "Input(X) of FakeDequantizeMaxAbsOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of FakeDequantizeMaxAbsOp should not be null."); "Output(Out) of FakeDequantizeMaxAbsOp should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/isfinite_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {
class OverflowOp : public framework::OperatorWithKernel {
public:
OverflowOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of OverflowOp should not be null.");
ctx->SetOutputDim("Out", {1});
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
int dtype = -1;
auto *x_var = ctx.InputVar("X");
if (x_var->IsType<framework::LoDTensor>()) {
dtype = framework::ToDataType(x_var->Get<framework::LoDTensor>().type());
} else if (x_var->IsType<framework::SelectedRows>()) {
dtype = framework::ToDataType(
x_var->Get<framework::SelectedRows>().value().type());
} else {
PADDLE_THROW("Cannot find the input data type by all input data");
}
return framework::OpKernelType(framework::proto::VarType::Type(dtype),
ctx.GetPlace());
}
};
class OverflowOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor) The input tensors of overflow operator.");
AddOutput("Out",
"(Tensor) 1-dim tensor, contains a bool scalar. The output "
"tensor of overflow operator.");
AddComment(string::Sprintf(R"DOC(
Overflow operator.
$$Out = any(X)$$
If any X contains Inf or Nan, Out will contain an indicator.
Out = Inf if any X contains Inf,
Out = Nan if any X contains Nan,
Out = 0 if no Inf/Nan is detected.
If X contains both Inf and Nan, it returns the first indicator it meets.
)DOC",
GetName(), GetComments()));
}
protected:
virtual std::string GetName() const = 0;
virtual std::string GetComments() const = 0;
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
#define REGISTER_OP_MAKER(op_type, comment) \
namespace paddle { \
namespace operators { \
class _##op_type##OverflowOpMaker \
: public ::paddle::operators::OverflowOpMaker { \
protected: \
std::string GetName() const { return #op_type; } \
std::string GetComments() const { return comment; } \
}; \
} \
} \
REGISTER_OPERATOR(op_type, ops::OverflowOp, \
ops::_##op_type##OverflowOpMaker, \
paddle::framework::EmptyGradOpMaker)
#define REGISTER_OVERFLOW_CPU_KERNEL(op_type, functor) \
REGISTER_OP_CPU_KERNEL( \
op_type, ops::OverflowKernel<paddle::platform::CPUDeviceContext, int, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CPUDeviceContext, float, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CPUDeviceContext, double, \
ops::functor>);
REGISTER_OP_MAKER(isinf, "isinf(X)");
REGISTER_OP_MAKER(isnan, "isnan(X)");
REGISTER_OP_MAKER(isfinite, "isfinite(X)");
FOR_EACH_KERNEL_FUNCTOR(REGISTER_OVERFLOW_CPU_KERNEL);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/isfinite_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#define REGISTER_OVERFLOW_CUDA_KERNEL(op_type, functor) \
REGISTER_OP_CUDA_KERNEL( \
op_type, ops::OverflowKernel<paddle::platform::CUDADeviceContext, int, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CUDADeviceContext, float, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CUDADeviceContext, double, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CUDADeviceContext, plat::float16, \
ops::functor>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_OVERFLOW_CUDA_KERNEL);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/transform.h"
namespace paddle {
namespace operators {
struct InfinityFunctor {
void operator()(const framework::Tensor& tensor, framework::Tensor* out) {
framework::TensorContainsInf(tensor, out);
}
};
struct NANFunctor {
void operator()(const framework::Tensor& tensor, framework::Tensor* out) {
framework::TensorContainsNAN(tensor, out);
}
};
struct IsfiniteFunctor {
void operator()(const framework::Tensor& tensor, framework::Tensor* out) {
framework::TensorIsfinite(tensor, out);
}
};
template <typename DeviceContext, typename T, typename Functor>
class OverflowKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& ctx) const {
auto* x = ctx.InputVar("X");
auto* out = ctx.Output<framework::Tensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
Functor functor;
if (x->IsType<framework::LoDTensor>()) {
auto* in = ctx.Input<framework::Tensor>("X");
functor(*in, out);
} else if (x->IsType<framework::SelectedRows>()) {
auto& in = ctx.Input<framework::SelectedRows>("X")->value();
functor(in, out);
} else {
PADDLE_THROW("Unsupported input type.");
}
}
};
} // namespace operators
} // namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \
__macro(isinf, InfinityFunctor); \
__macro(isnan, NANFunctor); \
__macro(isfinite, IsfiniteFunctor);
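For readability, FOR_EACH_KERNEL_FUNCTOR(REGISTER_OVERFLOW_CPU_KERNEL) in isfinite_op.cc above expands, roughly (whitespace simplified), into one registration per functor:

REGISTER_OP_CPU_KERNEL(
    isinf,
    ops::OverflowKernel<paddle::platform::CPUDeviceContext, int, ops::InfinityFunctor>,
    ops::OverflowKernel<paddle::platform::CPUDeviceContext, float, ops::InfinityFunctor>,
    ops::OverflowKernel<paddle::platform::CPUDeviceContext, double, ops::InfinityFunctor>);
// ... and analogously for isnan with NANFunctor and isfinite with IsfiniteFunctor.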
...@@ -137,6 +137,7 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { ...@@ -137,6 +137,7 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
} }
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
} }
}; };
......
...@@ -49,7 +49,7 @@ class PReluOp : public framework::OperatorWithKernel { ...@@ -49,7 +49,7 @@ class PReluOp : public framework::OperatorWithKernel {
} else { } else {
PADDLE_THROW("Unkown mode %s", mode); PADDLE_THROW("Unkown mode %s", mode);
} }
ctx->SetOutputDim("Out", x_dim); ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
......
...@@ -54,7 +54,7 @@ class RNNMemoryHelperOpShapeInference : public framework::InferShapeBase { ...@@ -54,7 +54,7 @@ class RNNMemoryHelperOpShapeInference : public framework::InferShapeBase {
"Input(X) of rnn_memory_helper op should not be null."); "Input(X) of rnn_memory_helper op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output of rnn_memory_helper op should not be null."); "Output of rnn_memory_helper op should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
......
...@@ -90,8 +90,8 @@ class SequenceConvGradOp : public framework::OperatorWithKernel { ...@@ -90,8 +90,8 @@ class SequenceConvGradOp : public framework::OperatorWithKernel {
ctx->GetInputDim("PaddingData")); ctx->GetInputDim("PaddingData"));
} }
if (ctx->HasOutput(framework::GradVarName("X"))) { if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->ShareDim("X", /*->*/ framework::GradVarName("X"));
ctx->ShareLoD("X", framework::GradVarName("X")); ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
} }
if (ctx->HasOutput(framework::GradVarName("Filter"))) { if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), ctx->SetOutputDim(framework::GradVarName("Filter"),
......
...@@ -102,8 +102,9 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { ...@@ -102,8 +102,9 @@ class SequencePoolGradOp : public framework::OperatorWithKernel {
for (int64_t i = 1; i < og_dims.size(); ++i) { for (int64_t i = 1; i < og_dims.size(); ++i) {
PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch."); PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch.");
} }
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->ShareLoD("X", framework::GradVarName("X")); ctx->ShareDim("X", /*->*/ framework::GradVarName("X"));
ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
} }
protected: protected:
......
...@@ -92,7 +92,7 @@ class SequenceReshapeGradOp : public framework::OperatorWithKernel { ...@@ -92,7 +92,7 @@ class SequenceReshapeGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequenceReshapeGradOp should not be null."); "Input(X) of SequenceReshapeGradOp should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->ShareDim("X", /*->*/ framework::GradVarName("X"));
ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
} }
}; };
......
...@@ -27,7 +27,8 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel { ...@@ -27,7 +27,8 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel {
"Input(X) of SequenceSoftmaxOp should not be null."); "Input(X) of SequenceSoftmaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SequenceSoftmaxOp should not be null."); "Output(Out) of SequenceSoftmaxOp should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
......
...@@ -151,9 +151,9 @@ class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase { ...@@ -151,9 +151,9 @@ class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase {
void operator()(framework::InferShapeContext *context) const override { void operator()(framework::InferShapeContext *context) const override {
PADDLE_ENFORCE(context->HasInput("X")); PADDLE_ENFORCE(context->HasInput("X"));
PADDLE_ENFORCE(context->HasOutput(framework::GradVarName("X"))); PADDLE_ENFORCE(context->HasOutput(framework::GradVarName("X")));
context->SetOutputDim(framework::GradVarName("X"),
context->GetInputDim("X")); context->ShareDim("X", /*->*/ framework::GradVarName("X"));
context->ShareLoD("X", framework::GradVarName("X")); context->ShareLoD("X", /*->*/ framework::GradVarName("X"));
} }
}; };
......
...@@ -40,7 +40,7 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel { ...@@ -40,7 +40,7 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel {
"The 2nd dimension of Input(X) and Input(Label) should " "The 2nd dimension of Input(X) and Input(Label) should "
"be equal."); "be equal.");
ctx->SetOutputDim("Out", x_dims); ctx->ShareDim("X", /*->*/ "Out");
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
......
...@@ -620,7 +620,23 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -620,7 +620,23 @@ All parameter, weight, gradient are variables in Paddle.
// -- python binds for parallel executor. // -- python binds for parallel executor.
py::class_<ParallelExecutor> pe(m, "ParallelExecutor"); py::class_<ParallelExecutor> pe(m, "ParallelExecutor");
py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy"); py::class_<ExecutionStrategy> exec_strategy(pe, "ExecutionStrategy", R"DOC(
ExecutionStrategy allows the user to more precisely control how to run
the program in ParallelExecutor by setting its properties.
The available properties include:
use_cuda (bool): Whether to use CUDA or not. Default True.
num_threads (int): The number of threads used to run the
operators in ParallelExecutor. If it is not set, ParallelExecutor
will set it according to the device count.
Default 0.
allow_op_delay (bool): Whether to delay the communication operators
to run. Default False.
num_iteration_per_drop_scope (int): the number of iterations between
dropping the local scopes. Default 100.
)DOC");
exec_strategy.def(py::init()) exec_strategy.def(py::init())
.def_property( .def_property(
"num_threads", "num_threads",
...@@ -658,7 +674,25 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -658,7 +674,25 @@ All parameter, weight, gradient are variables in Paddle.
: ExecutionStrategy::kDefault; : ExecutionStrategy::kDefault;
}); });
py::class_<BuildStrategy> build_strategy(pe, "BuildStrategy"); py::class_<BuildStrategy> build_strategy(pe, "BuildStrategy", R"DOC(
BuildStrategy allows the user to more precisely control how to
build the SSA Graph in ParallelExecutor by setting its properties.
The available properties include:
reduce_strategy (str): There are two reduce strategies, 'AllReduce'
and 'Reduce'. If you want all parameters to be optimized
on all devices, choose 'AllReduce'; if you choose
'Reduce', the parameters are evenly partitioned across the
devices for optimization, and the optimized parameters are
then broadcast to the other devices. Default 'AllReduce'.
gradient_scale_strategy (str): There are two ways of defining loss@grad,
'CoeffNumDevice' and 'Customized'. By default, ParallelExecutor
sets loss@grad according to the number of devices. If you want
to customize loss@grad, choose 'Customized'.
Default 'CoeffNumDevice'.
debug_graphviz_path (str): The path to which the SSA Graph is written in
graphviz format; useful for debugging. Default "".
)DOC");
py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy") py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
.value("Reduce", BuildStrategy::ReduceStrategy::kReduce) .value("Reduce", BuildStrategy::ReduceStrategy::kReduce)
......
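For reference, a minimal Python sketch of how these two strategies are typically configured and handed to ParallelExecutor; it assumes a toy regression network built in the default program, and the layer sizes, thread count, and CPU place are illustrative, not part of this change:

import paddle.fluid as fluid

# build a small network so there is a loss to parallelize
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 2                       # 0 lets ParallelExecutor pick a value
exec_strategy.num_iteration_per_drop_scope = 100    # drop local scopes every 100 iterations

build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
build_strategy.debug_graphviz_path = ""             # set a path here to dump the SSA Graph

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
pe = fluid.ParallelExecutor(use_cuda=False,
                            loss_name=loss.name,
                            exec_strategy=exec_strategy,
                            build_strategy=build_strategy)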
...@@ -600,7 +600,7 @@ EOF ...@@ -600,7 +600,7 @@ EOF
if [[ ${WITH_GPU} == "ON" ]]; then if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} || true" NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} || true"
else else
NCCL_DEPS="" NCCL_DEPS="true"
fi fi
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then
......
...@@ -60,7 +60,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ...@@ -60,7 +60,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib.* ${PADDLE_PYTHON_BUILD_DIR}/lib-python
DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ${MKL_DEPENDS}) set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ${MKL_DEPENDS})
......
...@@ -35,16 +35,15 @@ import itertools ...@@ -35,16 +35,15 @@ import itertools
import functools import functools
from .common import download from .common import download
import tarfile import tarfile
import six
import scipy.io as scio import scipy.io as scio
from paddle.dataset.image import * from paddle.dataset.image import *
from paddle.reader import * from paddle.reader import *
from paddle import compat as cpt
import os import os
import numpy as np import numpy as np
from multiprocessing import cpu_count from multiprocessing import cpu_count
import six import six
from six.moves import cPickle as pickle from six.moves import cPickle as pickle
from six.moves import zip
__all__ = ['train', 'test', 'valid'] __all__ = ['train', 'test', 'valid']
DATA_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/102flowers.tgz' DATA_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/102flowers.tgz'
...@@ -126,9 +125,11 @@ def reader_creator(data_file, ...@@ -126,9 +125,11 @@ def reader_creator(data_file,
batch = pickle.load(f) batch = pickle.load(f)
else: else:
batch = pickle.load(f, encoding='bytes') batch = pickle.load(f, encoding='bytes')
if six.PY3:
batch = cpt.to_text(batch)
data = batch['data'] data = batch['data']
labels = batch['label'] labels = batch['label']
for sample, label in zip(data, batch['label']): for sample, label in six.moves.zip(data, batch['label']):
yield sample, int(label) - 1 yield sample, int(label) - 1
if not cycle: if not cycle:
break break
......
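The hunk above follows the usual Python 2/3 pickle-compatibility pattern; a standalone sketch of the same pattern is below (the file name batch_0.pkl is hypothetical):

import six
from six.moves import cPickle as pickle
from paddle import compat as cpt

with open('batch_0.pkl', 'rb') as f:   # hypothetical pickled batch file
    if six.PY2:
        batch = pickle.load(f)
    else:
        # Python 3 needs encoding='bytes' for pickles written by Python 2;
        # cpt.to_text then turns the loaded bytes back into text, as in the hunk above
        batch = pickle.load(f, encoding='bytes')
        batch = cpt.to_text(batch)
data, labels = batch['data'], batch['label']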
...@@ -24,21 +24,10 @@ from .layer_function_generator import templatedoc ...@@ -24,21 +24,10 @@ from .layer_function_generator import templatedoc
import numpy import numpy
__all__ = [ __all__ = [
'create_tensor', 'create_tensor', 'create_parameter', 'create_global_var', 'cast', 'concat',
'create_parameter', 'sums', 'assign', 'fill_constant_batch_size_like', 'fill_constant',
'create_global_var', 'argmin', 'argmax', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf',
'cast', 'has_nan', 'isfinite'
'concat',
'sums',
'assign',
'fill_constant_batch_size_like',
'fill_constant',
'argmin',
'argmax',
'argsort',
'ones',
'zeros',
'reverse',
] ]
...@@ -652,3 +641,52 @@ def load_combine(out, file_path): ...@@ -652,3 +641,52 @@ def load_combine(out, file_path):
inputs={}, inputs={},
output={"Out": out}, output={"Out": out},
args={"file_path": file_path}) args={"file_path": file_path})
def has_inf(x):
"""
Test if any element of x is infinity.
Args:
x (Variable): The Tensor/LoDTensor to be checked.
Returns:
Variable: A tensor variable holding the result, a single bool value.
"""
helper = LayerHelper("isinf", **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(type="isinf", inputs={"X": x}, outputs={"Out": out})
return out
def has_nan(x):
"""
Test if any element of x is NaN.
Args:
x (Variable): The Tensor/LoDTensor to be checked.
Returns:
Variable: A tensor variable holding the result, a single bool value.
"""
helper = LayerHelper("isnan", **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(type="isnan", inputs={"X": x}, outputs={"Out": out})
return out
def isfinite(x):
"""
Test whether all elements of x are finite. Returns True if x contains
no infinity and no NaN, otherwise False.
Args:
x (Variable): The Tensor/LoDTensor to be checked.
Returns:
Variable: A tensor variable holding the result, a single bool value.
"""
helper = LayerHelper("isfinite", **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(type="isfinite", inputs={"X": x}, outputs={"Out": out})
return out
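A minimal usage sketch for the three new layers, checking a fed array for bad values; the tensor shape and the values fed in are illustrative:

import numpy as np
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2, 3], dtype='float32', append_batch_size=False)
inf_flag = fluid.layers.has_inf(x)
nan_flag = fluid.layers.has_nan(x)
finite_flag = fluid.layers.isfinite(x)

exe = fluid.Executor(fluid.CPUPlace())
feed_x = np.array([[1.0, np.inf, 3.0], [4.0, 5.0, np.nan]]).astype('float32')
inf_v, nan_v, finite_v = exe.run(fluid.default_main_program(),
                                 feed={'x': feed_x},
                                 fetch_list=[inf_flag, nan_flag, finite_flag])
# inf_v and nan_v should be truthy and finite_v falsy, since feed_x holds an inf and a NaN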
...@@ -74,7 +74,7 @@ def create_lod_tensor(data, recursive_seq_lens, place): ...@@ -74,7 +74,7 @@ def create_lod_tensor(data, recursive_seq_lens, place):
assert [ assert [
new_recursive_seq_lens new_recursive_seq_lens
] == recursive_seq_lens, "data and recursive_seq_lens do not match" ] == recursive_seq_lens, "data and recursive_seq_lens do not match"
flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = np.concatenate(data, axis=0)
flattened_data = flattened_data.reshape([len(flattened_data), 1]) flattened_data = flattened_data.reshape([len(flattened_data), 1])
return create_lod_tensor(flattened_data, recursive_seq_lens, place) return create_lod_tensor(flattened_data, recursive_seq_lens, place)
elif isinstance(data, np.ndarray): elif isinstance(data, np.ndarray):
......
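For context, a small sketch of the two input forms create_lod_tensor accepts; after this change, list input keeps the dtype numpy infers instead of being cast to int64 (the values below are illustrative):

import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()

# a nested list: flattened with np.concatenate, dtype inferred by numpy
t1 = fluid.create_lod_tensor([[1, 2], [3, 4, 5]], [[2, 3]], place)

# an already-flattened numpy array plus the recursive sequence lengths
data = np.random.rand(5, 1).astype('float32')
t2 = fluid.create_lod_tensor(data, [[2, 3]], place)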
...@@ -16,6 +16,8 @@ from __future__ import print_function ...@@ -16,6 +16,8 @@ from __future__ import print_function
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.op import Operator
class ElementwiseMulOp(OpTest): class ElementwiseMulOp(OpTest):
...@@ -115,5 +117,56 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): ...@@ -115,5 +117,56 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
} }
class TestElementWiseMulSelectedRows(OpTest):
def setUp(self):
self.rows = [0, 1, 2, 3, 4, 5, 6]
self.feature = 12
self.height = 100
self.input_shape = (len(self.rows), self.feature)
def prepare_input(self, scope, place):
self.input = {
"X": np.random.random(self.input_shape).astype("float32"),
"Y": np.random.random(self.input_shape).astype("float32")
}
def init_input(in_name):
x_selected_rows = scope.var(in_name).get_selected_rows()
x_selected_rows.set_height(self.height)
x_selected_rows.set_rows(self.rows)
x_array = self.input[in_name]
x_tensor = x_selected_rows.get_tensor()
x_tensor.set(x_array, place)
init_input("X")
init_input("Y")
def create_out_selected_row(self, scope):
return scope.var('Out').get_selected_rows()
def check_result(self, out_selected_rows):
assert out_selected_rows.height() == self.height
assert out_selected_rows.rows() == self.rows
out_tensor = np.array(out_selected_rows.get_tensor())
assert out_tensor.shape == self.input_shape
def check_with_place(self, place):
scope = core.Scope()
self.prepare_input(scope, place)
out_selected_rows = self.create_out_selected_row(scope)
out_selected_rows.set_height(0)
out_selected_rows.set_rows([])
# run the C++ operator directly on the SelectedRows inputs prepared above
elementwise_mul = Operator("elementwise_mul", X='X', Y='Y', Out='Out')
elementwise_mul.run(scope, place)
self.check_result(out_selected_rows)
def test_elewisemul_with_selected_rows_input(self):
places = [core.CPUPlace()]
for place in places:
self.check_with_place(place)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
class TestInf(OpTest):
def setUp(self):
self.op_type = "isinf"
self.dtype = np.float32
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
x[0] = np.inf
x[-1] = np.inf
self.inputs = {'X': x}
self.outputs = {'Out': np.array(True).astype(self.dtype)}
def init_dtype(self):
pass
def test_output(self):
self.check_output()
class TestFP16Inf(TestInf):
def init_dtype(self):
self.dtype = np.float16
class TestNAN(OpTest):
def setUp(self):
self.op_type = "isnan"
self.dtype = np.float32
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
x[0] = np.nan
x[-1] = np.nan
self.inputs = {'X': x}
self.outputs = {'Out': np.array(True).astype(self.dtype)}
def init_dtype(self):
pass
def test_output(self):
self.check_output()
class TestFP16NAN(TestNAN):
def init_dtype(self):
self.dtype = np.float16
class TestIsfinite(OpTest):
def setUp(self):
self.op_type = "isfinite"
self.dtype = np.float32
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
x[0] = np.inf
x[-1] = np.nan
# x contains both inf and nan, so isfinite over the whole tensor is False
self.inputs = {'X': x}
self.outputs = {'Out': np.array(False).astype(self.dtype)}
def init_dtype(self):
pass
def test_output(self):
self.check_output()
class TestFP16Isfinite(TestIsfinite):
def init_dtype(self):
self.dtype = np.float16
if __name__ == '__main__':
unittest.main()
...@@ -243,5 +243,87 @@ class TestKeepDimReduceSumMultiAxises(OpTest): ...@@ -243,5 +243,87 @@ class TestKeepDimReduceSumMultiAxises(OpTest):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out')
class TestReduceSumWithDimOne(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")}
self.attrs = {'dim': [1, 2], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceSumWithNumelOne(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': False}
self.outputs = {
'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
keepdims=False)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMeanWithDimOne(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': False}
self.outputs = {
'Out': self.inputs['X'].mean(
axis=tuple(self.attrs['dim']), keepdims=False)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMeanWithNumelOne(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].mean(
axis=tuple(self.attrs['dim']), keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceAll(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((1, 1, 1)).astype("float64")}
self.attrs = {'reduce_all': True, 'keep_dim': False}
self.outputs = {'Out': self.inputs['X'].sum()}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()