提交 6f053924 编写于 作者: D dongzhihong

Merge remote-tracking branch 'origin/develop' into backward2

...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# limitations under the License # limitations under the License
cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.0)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR})
......
...@@ -290,8 +290,22 @@ function(go_library TARGET_NAME) ...@@ -290,8 +290,22 @@ function(go_library TARGET_NAME)
set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
endif() endif()
# Add dummy code to support `make target_name` under Terminal Command
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
# Force the dummy source file to be refreshed on every build. The OUTPUT
# named below is never actually created, so the build tool always
# considers this command out of date and re-runs it.
# ${CMAKE_COMMAND} is used instead of a bare `cmake` so that the same
# CMake binary that configured this tree performs the touch, regardless
# of what is first on PATH. VERBATIM keeps argument escaping identical
# across platforms.
add_custom_command(
  OUTPUT dummy_rebulid_${TARGET_NAME}
  COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile}
  VERBATIM
)
# Wrap the custom command's output in a custom target so that it can be
# named as a dependency through `add_dependencies`.
add_custom_target(rebuild_${TARGET_NAME}
  DEPENDS dummy_rebulid_${TARGET_NAME}
)
# Add dummy code to support `make target_name` under Terminal Command
file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
if (go_library_SHARED OR go_library_shared) if (go_library_SHARED OR go_library_shared)
add_library(${TARGET_NAME} SHARED ${dummyfile}) add_library(${TARGET_NAME} SHARED ${dummyfile})
...@@ -302,6 +316,12 @@ function(go_library TARGET_NAME) ...@@ -302,6 +316,12 @@ function(go_library TARGET_NAME)
add_dependencies(${TARGET_NAME} ${go_library_DEPS}) add_dependencies(${TARGET_NAME} ${go_library_DEPS})
endif(go_library_DEPS) endif(go_library_DEPS)
# The "source file" of the library is `${dummyfile}`, which never
# changes, so the target would never rebuild on its own. Make the target
# depend on the custom target that touches the library "source file", so
# that a rebuild always happens.
add_dependencies(${TARGET_NAME} rebuild_${TARGET_NAME})
set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}") set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}")
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
......
# ddim lib # ddim lib
cc_library(enforce SRCS enforce.cc DEPS glog)
cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_library(tensor SRCS tensor.cc DEPS ddim place enforce paddle_memory)
cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_test(variable_test SRCS variable_test.cc) cc_test(variable_test SRCS variable_test.cc)
cc_test(scope_test SRCS scope_test.cc) cc_test(scope_test SRCS scope_test.cc)
proto_library(attr_type SRCS attr_type.proto) proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type) proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
proto_library(op_desc SRCS op_desc.proto DEPS attr_type) proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor) cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
# cc_library(fc_op SRCS fully_connected_op.cc DEPS operator) cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator)
py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto) py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/framework/enforce.h" #include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/framework/enforce.h" #include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include "paddle/framework/dim.h" #include "paddle/framework/dim.h"
#include "paddle/framework/enforce.h" #include "paddle/platform/enforce.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
namespace paddle { namespace paddle {
...@@ -119,17 +119,6 @@ int arity(const DDim& ddim); ...@@ -119,17 +119,6 @@ int arity(const DDim& ddim);
std::ostream& operator<<(std::ostream&, const DDim&); std::ostream& operator<<(std::ostream&, const DDim&);
// Converts a DDim into an Eigen::DSizes whose rank NDIMS is fixed at compile
// time. The arity of `dims` is enforced to equal NDIMS before any extent is
// copied.
template <int NDIMS>
Eigen::DSizes<Eigen::DenseIndex, NDIMS> ToEigenDSizes(const DDim& dims) {
  using Sizes = Eigen::DSizes<Eigen::DenseIndex, NDIMS>;
  PADDLE_ENFORCE(arity(dims) == NDIMS, "DDim and NDIMS must be same");
  Sizes result;
  // Past the enforce above the arity is known to be NDIMS.
  for (int axis = 0; axis < NDIMS; ++axis) {
    result[axis] = dims[axis];
  }
  return result;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
namespace framework {
// EigenDim converts a paddle::framework::DDim into an Eigen::DSizes whose
// rank D is fixed at compile time.
template <int D>
struct EigenDim {
  using Type = Eigen::DSizes<Eigen::DenseIndex, D>;

  // Copies each extent of `dims`, in order, into a freshly built Type.
  // Enforces that arity(dims) equals D before copying.
  static Type From(const DDim& dims) {
    const int rank = arity(dims);
    PADDLE_ENFORCE(rank == D, "D must match arity(DDim)");
    Type sizes;
    for (int axis = 0; axis < rank; ++axis) {
      sizes[axis] = dims[axis];
    }
    return sizes;
  }
};
// EigenTensor views a paddle::framework::Tensor as an Eigen::TensorMap of
// rank D (Type for mutable access, ConstType for read-only access). Each
// From overload builds the map from the tensor's data<T>() pointer.
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
struct EigenTensor {
  // TODO(qijun): the maps are currently the unaligned flavour; aligned and
  // unaligned versions are to be benchmarked against each other later.
  using Type = Eigen::TensorMap<Eigen::Tensor<T, D, MajorType, IndexType>>;
  using ConstType =
      Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;

  // Mutable view using the tensor's own dimensions.
  static Type From(Tensor& tensor) { return From(tensor, tensor.dims_); }

  // Mutable view using caller-supplied dimensions; EigenDim<D>::From checks
  // that `dims` has arity D.
  static Type From(Tensor& tensor, DDim dims) {
    T* buffer = tensor.data<T>();
    return Type(buffer, EigenDim<D>::From(dims));
  }

  // Read-only view using the tensor's own dimensions.
  static ConstType From(const Tensor& tensor) {
    return From(tensor, tensor.dims_);
  }

  // Read-only view using caller-supplied dimensions.
  static ConstType From(const Tensor& tensor, DDim dims) {
    const T* buffer = tensor.data<T>();
    return ConstType(buffer, EigenDim<D>::From(dims));
  }
};
// EigenVector is the rank-1 case of EigenTensor. It inherits Type, ConstType
// and From from EigenTensor<T, 1, MajorType, IndexType> and adds Flatten.
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
  // Flatten reshapes a Tensor of any rank into a one-dimensional view whose
  // single extent is the product of all of the tensor's dimensions.
  //
  // The return type and the From call go through EigenVector itself, so the
  // MajorType and IndexType chosen for this struct are honoured instead of
  // silently falling back to EigenTensor's defaults.
  static typename EigenVector::Type Flatten(Tensor& tensor) {
    return EigenVector::From(
        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
  }

  // Read-only counterpart of Flatten above.
  static typename EigenVector::ConstType Flatten(const Tensor& tensor) {
    return EigenVector::From(
        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
  }
};
// EigenMatrix is the rank-2 case of EigenTensor: a plain alias, so Type,
// ConstType and the From overloads are exactly those of
// EigenTensor<T, 2, MajorType, IndexType>.
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = EigenTensor<T, 2, MajorType, IndexType>;
} // namespace framework
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/framework/eigen.h"
#include <gtest/gtest.h>
namespace paddle {
namespace framework {
// EigenDim<3>::From must copy every extent of a rank-3 DDim in order.
TEST(EigenDim, From) {
  const auto sizes = EigenDim<3>::From(make_ddim({1, 2, 3}));
  // The extents were chosen as 1, 2, 3, i.e. axis + 1.
  for (int axis = 0; axis < 3; ++axis) {
    ASSERT_EQ(axis + 1, sizes[axis]);
  }
}
// A 1x2x3 Tensor filled with 0..5 must be visible through
// EigenTensor<float, 3>::From with the same shape and row-major element order.
TEST(Eigen, Tensor) {
  Tensor tensor;
  float* buf =
      tensor.mutable_data<float>(make_ddim({1, 2, 3}), platform::CPUPlace());
  const int total = 1 * 2 * 3;
  for (int idx = 0; idx != total; ++idx) {
    buf[idx] = static_cast<float>(idx);
  }

  auto view = EigenTensor<float, 3>::From(tensor);
  ASSERT_EQ(1, view.dimension(0));
  ASSERT_EQ(2, view.dimension(1));
  ASSERT_EQ(3, view.dimension(2));

  // Walking (i, j, k) in nested order visits the row-major linear index
  // (i * 2 + j) * 3 + k as 0, 1, 2, ..., so a running counter is equivalent.
  int expected = 0;
  for (int i = 0; i < 1; ++i) {
    for (int j = 0; j < 2; ++j) {
      for (int k = 0; k < 3; ++k) {
        ASSERT_NEAR(expected, view(i, j, k), 1e-6f);
        ++expected;
      }
    }
  }
}
// A rank-1 Tensor viewed through EigenVector<float>::From keeps its length
// and its element order.
TEST(Eigen, VectorFrom) {
  const int kLength = 6;
  Tensor tensor;
  float* buf =
      tensor.mutable_data<float>(make_ddim({kLength}), platform::CPUPlace());
  for (int idx = 0; idx != kLength; ++idx) {
    buf[idx] = static_cast<float>(idx);
  }

  auto view = EigenVector<float>::From(tensor);
  ASSERT_EQ(kLength, view.dimension(0));
  for (int idx = 0; idx != kLength; ++idx) {
    ASSERT_NEAR(idx, view(idx), 1e-6f);
  }
}
// Flattening a 1x2x3 Tensor must give a rank-1 view of length 6 that walks
// the buffer in its stored order.
TEST(Eigen, VectorFlatten) {
  const int total = 1 * 2 * 3;
  Tensor tensor;
  float* buf =
      tensor.mutable_data<float>(make_ddim({1, 2, 3}), platform::CPUPlace());
  for (int idx = 0; idx != total; ++idx) {
    buf[idx] = static_cast<float>(idx);
  }

  auto flat = EigenVector<float>::Flatten(tensor);
  ASSERT_EQ(total, flat.dimension(0));
  for (int idx = 0; idx != total; ++idx) {
    ASSERT_NEAR(idx, flat(idx), 1e-6f);
  }
}
// A 2x3 Tensor viewed through EigenMatrix<float>::From keeps its shape and
// row-major element order.
TEST(Eigen, Matrix) {
  Tensor tensor;
  float* buf =
      tensor.mutable_data<float>(make_ddim({2, 3}), platform::CPUPlace());
  const int total = 2 * 3;
  for (int idx = 0; idx != total; ++idx) {
    buf[idx] = static_cast<float>(idx);
  }

  auto view = EigenMatrix<float>::From(tensor);
  ASSERT_EQ(2, view.dimension(0));
  ASSERT_EQ(3, view.dimension(1));

  // Nested (row, col) order visits linear index row * 3 + col as 0, 1, ...
  int expected = 0;
  for (int row = 0; row < 2; ++row) {
    for (int col = 0; col < 3; ++col) {
      ASSERT_NEAR(expected, view(row, col), 1e-6f);
      ++expected;
    }
  }
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/enforce.h"
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/string/printf.h>
#include <exception>
#include <sstream>
namespace paddle {
namespace framework {
/**
 * @brief Exception thrown when an enforced condition does not hold.
 *
 * Derives from std::exception. The text returned by what() is the caller's
 * message followed by the source location, formatted as
 * "<msg> at [<file>:<line>];".
 */
class EnforceNotMet : public std::exception {
 public:
  EnforceNotMet(const std::string& msg, const char* file, int fileline)
      : all_msg_(Compose(msg, file, fileline)) {}

  const char* what() const noexcept override { return all_msg_.c_str(); }

 private:
  // Builds the full what() text once, at construction time.
  static std::string Compose(const std::string& msg, const char* file,
                             int fileline) {
    std::ostringstream buf;
    buf << msg << " at [" << file << ":" << fileline << "];";
    return buf.str();
  }

  std::string all_msg_;
};
// Branch-prediction hint; see https://stackoverflow.com/questions/30130930/
// __builtin_expect is a GCC/Clang compiler builtin (an extension, not part of
// the C++ standard). An enforced condition is expected to hold almost always,
// so marking the failure test as unlikely lets the compiler lay out the
// success path as the fast path.
// Compilers without the builtin fall back to the plain boolean value, which
// has the same truth value and simply carries no hint.
#if defined(__GNUC__) || defined(__clang__)
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#else
#define UNLIKELY(condition) (static_cast<bool>(condition))
#endif
/**
 * @brief Throw an EnforceNotMet exception with __FILE__ and __LINE__ filled in
 * automatically.
 *
 * All macro arguments are forwarded unchanged to ::paddle::string::Sprintf,
 * and its result becomes the exception message.
 * NOTE(review): presumably the arguments are a printf-style format string
 * followed by its values -- confirm against paddle/string/printf.h.
 *
 * The do { ... } while (0) wrapper makes the expansion a single statement, so
 * the macro must be followed by a semicolon at the call site.
 */
#define PADDLE_THROW(...) \
do { \
throw ::paddle::framework::EnforceNotMet( \
::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \
} while (0)
/**
 * @brief Enforce a condition; on failure report the formatted message.
 *
 * The arguments after `condition` are forwarded to
 * ::paddle::string::Sprintf to build the failure message.
 *
 * The failure behaviour depends on the build mode:
 *  - NDEBUG defined: throws ::paddle::framework::EnforceNotMet through
 *    PADDLE_THROW.
 *  - otherwise: goes through glog's CHECK, streaming the message into it.
 *    NOTE(review): a failed CHECK presumably terminates the process rather
 *    than throwing, so code catching EnforceNotMet only observes it in
 *    NDEBUG builds -- confirm this split is intended.
 *
 * Neither expansion ends with a semicolon, so the call site supplies it and
 * `if (c) PADDLE_ENFORCE(...); else ...` parses correctly in both modes.
 */
#ifdef NDEBUG
#define PADDLE_ENFORCE(condition, ...) \
  do {                                 \
    if (UNLIKELY(!(condition))) {      \
      PADDLE_THROW(__VA_ARGS__);       \
    }                                  \
  } while (0)
#else
#define PADDLE_ENFORCE(condition, ...) \
  CHECK(condition) << ::paddle::string::Sprintf(__VA_ARGS__)
#endif
} // namespace framework
} // namespace paddle
...@@ -66,8 +66,7 @@ TEST(OpKernel, all) { ...@@ -66,8 +66,7 @@ TEST(OpKernel, all) {
net->Run(scope, dev_ctx); net->Run(scope, dev_ctx);
ASSERT_EQ(2, infer_shape_cnt); ASSERT_EQ(2, infer_shape_cnt);
ASSERT_EQ(2, run_cnt); ASSERT_EQ(2, run_cnt);
ASSERT_THROW(net->AddOp(op2), std::runtime_error);
ASSERT_THROW(net->AddOp(op2), EnforceNotMet);
} }
TEST(AddBackwardOp, TestGradOp) { TEST(AddBackwardOp, TestGradOp) {
auto net = std::make_shared<PlainNet>(); auto net = std::make_shared<PlainNet>();
......
...@@ -91,7 +91,7 @@ TEST(OpRegistry, IllegalAttr) { ...@@ -91,7 +91,7 @@ TEST(OpRegistry, IllegalAttr) {
try { try {
paddle::framework::OperatorPtr op __attribute__((unused)) = paddle::framework::OperatorPtr op __attribute__((unused)) =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
} catch (paddle::framework::EnforceNotMet err) { } catch (std::runtime_error& err) {
caught = true; caught = true;
std::string msg = "larger_than check fail"; std::string msg = "larger_than check fail";
const char* err_msg = err.what(); const char* err_msg = err.what();
...@@ -138,7 +138,7 @@ TEST(OpRegistry, CustomChecker) { ...@@ -138,7 +138,7 @@ TEST(OpRegistry, CustomChecker) {
try { try {
paddle::framework::OperatorPtr op __attribute__((unused)) = paddle::framework::OperatorPtr op __attribute__((unused)) =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
} catch (paddle::framework::EnforceNotMet err) { } catch (std::runtime_error& err) {
caught = true; caught = true;
std::string msg = "Attribute 'test_attr' is required!"; std::string msg = "Attribute 'test_attr' is required!";
const char* err_msg = err.what(); const char* err_msg = err.what();
...@@ -157,7 +157,7 @@ TEST(OpRegistry, CustomChecker) { ...@@ -157,7 +157,7 @@ TEST(OpRegistry, CustomChecker) {
try { try {
paddle::framework::OperatorPtr op __attribute__((unused)) = paddle::framework::OperatorPtr op __attribute__((unused)) =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
} catch (paddle::framework::EnforceNotMet err) { } catch (std::runtime_error& err) {
caught = true; caught = true;
std::string msg = "'test_attr' must be even!"; std::string msg = "'test_attr' must be even!";
const char* err_msg = err.what(); const char* err_msg = err.what();
...@@ -196,7 +196,7 @@ TEST(ProtoMaker, DuplicatedAttr) { ...@@ -196,7 +196,7 @@ TEST(ProtoMaker, DuplicatedAttr) {
pd::OpProto op_proto; pd::OpProto op_proto;
pd::OpAttrChecker op_checker; pd::OpAttrChecker op_checker;
auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker); auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet); ASSERT_THROW(proto_maker.Validate(), std::runtime_error);
} }
class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker { class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker {
...@@ -212,5 +212,5 @@ TEST(ProtoMaker, DuplicatedInOut) { ...@@ -212,5 +212,5 @@ TEST(ProtoMaker, DuplicatedInOut) {
pd::OpProto op_proto; pd::OpProto op_proto;
pd::OpAttrChecker op_checker; pd::OpAttrChecker op_checker;
auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet); ASSERT_THROW(proto_maker.Validate(), std::runtime_error);
} }
...@@ -19,9 +19,8 @@ limitations under the License. */ ...@@ -19,9 +19,8 @@ limitations under the License. */
#include <memory> #include <memory>
#include <typeindex> #include <typeindex>
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/framework/enforce.h"
#include "paddle/framework/tensor_types.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
...@@ -35,6 +34,15 @@ struct CastToPyBufferImpl; ...@@ -35,6 +34,15 @@ struct CastToPyBufferImpl;
namespace framework { namespace framework {
class Tensor { class Tensor {
template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl;
template <typename T, size_t D, int MajorType, typename IndexType>
friend struct EigenTensor;
template <typename T, int MajorType, typename IndexType>
friend struct EigenVector;
public: public:
Tensor() : offset_(0) {} Tensor() : offset_(0) {}
...@@ -46,7 +54,7 @@ class Tensor { ...@@ -46,7 +54,7 @@ class Tensor {
} }
template <typename T> template <typename T>
T* raw_data() const { T* data() {
CheckDims<T>(); CheckDims<T>();
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_); offset_);
...@@ -86,66 +94,6 @@ class Tensor { ...@@ -86,66 +94,6 @@ class Tensor {
offset_); offset_);
} }
// Returns a mutable Eigen view of the tensor's buffer interpreted with the
// caller-supplied dimensions `new_dims`, which must have arity NDIMS
// (checked inside ToEigenDSizes).
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::Tensor shaped(DDim new_dims) {
  using View = typename TTypes<T, NDIMS>::Tensor;
  Eigen::array<Eigen::DenseIndex, NDIMS> extents =
      paddle::framework::ToEigenDSizes<NDIMS>(new_dims);
  return View(raw_data<T>(), extents);
}
// Returns a mutable Eigen view of rank NDIMS over the tensor's buffer, using
// the tensor's own dims_ as the shape.
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::Tensor tensor() {
  using View = typename TTypes<T, NDIMS>::Tensor;
  T* buffer = raw_data<T>();
  return View(buffer, paddle::framework::ToEigenDSizes<NDIMS>(dims_));
}
// Views the whole buffer as a rank-1 tensor whose single extent is the
// product of all dimensions in dims_.
template <typename T>
typename TTypes<T>::Flat flat() {
  const int total = static_cast<int>(product(dims_));
  return shaped<T, 1>(make_ddim({total}));
}
// Returns a mutable rank-1 Eigen view (TTypes<T>::Vec) by forwarding to
// tensor<T, 1>(). Unlike flat(), the tensor's own dims_ are used as the
// shape, so they must already be rank 1.
template <typename T>
typename TTypes<T>::Vec vec() {
return tensor<T, 1>();
}
// Returns a mutable rank-2 Eigen view (TTypes<T>::Matrix) by forwarding to
// tensor<T, 2>(); the tensor's own dims_ are used as the shape, so they must
// already be rank 2.
template <typename T>
typename TTypes<T>::Matrix matrix() {
return tensor<T, 2>();
}
// Const versions of all the methods above. Each one returns the matching
// Const* map type from TTypes, built over the pointer returned by the const
// data<T>() accessor, so a view obtained from a const Tensor cannot be used
// to write to its buffer.
// NOTE(review): assumes data<T>() const yields a pointer-to-const, as its
// use for the ConstType maps elsewhere in the framework suggests.

// Read-only view with caller-supplied dimensions of arity NDIMS.
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::ConstTensor shaped(DDim new_dims) const {
  Eigen::array<Eigen::DenseIndex, NDIMS> dims =
      paddle::framework::ToEigenDSizes<NDIMS>(new_dims);
  return typename TTypes<T, NDIMS>::ConstTensor(data<T>(), dims);
}

// Read-only view of rank NDIMS using the tensor's own dims_.
// (TTypes declares ConstTensor; it has no member named ConstantTensor.)
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::ConstTensor tensor() const {
  return typename TTypes<T, NDIMS>::ConstTensor(
      data<T>(), paddle::framework::ToEigenDSizes<NDIMS>(dims_));
}

// Read-only rank-1 view over the whole buffer.
template <typename T>
typename TTypes<T>::ConstFlat flat() const {
  return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
}

// Read-only rank-1 view using the tensor's own dims_.
template <typename T>
typename TTypes<T>::ConstVec vec() const {
  return tensor<T, 1>();
}

// Read-only rank-2 view using the tensor's own dims_.
template <typename T>
typename TTypes<T>::ConstMatrix matrix() const {
  return tensor<T, 2>();
}
template <typename T> template <typename T>
void ShareDataFrom(const Tensor& src) { void ShareDataFrom(const Tensor& src) {
src.CheckDims<T>(); src.CheckDims<T>();
...@@ -251,8 +199,6 @@ class Tensor { ...@@ -251,8 +199,6 @@ class Tensor {
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated. std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_; DDim dims_;
size_t offset_; // marks the begin of tensor data area. size_t offset_; // marks the begin of tensor data area.
template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl;
}; };
} // namespace framework } // namespace framework
......
...@@ -33,7 +33,7 @@ TEST(Tensor, DataAssert) { ...@@ -33,7 +33,7 @@ TEST(Tensor, DataAssert) {
bool caught = false; bool caught = false;
try { try {
src_tensor.data<double>(); src_tensor.data<double>();
} catch (paddle::framework::EnforceNotMet err) { } catch (std::runtime_error& err) {
caught = true; caught = true;
std::string msg = std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first."; "Tenosr holds no memory. Call Tensor::mutable_data first.";
...@@ -107,7 +107,7 @@ TEST(Tensor, ShareDataFrom) { ...@@ -107,7 +107,7 @@ TEST(Tensor, ShareDataFrom) {
bool caught = false; bool caught = false;
try { try {
dst_tensor.ShareDataFrom<float>(src_tensor); dst_tensor.ShareDataFrom<float>(src_tensor);
} catch (EnforceNotMet err) { } catch (std::runtime_error& err) {
caught = true; caught = true;
std::string msg = std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first."; "Tenosr holds no memory. Call Tensor::mutable_data first.";
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
namespace framework {
// Helper to define Tensor types given that the scalar is of type T.
template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
struct TTypes {
// Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Tensor;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstTensor;
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
typedef Eigen::TensorMap<
Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Scalar;
typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>,
Eigen::RowMajor, IndexType>,
Eigen::Aligned>
ConstScalar;
// Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Flat;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Vec;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstVec;
// Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Matrix;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstMatrix;
};
} // namespace framework
} // namespace paddle
...@@ -36,6 +36,7 @@ if(WITH_GPU) ...@@ -36,6 +36,7 @@ if(WITH_GPU)
add_simple_unittest(MulOpTest) add_simple_unittest(MulOpTest)
add_simple_unittest(CosSimOpTest) add_simple_unittest(CosSimOpTest)
add_simple_unittest(RowConvOpTest) add_simple_unittest(RowConvOpTest)
add_simple_unittest(CropOpTest)
endif() endif()
add_simple_unittest(ConvOpTest) add_simple_unittest(ConvOpTest)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CropOp.h"
#include "paddle/function/TensorShape.h"
#include "paddle/math/Vector.h"
namespace paddle {
/**
 * \brief CPU crop: copies an outShape-sized window out of a 4-D input.
 *
 * The window starts at the offsets stored at indices 1..3 of the
 * "crop_corner" config entry (second, third and fourth dimension); index 0
 * is not read, so the first dimension is never cropped.
 *
 * \param[out] outputs  destination buffer, laid out as outShape.
 * \param[in]  inputs   source buffer, laid out as inShape.
 * \param[in]  inShape  shape of the input; entries 0..3 are read.
 * \param[in]  outShape shape of the output; entries 1..3 are read.
 * \param[in]  conf     config holding the "crop_corner" vector.
 */
template <>
void Crop<DEVICE_TYPE_CPU>(real* outputs,
                           const real* inputs,
                           const TensorShape inShape,
                           const TensorShape outShape,
                           const FuncConfig& conf) {
  std::vector<uint32_t> crop_corner =
      conf.get<std::vector<uint32_t>>("crop_corner");
  // Indices 1..3 are read below; fail loudly instead of reading past the end
  // of a short vector.
  CHECK_GE(crop_corner.size(), 4UL);
  int cCrop = crop_corner[1];
  int hCrop = crop_corner[2];
  int wCrop = crop_corner[3];

  int num = inShape[0];
  int inC = inShape[1];
  int inH = inShape[2];
  int inW = inShape[3];

  int outC = outShape[1];
  int outH = outShape[2];
  int outW = outShape[3];

  // The cropped window must lie inside the input, otherwise the memcpy below
  // would read outside the input buffer.
  CHECK_LE(cCrop + outC, inC);
  CHECK_LE(hCrop + outH, inH);
  CHECK_LE(wCrop + outW, inW);

  for (int n = 0; n < num; n++) {
    for (int c = 0; c < outC; c++) {
      for (int h = 0; h < outH; h++) {
        // Offsets are computed in size_t so that large tensors do not
        // overflow int arithmetic.
        size_t outoff = ((static_cast<size_t>(n) * outC + c) * outH + h) * outW;
        size_t inoff =
            ((static_cast<size_t>(n) * inC + c + cCrop) * inH + h + hCrop) *
                inW +
            wCrop;
        // One contiguous output row per copy.
        memcpy(outputs + outoff, inputs + inoff, outW * sizeof(real));
      }
    }
  }
}
/**
 * \brief CPU backward of crop: accumulates a small gradient into the
 *        matching window of a larger gradient buffer.
 *
 * Every row of inGrad (laid out as inShape) is added onto outGrad (laid out
 * as outShape) at the offsets stored at indices 1..3 of the "crop_corner"
 * config entry. outGrad is accumulated into, not overwritten.
 *
 * \param[in]     inGrad   gradient buffer that is read, laid out as inShape.
 * \param[in,out] outGrad  gradient buffer that is added to, laid out as
 *                         outShape.
 * \param[in]     inShape  shape of inGrad; entries 1..3 are read.
 * \param[in]     outShape shape of outGrad; entries 0..3 are read.
 * \param[in]     conf     config holding the "crop_corner" vector.
 */
template <>
void CropGrad<DEVICE_TYPE_CPU>(const real* inGrad,
                               real* outGrad,
                               const TensorShape inShape,
                               const TensorShape outShape,
                               const FuncConfig& conf) {
  std::vector<uint32_t> crop_corner =
      conf.get<std::vector<uint32_t>>("crop_corner");
  // Indices 1..3 are read below; fail loudly instead of reading past the end
  // of a short vector.
  CHECK_GE(crop_corner.size(), 4UL);
  int cCrop = crop_corner[1];
  int hCrop = crop_corner[2];
  int wCrop = crop_corner[3];

  int num = outShape[0];
  int outC = outShape[1];
  int outH = outShape[2];
  int outW = outShape[3];

  int inC = inShape[1];
  int inH = inShape[2];
  int inW = inShape[3];

  // The window that is written must lie inside outGrad, otherwise the
  // accumulation below would touch memory outside the buffer.
  CHECK_LE(cCrop + inC, outC);
  CHECK_LE(hCrop + inH, outH);
  CHECK_LE(wCrop + inW, outW);

  for (int n = 0; n < num; n++) {
    for (int c = 0; c < inC; c++) {
      for (int h = 0; h < inH; h++) {
        // Offsets are computed in size_t so that large tensors do not
        // overflow int arithmetic.
        size_t outoff =
            ((static_cast<size_t>(n) * outC + c + cCrop) * outH + h + hCrop) *
                outW +
            wCrop;
        size_t inoff = ((static_cast<size_t>(n) * inC + c) * inH + h) * inW;
        // CpuVector needs a non-const pointer; inG is only read here.
        CpuVector inG = CpuVector(inW, const_cast<real*>(inGrad + inoff));
        CpuVector outG = CpuVector(inW, outGrad + outoff);
        outG += inG;
      }
    }
  }
}
/**
 * \brief Crop the input according to a corner offset and the output shape.
 *        Input and output are 4-D tensors; only the 2nd to 4th dimensions
 *        are cropped.
 *
 * The config passed to init() is stored and handed to Crop<Device> on every
 * calc(); Crop reads the corner from its "crop_corner" entry. The size of
 * the cropped window is the shape of the output argument.
 *
 * Arguments of calc():
 * \param inputs  exactly one 4-D tensor, the value to crop.
 * \param outputs exactly one 4-D tensor with arg type ASSIGN_TO, receiving
 *                the cropped value.
 *
 * For example,
 * Input(2,2,2,3) = [
 *                    [ [[1,2,3], [3,4,5]],
 *                      [[2,3,5], [1,6,7]] ],
 *                    [ [[4,3,1], [1,8,7]],
 *                      [[3,8,9], [2,3,5]] ]
 *                  ] # the input shape is (2,2,2,3)
 *
 * with offsets (0,1,1) on the 2nd to 4th dimensions and an output shape of
 * (2,2,1,2):
 * Output(2,2,1,2) = [
 *                     [ [[4,5]],
 *                       [[6,7]] ],
 *                     [ [[8,7]],
 *                       [[3,5]] ]
 *                   ] # the output shape is (2,2,1,2)
 */
template <DeviceType Device>
class CropFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override { conf_ = config; }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(1UL, inputs.size());
    CHECK_EQ(1UL, outputs.size());
    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);

    real* dst = outputs[0].data<real>();
    const real* src = inputs[0].data<real>();
    Crop<Device>(dst, src, inputs[0].shape(), outputs[0].shape(), conf_);
  }

private:
  // Config captured in init(); forwarded unchanged to Crop<Device>.
  FuncConfig conf_;
};
/**
 * \brief The backward propagation of the cropping Function.
 *
 * The config passed to init() is stored and handed to CropGrad<Device> on
 * every calc().
 *
 * Arguments of calc():
 * \param inputs  exactly one tensor: the gradient with respect to the output
 *                value of CropFunc.
 * \param outputs exactly one tensor with arg type ADD_TO: the gradient with
 *                respect to the input value of CropFunc.
 */
template <DeviceType Device>
class CropGradFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override { conf_ = config; }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    CHECK_EQ(1UL, inputs.size());
    CHECK_EQ(1UL, outputs.size());
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);

    const real* upstream = inputs[0].data<real>();
    real* accumulated = outputs[0].data<real>();
    CropGrad<Device>(
        upstream, accumulated, inputs[0].shape(), outputs[0].shape(), conf_);
  }

private:
  // Config captured in init(); forwarded unchanged to CropGrad<Device>.
  FuncConfig conf_;
};
// Register the CPU variants of the forward and backward crop functions.
// NOTE(review): presumably REGISTER_TYPED_FUNC makes the class creatable by
// name for the given device -- confirm against Function.h.
REGISTER_TYPED_FUNC(Crop, CPU, CropFunc);
REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc);
// The GPU variants are registered only when the build is not CPU-only.
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(Crop, GPU, CropFunc);
REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
/**
 * \brief This function crops the input according to the specified start
 *        point and output shape.
 *
 * \param[out] outputs  buffer that receives the cropped result.
 * \param[in]  inputs   input data.
 * \param[in]  inShape  the shape of the input tensor.
 * \param[in]  outShape the shape of the output tensor, i.e. the size of the
 *                      cropped window.
 * \param[in]  conf     the cropping config.
 */
template <DeviceType Device>
void Crop(real* outputs,
const real* inputs,
const TensorShape inShape,
const TensorShape outShape,
const FuncConfig& conf);
/**
 * \brief Cropping operation backward.
 *
 * \param[in]  inGrad   gradient buffer that is read (pointer to const).
 * \param[out] outGrad  gradient buffer that is written.
 * \param[in]  inShape  the shape that inGrad is laid out with.
 * \param[in]  outShape the shape that outGrad is laid out with.
 * \param[in]  conf     the cropping config.
 */
template <DeviceType Device>
void CropGrad(const real* inGrad,
real* outGrad,
const TensorShape inShape,
const TensorShape outShape,
const FuncConfig& conf);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "CropOp.h"
namespace paddle {
// Forward crop kernel: one thread per output element. The flat output index is
// decomposed into (n, c, h, w) using the output extents, shifted by the crop
// offsets, and re-flattened using the input extents to find the source element.
__global__ void KeCrop(real* outputs, const real* inputs,
                       int inC, int inH, int inW,
                       int cropC, int cropH, int cropW,
                       int outC, int outH, int outW, int nthreads) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx >= nthreads) {
    return;  // thread lies beyond the last output element
  }

  // Peel the coordinates off the flat index, innermost dimension first.
  int rest = idx;
  const int w = rest % outW;
  rest /= outW;
  const int h = rest % outH;
  rest /= outH;
  const int c = rest % outC;
  const int n = rest / outC;

  // Flatten the shifted coordinates with the input extents.
  const int srcPlane = n * inC + c + cropC;
  const int srcRow = srcPlane * inH + h + cropH;
  const int off = srcRow * inW + cropW + w;

  outputs[idx] = inputs[off];
}
// GPU specialization of Crop: reads the channel/height/width offsets from the
// "crop_corner" config entry and launches KeCrop with one thread per output
// element.
template <>
void Crop<DEVICE_TYPE_GPU>(real* outputs,
                           const real* inputs,
                           const TensorShape inShape,
                           const TensorShape outShape,
                           const FuncConfig& conf) {
  const std::vector<uint32_t> corner =
      conf.get<std::vector<uint32_t>>("crop_corner");
  // Entry 0 of the corner is not used here; only C, H and W are offset.
  const int cropC = corner[1];
  const int cropH = corner[2];
  const int cropW = corner[3];

  // Extents of the source tensor.
  const int num = inShape[0];
  const int inC = inShape[1];
  const int inH = inShape[2];
  const int inW = inShape[3];

  // Extents of the cropped tensor.
  const int outC = outShape[1];
  const int outH = outShape[2];
  const int outW = outShape[3];

  // One thread per output element, rounded up to whole blocks.
  const size_t nth = num * outC * outH * outW;
  const int blockSize = 1024;
  const int gridSize = (nth + blockSize - 1) / blockSize;

  KeCrop<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
    (outputs, inputs, inC, inH, inW, cropC, cropH, cropW,
     outC, outH, outW, nth);
  CHECK_SYNC("Crop");
}
// Backward crop kernel: one thread per element of inGrad. The flat index is
// decomposed with the `in*` extents, shifted by the crop offsets, and
// re-flattened with the `out*` extents; the gradient value is then added to
// that position of outGrad.
__global__ void KeCropDiff(const real* inGrad, real* outGrad,
                           int inC, int inH, int inW,
                           int cropC, int cropH, int cropW,
                           int outC, int outH, int outW, int nthreads) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx >= nthreads) {
    return;  // thread lies beyond the last gradient element
  }

  // Peel the coordinates off the flat index, innermost dimension first.
  int rest = idx;
  const int w = rest % inW;
  rest /= inW;
  const int h = rest % inH;
  rest /= inH;
  const int c = rest % inC;
  const int n = rest / inC;

  // Flatten the shifted coordinates with the destination extents.
  const int dstPlane = n * outC + c + cropC;
  const int dstRow = dstPlane * outH + h + cropH;
  const int off = dstRow * outW + cropW + w;

  outGrad[off] += inGrad[idx];
}
// GPU specialization of CropGrad: reads the channel/height/width offsets from
// the "crop_corner" config entry and launches KeCropDiff with one thread per
// element of inGrad.
template <>
void CropGrad<DEVICE_TYPE_GPU>(const real* inGrad,
                               real* outGrad,
                               const TensorShape inShape,
                               const TensorShape outShape,
                               const FuncConfig& conf) {
  const std::vector<uint32_t> corner =
      conf.get<std::vector<uint32_t>>("crop_corner");
  // Entry 0 of the corner is not used here; only C, H and W are offset.
  const int cropC = corner[1];
  const int cropH = corner[2];
  const int cropW = corner[3];

  // Extents of the buffer being accumulated into.
  const int num = outShape[0];
  const int outC = outShape[1];
  const int outH = outShape[2];
  const int outW = outShape[3];

  // Extents of the incoming gradient.
  const int inC = inShape[1];
  const int inH = inShape[2];
  const int inW = inShape[3];

  // One thread per incoming gradient element, rounded up to whole blocks.
  const size_t nth = num * inC * inH * inW;
  const int blockSize = 1024;
  const int gridSize = (nth + blockSize - 1) / blockSize;

  KeCropDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
    (inGrad, outGrad, inC, inH, inW, cropC, cropH, cropW,
     outC, outH, outW, nth);
  CHECK_SYNC("CropGrad");
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
namespace paddle {
// Compares the CPU and GPU implementations of "Crop" and "CropGrad" over a
// grid of batch sizes, channel counts and image sizes, always cropping a
// 2x3x3 region at corner (1, 1, 1).
TEST(Crop, real) {
  for (size_t numSamples : {5, 32}) {
    for (size_t channels : {5, 5, 32}) {
      for (size_t imgSizeH : {5, 33, 100}) {
        for (size_t imgSizeW : {5, 32, 96}) {
          VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
                  << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;

          const TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
          const TensorShape outDims{numSamples, 2, 3, 3};

          for (bool test_grad : {false, true}) {
            // The backward pass swaps the roles of the two shapes and
            // accumulates into its output instead of assigning.
            const char* funcName = test_grad ? "CropGrad" : "Crop";
            const TensorShape& srcDims = test_grad ? outDims : inDims;
            const TensorShape& dstDims = test_grad ? inDims : outDims;
            const auto argType = test_grad ? ADD_TO : ASSIGN_TO;

            CpuGpuFuncCompare compare(
                funcName,
                FuncConfig()
                    .set<std::vector<uint32_t>>("crop_corner", {0, 1, 1, 1})
                    .set<std::vector<uint32_t>>("crop_shape", {0, 2, 3, 3}));
            compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, srcDims));
            compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, dstDims, argType),
                               argType);
            compare.run();
          }
        }
      }
    }
  }
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CropLayer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(crop, CropLayer);
bool CropLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
CHECK_LE(static_cast<int>(inputLayers_.size()), 2);
CHECK_GE(static_cast<int>(inputLayers_.size()), 1);
crop_axis_ = config_.axis();
for (int i = 0; i < config_.offset_size(); i++) {
crop_offsets_.push_back(config_.offset(i));
}
// 1. get input_0 shape
auto& input0_img_conf = config_.inputs(0).image_conf();
inDims_ = TensorShape({0,
input0_img_conf.channels(),
input0_img_conf.has_img_size_y()
? input0_img_conf.img_size_y()
: input0_img_conf.img_size(),
input0_img_conf.img_size()});
// 2. get target dims from config
if (config_.inputs_size() == 1) {
targetDims_ = TensorShape({config_.shape(0),
config_.shape(1),
config_.shape(2),
config_.shape(3)});
} else {
// 2. get input_1 shape
auto& input1_img_conf = config_.inputs(1).image_conf();
targetDims_ = TensorShape({0,
input1_img_conf.channels(),
input1_img_conf.has_img_size_y()
? input1_img_conf.img_size_y()
: input1_img_conf.img_size(),
input1_img_conf.img_size()});
}
// 3. get final crop corner
int dimSize = 4;
crop_corner_ = {0, 0, 0, 0};
for (int i = 0; i < dimSize; i++) {
if (i >= crop_axis_) {
if (crop_offsets_.size() > 1) {
crop_corner_[i] = crop_offsets_[i - crop_axis_];
} else {
crop_corner_[i] = crop_offsets_[0];
}
}
}
outDims_ = TensorShape(4);
createFunction(
forward_, "Crop", FuncConfig().set("crop_corner", crop_corner_));
createFunction(
backward_, "CropGrad", FuncConfig().set("crop_corner", crop_corner_));
return true;
}
/**
 * Computes the output shape for the current batch.
 *
 * With two inputs, the target shape is refreshed from the reference layer
 * (input 1). Dimensions from crop_axis_ onwards then take the target size and
 * the earlier ones keep the input size. Also updates the output frame
 * height/width.
 */
void CropLayer::setOutDims() {
  // get target dims from input_1
  // The reference layer exists only when two inputs are configured, so it
  // must not be touched in the single-input case.
  if (config_.inputs_size() == 2) {
    MatrixPtr input = inputLayers_[1]->getOutputValue();
    size_t batchSize = input->getHeight();
    targetDims_.setDim(0, batchSize);
    // NOTE(review): the channel count is read from input 0's config while
    // height/width come from input 1 -- confirm this is intended.
    int ch = config_.inputs(0).image_conf().channels();
    if (ch != 0) targetDims_.setDim(1, ch);
    int h = inputLayers_[1]->getOutput().getFrameHeight();
    if (h != 0) targetDims_.setDim(2, h);
    int w = inputLayers_[1]->getOutput().getFrameWidth();
    if (w != 0) targetDims_.setDim(3, w);
  }

  // get final crop shape from target dims and crop axis
  std::vector<uint32_t> crop_shape;
  int dimSize = 4;
  for (int i = 0; i < dimSize; i++) {
    if (i >= crop_axis_) {
      crop_shape.push_back(targetDims_[i]);
    } else {
      crop_shape.push_back(inDims_[i]);
    }
  }

  outDims_.reshape(
      {crop_shape[0], crop_shape[1], crop_shape[2], crop_shape[3]});
  output_.setFrameHeight(crop_shape[2]);
  output_.setFrameWidth(crop_shape[3]);
}
// Refreshes inDims_ for the current batch: the batch size comes from the
// height of input 0's output matrix; height and width are overridden by the
// frame sizes reported by input 0 when those are non-zero.
void CropLayer::setInDims() {
  const size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  const int frameHeight = inputLayers_[0]->getOutput().getFrameHeight();
  const int frameWidth = inputLayers_[0]->getOutput().getFrameWidth();

  inDims_.setDim(0, batchSize);
  if (frameHeight != 0) {
    inDims_.setDim(2, frameHeight);
  }
  if (frameWidth != 0) {
    inDims_.setDim(3, frameWidth);
  }
}
/**
 * Forward pass: refreshes the input/output shapes for the current batch,
 * resizes the output to (batch, C*H*W of the cropped shape) and runs the
 * "Crop" function, which assigns its result to the output value.
 */
void CropLayer::forward(PassType passType) {
  Layer::forward(passType);

  // Shapes depend on the current batch, so recompute them on every pass.
  setInDims();
  setOutDims();
  int size = outDims_[1] * outDims_[2] * outDims_[3];
  resetOutput(outDims_[0], size);

  REGISTER_TIMER_INFO("CropForward", getName().c_str());

  BufferArgs inputs;
  BufferArgs outputs;
  inputs.addArg(*getInputValue(0), inDims_);
  outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO);
  forward_[0]->calc(inputs, outputs);
}
void CropLayer::backward(const UpdateCallback& callback) {
(void)callback;
REGISTER_TIMER_INFO("CropBackward", getName().c_str());
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), outDims_);
outputs.addArg(*getInputGrad(0), inDims_, ADD_TO);
backward_[0]->calc(inputs, outputs);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
namespace paddle {
/**
 * \brief This layer crops its input according to the layer configuration.
 *   input_0: input to be cropped
 *   input_1: optional reference input
 *   axis: first dimension to be cropped
 *   offset: offset of the crop in each cropped dimension
 *   shape: if no reference input layer is set,
 *          the input is cropped to this shape
 */
class CropLayer : public Layer {
public:
explicit CropLayer(const LayerConfig& config) : Layer(config) {}
~CropLayer() {}
// Reads the configuration and creates the forward/backward functions.
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
// Crops input_0 into this layer's output.
void forward(PassType passType) override;
// Propagates the output gradient back to input_0.
void backward(const UpdateCallback& callback = nullptr) override;
protected:
// Recomputes outDims_ (and the output frame size) for the current batch.
void setOutDims();
// Recomputes inDims_ for the current batch.
void setInDims();
// First dimension that is cropped (from the `axis` config field).
int32_t crop_axis_;
// Offsets as listed in the `offset` config field.
std::vector<uint32_t> crop_offsets_;
// Per-dimension start of the crop, derived from crop_axis_ and crop_offsets_.
std::vector<uint32_t> crop_corner_;
// Shape of input_0.
TensorShape inDims_;
// Shape to crop to, taken from the `shape` config or from input_1.
TensorShape targetDims_;
// Shape of this layer's output.
TensorShape outDims_;
};
} // namespace paddle
...@@ -56,7 +56,7 @@ add_test(NAME test_DetectionOutput ...@@ -56,7 +56,7 @@ add_test(NAME test_DetectionOutput
add_unittest_without_exec(test_ConvUnify add_unittest_without_exec(test_ConvUnify
test_ConvUnify.cpp test_ConvUnify.cpp
LayerGradUtil.cpp) LayerGradUtil.cpp)
add_test(NAME test_ConvUnify add_test(NAME test_ConvUnify
COMMAND test_ConvUnify) COMMAND test_ConvUnify)
################# test_BatchNorm ####################### ################# test_BatchNorm #######################
......
...@@ -1802,6 +1802,34 @@ TEST(Layer, RowConvLayer) { ...@@ -1802,6 +1802,34 @@ TEST(Layer, RowConvLayer) {
} }
} }
// Gradient check for the crop layer with a reference input: a 4x16x16 input
// is cropped along height/width (axis 2, offsets 0 and 0) using a 2x8x8
// reference layer, on both CPU and GPU.
TEST(Layer, CropLayer) {
  TestConfig config;

  // crop layer settings
  config.layerConfig.set_type("crop");
  config.layerConfig.set_name("cropLayer");
  config.layerConfig.set_axis(2);
  config.layerConfig.add_offset(0);
  config.layerConfig.add_offset(0);

  // input_0: the data to be cropped (4 * 16 * 16 = 1024 values per sample)
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0});
  ImageConfig* dataImg = config.layerConfig.add_inputs()->mutable_image_conf();
  dataImg->set_channels(4);
  dataImg->set_img_size(16);

  // input_1: the reference layer (2 * 8 * 8 = 128 values per sample)
  config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0});
  ImageConfig* refImg = config.layerConfig.add_inputs()->mutable_image_conf();
  refImg->set_channels(2);
  refImg->set_img_size(8);

  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "crop", 100, false, useGpu, false);
  }
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h" #include "paddle/platform/assert.h"
#include "paddle/platform/error.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/gpu_info.h" #include "paddle/platform/gpu_info.h"
#include <stdlib.h> // for malloc and free #include <stdlib.h> // for malloc and free
...@@ -128,8 +128,7 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) { ...@@ -128,8 +128,7 @@ void GPUAllocator::Free(void* p, size_t size, size_t index) {
// process is terminating, in which case we don't care if // process is terminating, in which case we don't care if
// cudaFree succeeds. // cudaFree succeeds.
if (err != cudaErrorCudartUnloading) { if (err != cudaErrorCudartUnloading) {
platform::throw_on_error(err, PADDLE_ENFORCE(err, "cudaFree{Host} failed in GPUAllocator::Free.");
"cudaFree{Host} failed in GPUAllocator::Free.");
} }
} }
......
...@@ -51,3 +51,5 @@ op_library(softmax_op SRCS softmax_op.cc softmax_op.cu) ...@@ -51,3 +51,5 @@ op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op
softmax_op net) softmax_op net)
op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
...@@ -29,8 +30,10 @@ public: ...@@ -29,8 +30,10 @@ public:
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
output->flat<T>().device(*(context.GetEigenDevice<Place>())) = framework::EigenVector<T>::Flatten(*output).device(
input0.flat<T>() + input1.flat<T>(); *(context.GetEigenDevice<Place>())) =
framework::EigenVector<T>::Flatten(input0) +
framework::EigenVector<T>::Flatten(input1);
} }
}; };
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sgd_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace operators {
/**
 * Operator definition for SGD. Its only job here is shape inference: it
 * requires two non-null inputs with identical dimensions and one non-null
 * output, and gives the output the dimensions of the first input.
 */
class SGDOp : public framework::OperatorWithKernel {
protected:
  void InferShape(
      const std::vector<const framework::Tensor *> &inputs,
      const std::vector<framework::Tensor *> &outputs) const override {
    PADDLE_ENFORCE(inputs.size() == 2, "Input size of SGDOp must be two");
    PADDLE_ENFORCE(outputs.size() == 1, "Output size of SGDOp must be one");
    // Pointers are validated before any of them is dereferenced below.
    PADDLE_ENFORCE(inputs[0] != nullptr, "inputs[0] must be set");
    PADDLE_ENFORCE(inputs[1] != nullptr, "inputs[1] must be set");
    PADDLE_ENFORCE(outputs[0] != nullptr, "outputs[0] must be set");
    PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
                   "Two input of SGD Op's dimension must be same.");
    outputs[0]->set_dims(inputs[0]->dims());
  }
};
// Declares the interface of the sgd operator: two inputs ("param", "grad"),
// one output ("param_out"), a float attribute "learning_rate", and the
// operator's documentation string.
class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
// Inputs are declared in the order param, grad.
AddInput("param", "input parameter");
AddInput("grad", "input gradient");
AddOutput("param_out", "output parameter");
AddAttr<float>("learning_rate", "learning rate of sgd");
AddComment(R"DOC(
Simplest sgd algorithm.
param_out = param - learning_rate * grad;
)DOC");
}
};
} // namespace operators
} // namespace paddle
// Register the sgd operator together with its proto maker.
REGISTER_OP(sgd, paddle::operators::SGDOp, paddle::operators::SGDOpMaker);
// Register the float kernel for the CPU place. The typedef gives the
// two-argument template instantiation a single-token name for the macro.
typedef paddle::operators::SGDOpKernel<::paddle::platform::CPUPlace, float>
SGDOpKernel_CPU_float;
REGISTER_OP_CPU_KERNEL(sgd, SGDOpKernel_CPU_float);
#include "paddle/operators/sgd_op.h"
#include "paddle/framework/op_registry.h"
// Register the float kernel of the sgd operator for the GPU place. The typedef
// gives the two-argument template instantiation a single-token name for the
// macro.
typedef paddle::operators::SGDOpKernel<::paddle::platform::GPUPlace, float> SGDOpKernel_GPU_float;
REGISTER_OP_GPU_KERNEL(sgd, SGDOpKernel_GPU_float);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
// Kernel of the sgd operator for a given place (device) and element type T.
// Computes param_out = param - learning_rate * grad element-wise over the
// flattened tensors.
template <typename Place, typename T>
class SGDOpKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext& ctx) const override {
// `auto` (not `auto&`) makes `param` and `grad` local Tensor objects.
auto param = ctx.Input("param")->Get<framework::Tensor>();
auto grad = ctx.Input("grad")->Get<framework::Tensor>();
// The output is looked up by position (index 0) rather than by name.
auto* param_out = ctx.Output(0)->GetMutable<framework::Tensor>();
float lr = ctx.op_.GetAttr<float>("learning_rate");
// Called before the assignment below writes through param_out.
param_out->mutable_data<T>(ctx.GetPlace());
// Evaluate the update on the Eigen device that belongs to Place.
framework::EigenVector<T>::Flatten(*param_out)
.device(*(ctx.GetEigenDevice<Place>())) =
framework::EigenVector<T>::Flatten(param) -
lr * framework::EigenVector<T>::Flatten(grad);
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/framework/op_registry.h>
USE_OP(sgd);
// The "sgd" operator must be present in the registry's proto table.
TEST(SGDOp, GetOpProto) {
  auto& registeredProtos = paddle::framework::OpRegistry::protos();
  auto sgdEntry = registeredProtos.find("sgd");
  ASSERT_NE(sgdEntry, registeredProtos.end());
}
...@@ -8,6 +8,8 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) ...@@ -8,6 +8,8 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
add_subdirectory(dynload) add_subdirectory(dynload)
cc_test(enforce_test SRCS enforce_test.cc)
IF(WITH_GPU) IF(WITH_GPU)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader) set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
ELSE() ELSE()
......
...@@ -22,7 +22,6 @@ limitations under the License. */ ...@@ -22,7 +22,6 @@ limitations under the License. */
#endif #endif
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/platform/error.h"
DEFINE_double(fraction_of_cpu_memory_to_use, 1, DEFINE_double(fraction_of_cpu_memory_to_use, 1,
"Default use 100% of CPU memory for PaddlePaddle," "Default use 100% of CPU memory for PaddlePaddle,"
......
...@@ -11,12 +11,13 @@ limitations under the License. */ ...@@ -11,12 +11,13 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
#include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/curand.h" #include "paddle/platform/dynload/curand.h"
#include "paddle/platform/error.h"
#include "paddle/platform/gpu_info.h" #include "paddle/platform/gpu_info.h"
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#endif #endif
...@@ -71,8 +72,7 @@ class CUDADeviceContext : public DeviceContext { ...@@ -71,8 +72,7 @@ class CUDADeviceContext : public DeviceContext {
public: public:
explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) { explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) {
GPUPlaceGuard guard(gpu_place_); GPUPlaceGuard guard(gpu_place_);
paddle::platform::throw_on_error(cudaStreamCreate(&stream_), PADDLE_ENFORCE(cudaStreamCreate(&stream_), "cudaStreamCreate failed");
"cudaStreamCreate failed");
eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_)); eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_));
eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
} }
...@@ -83,8 +83,8 @@ class CUDADeviceContext : public DeviceContext { ...@@ -83,8 +83,8 @@ class CUDADeviceContext : public DeviceContext {
} }
void Wait() { void Wait() {
paddle::platform::throw_on_error(cudaStreamSynchronize(stream_), PADDLE_ENFORCE(cudaStreamSynchronize(stream_),
"cudaStreamSynchronize failed"); "cudaStreamSynchronize failed");
} }
cudaStream_t stream() { return stream_; } cudaStream_t stream() { return stream_; }
...@@ -94,12 +94,11 @@ class CUDADeviceContext : public DeviceContext { ...@@ -94,12 +94,11 @@ class CUDADeviceContext : public DeviceContext {
cublasHandle_t cublas_handle() { cublasHandle_t cublas_handle() {
if (!blas_handle_) { if (!blas_handle_) {
GPUPlaceGuard guard(gpu_place_); GPUPlaceGuard guard(gpu_place_);
PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) == PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_),
CUBLAS_STATUS_SUCCESS,
"cublasCreate failed"); "cublasCreate failed");
PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream( PADDLE_ENFORCE(
blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS, paddle::platform::dynload::cublasSetStream(blas_handle_, stream_),
"cublasSetStream failed"); "cublasSetStream failed");
} }
return blas_handle_; return blas_handle_;
} }
...@@ -107,12 +106,11 @@ class CUDADeviceContext : public DeviceContext { ...@@ -107,12 +106,11 @@ class CUDADeviceContext : public DeviceContext {
cudnnHandle_t cudnn_handle() { cudnnHandle_t cudnn_handle() {
if (!dnn_handle_) { if (!dnn_handle_) {
GPUPlaceGuard guard(gpu_place_); GPUPlaceGuard guard(gpu_place_);
PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) == PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_),
CUDNN_STATUS_SUCCESS,
"cudnnCreate failed"); "cudnnCreate failed");
PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream( PADDLE_ENFORCE(
dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS, paddle::platform::dynload::cudnnSetStream(dnn_handle_, stream_),
"cudnnSetStream failed"); "cudnnSetStream failed");
} }
return dnn_handle_; return dnn_handle_;
} }
...@@ -121,16 +119,15 @@ class CUDADeviceContext : public DeviceContext { ...@@ -121,16 +119,15 @@ class CUDADeviceContext : public DeviceContext {
if (!rand_generator_) { if (!rand_generator_) {
GPUPlaceGuard guard(gpu_place_); GPUPlaceGuard guard(gpu_place_);
PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator( PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator(
&rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) == &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT),
CURAND_STATUS_SUCCESS,
"curandCreateGenerator failed"); "curandCreateGenerator failed");
PADDLE_ENFORCE( PADDLE_ENFORCE(
paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed( paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed(
rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS, rand_generator_, random_seed_),
"curandSetPseudoRandomGeneratorSeed failed"); "curandSetPseudoRandomGeneratorSeed failed");
PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream( PADDLE_ENFORCE(
rand_generator_, stream_) == CURAND_STATUS_SUCCESS, paddle::platform::dynload::curandSetStream(rand_generator_, stream_),
"curandSetStream failed"); "curandSetStream failed");
} }
return rand_generator_; return rand_generator_;
} }
...@@ -138,26 +135,23 @@ class CUDADeviceContext : public DeviceContext { ...@@ -138,26 +135,23 @@ class CUDADeviceContext : public DeviceContext {
~CUDADeviceContext() { ~CUDADeviceContext() {
Wait(); Wait();
if (blas_handle_) { if (blas_handle_) {
PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) == PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_),
CUBLAS_STATUS_SUCCESS,
"cublasDestroy failed"); "cublasDestroy failed");
} }
if (dnn_handle_) { if (dnn_handle_) {
PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) == PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_),
CUDNN_STATUS_SUCCESS,
"cudnnDestroy failed"); "cudnnDestroy failed");
} }
if (rand_generator_) { if (rand_generator_) {
PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator( PADDLE_ENFORCE(
rand_generator_) == CURAND_STATUS_SUCCESS, paddle::platform::dynload::curandDestroyGenerator(rand_generator_),
"curandDestroyGenerator failed"); "curandDestroyGenerator failed");
} }
eigen_stream_.reset(); eigen_stream_.reset();
eigen_device_.reset(); eigen_device_.reset();
paddle::platform::throw_on_error(cudaStreamDestroy(stream_), PADDLE_ENFORCE(cudaStreamDestroy(stream_), "cudaStreamDestroy failed");
"cudaStreamDestroy failed");
} }
private: private:
......
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include <string> #include <string>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/framework/enforce.h" #include "paddle/platform/enforce.h"
DEFINE_string(cudnn_dir, "", DEFINE_string(cudnn_dir, "",
"Specify path for loading libcudnn.so. For instance, " "Specify path for loading libcudnn.so. For instance, "
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <paddle/string/printf.h>
#include <sstream>
#include <stdexcept>
#include <string>
#ifndef PADDLE_ONLY_CPU
#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/curand.h"
#include <cublas_v2.h>
#include <cudnn.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif // PADDLE_ONLY_CPU
namespace paddle {
namespace platform {
// Most enforce checks succeed, so the failure condition is almost always
// false. UNLIKELY wraps such a condition in __builtin_expect(cond, 0), which
// tells the compiler that `cond` is expected to evaluate to false so it can
// optimize for the common, non-failing path.
// __builtin_expect is a GCC/Clang compiler builtin, not part of standard C++.
// For more details, please check https://stackoverflow.com/a/43870188/724872.
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#ifndef PADDLE_ONLY_CPU
// Throws thrust::system_error carrying the CUDA error code when `e` is
// non-zero. The message is the Sprintf-formatted `args` followed by the
// file/line of this throw site.
template <typename... Args>
inline void throw_on_error(cudaError_t e, const Args&... args) {
  if (!UNLIKELY(e)) {
    return;  // zero means success
  }
  // clang-format off
  throw thrust::system_error(
      e, thrust::cuda_category(),
      string::Sprintf(args...) +
      string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
  // clang-format on
}
// Throws thrust::system_error when `stat` is not CURAND_STATUS_SUCCESS. The
// exception always carries cudaErrorLaunchFailure as its code; the message is
// the Sprintf-formatted `args` followed by the file/line of this throw site.
template <typename... Args>
inline void throw_on_error(curandStatus_t stat, const Args&... args) {
  if (stat == CURAND_STATUS_SUCCESS) {
    return;
  }
  // clang-format off
  throw thrust::system_error(
      cudaErrorLaunchFailure, thrust::cuda_category(),
      string::Sprintf(args...) +
      string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
  // clang-format on
}
// Throws std::runtime_error when `stat` is not CUDNN_STATUS_SUCCESS. The
// message is "<cudnn error string>, <formatted args> at [file:line];".
template <typename... Args>
inline void throw_on_error(cudnnStatus_t stat, const Args&... args) {
  if (stat == CUDNN_STATUS_SUCCESS) {
    return;
  } else {
    // clang-format off
    // Separate the cuDNN error string from the caller's message with ", ",
    // matching the "CUBLAS: ..., " prefixes used by the cublas overload.
    throw std::runtime_error(
        std::string(platform::dynload::cudnnGetErrorString(stat)) + ", " +
        string::Sprintf(args...) +
        string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
    // clang-format on
  }
}
// Throws std::runtime_error when `stat` is not CUBLAS_STATUS_SUCCESS. The
// message starts with a "CUBLAS: ..., " prefix for the status codes listed
// below (no prefix for any other code), followed by the Sprintf-formatted
// `args` and the file/line of this throw site.
template <typename... Args>
inline void throw_on_error(cublasStatus_t stat, const Args&... args) {
  std::string err;
  switch (stat) {
    case CUBLAS_STATUS_SUCCESS:
      return;
    case CUBLAS_STATUS_NOT_INITIALIZED:
      err = "CUBLAS: not initialized, ";
      break;
    case CUBLAS_STATUS_ALLOC_FAILED:
      err = "CUBLAS: alloc failed, ";
      break;
    case CUBLAS_STATUS_INVALID_VALUE:
      err = "CUBLAS: invalid value, ";
      break;
    case CUBLAS_STATUS_ARCH_MISMATCH:
      err = "CUBLAS: arch mismatch, ";
      break;
    case CUBLAS_STATUS_MAPPING_ERROR:
      err = "CUBLAS: mapping error, ";
      break;
    case CUBLAS_STATUS_EXECUTION_FAILED:
      err = "CUBLAS: execution failed, ";
      break;
    case CUBLAS_STATUS_INTERNAL_ERROR:
      err = "CUBLAS: internal error, ";
      break;
    case CUBLAS_STATUS_NOT_SUPPORTED:
      err = "CUBLAS: not supported, ";
      break;
    case CUBLAS_STATUS_LICENSE_ERROR:
      err = "CUBLAS: license error, ";
      break;
    default:
      break;  // unknown status: throw without a prefix
  }
  throw std::runtime_error(err + string::Sprintf(args...) +
                           string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
}
#endif // PADDLE_ONLY_CPU
// Generic overload: `stat` is treated as a truth value and a zero value is a
// failure. On failure throws std::runtime_error whose message is the
// Sprintf-formatted `args` followed by the file/line of this throw site.
template <typename... Args>
inline void throw_on_error(int stat, const Args&... args) {
  const bool failed = !(stat);
  if (UNLIKELY(failed)) {
    throw std::runtime_error(
        string::Sprintf(args...) +
        string::Sprintf(" at [%s:%s];", __FILE__, __LINE__));
  }
}
// Unconditionally throws std::runtime_error. The message is the
// Sprintf-formatted arguments followed by " at [file:line];". Because this is
// a macro, __FILE__ and __LINE__ refer to the place where PADDLE_THROW is used.
// The do { ... } while (0) wrapper makes the macro behave as one statement.
#define PADDLE_THROW(...) \
do { \
throw std::runtime_error( \
string::Sprintf(__VA_ARGS__) + \
string::Sprintf(" at [%s:%s];", __FILE__, __LINE__)); \
} while (0)
/**
 * @brief Enforce a condition or a successful status code.
 *
 * Forwards to the throw_on_error overload selected by the type of
 * `condition`; on failure that overload throws std::runtime_error or
 * thrust::system_error with the formatted message.
 *
 * NOTE: the " at [file:line];" suffix is produced inside throw_on_error, an
 * inline function, so __FILE__/__LINE__ expand at its definition rather than
 * at the place where PADDLE_ENFORCE is used.
 */
#define PADDLE_ENFORCE(condition, ...) \
do { \
::paddle::platform::throw_on_error(condition, __VA_ARGS__); \
} while (0)
} // namespace platform
} // namespace paddle
...@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <gtest/gtest.h> #include "paddle/platform/enforce.h"
#include <paddle/framework/enforce.h> #include "gtest/gtest.h"
TEST(ENFORCE, OK) { TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345);
...@@ -23,13 +23,14 @@ TEST(ENFORCE, FAILED) { ...@@ -23,13 +23,14 @@ TEST(ENFORCE, FAILED) {
bool in_catch = false; bool in_catch = false;
try { try {
PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123);
} catch (paddle::framework::EnforceNotMet err) { } catch (const std::runtime_error& error) {
// your error handling code here
in_catch = true; in_catch = true;
std::string msg = "Enforce is not ok 123 at all"; std::string msg = "Enforce is not ok 123 at all";
const char* what = err.what(); const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) { for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]); ASSERT_EQ(what[i], msg[i]);
} }
} }
ASSERT_TRUE(in_catch); ASSERT_TRUE(in_catch);
} }
\ No newline at end of file
#pragma once
#include <sstream>
#include <stdexcept>
#include <string>
#ifndef PADDLE_ONLY_CPU
#include <cublas_v2.h>
#include <cudnn.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif // PADDLE_ONLY_CPU
namespace paddle {
namespace platform {
#ifndef PADDLE_ONLY_CPU
// Throws thrust::system_error (CUDA category) carrying `message` when `e` is
// a non-zero CUDA error code; returns normally when `e` is zero.
inline void throw_on_error(cudaError_t e, const char* message) {
  if (!e) {
    return;
  }
  throw thrust::system_error(e, thrust::cuda_category(), message);
}
// Throws thrust::system_error when `stat` is anything other than
// CURAND_STATUS_SUCCESS. The cuRAND status itself is not forwarded: every
// failure is reported with the error code cudaErrorLaunchFailure.
inline void throw_on_error(curandStatus_t stat, const char* message) {
  if (stat == CURAND_STATUS_SUCCESS) {
    return;
  }
  throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
                             message);
}
// Throws std::runtime_error when `stat` is not CUDNN_STATUS_SUCCESS. The
// message is "<cudnnGetErrorString(stat)>, <message>".
inline void throw_on_error(cudnnStatus_t stat, const char* message) {
  if (stat == CUDNN_STATUS_SUCCESS) {
    return;
  }
  std::stringstream description;
  description << cudnnGetErrorString(stat) << ", " << message;
  throw std::runtime_error(description.str());
}
// Throws std::runtime_error when `stat` is not CUBLAS_STATUS_SUCCESS.
//
// The message has the form "CUBLAS: <reason>, <message>". A status value that
// is not one of the enumerators listed below is reported as
// "CUBLAS: unknown error" so the message never starts with a bare ", ".
inline void throw_on_error(cublasStatus_t stat, const char* message) {
  if (stat == CUBLAS_STATUS_SUCCESS) {
    return;
  }

  // Fallback text for any status the switch below does not recognise.
  const char* reason = "CUBLAS: unknown error";
  switch (stat) {
    case CUBLAS_STATUS_NOT_INITIALIZED:
      reason = "CUBLAS: not initialized";
      break;
    case CUBLAS_STATUS_ALLOC_FAILED:
      reason = "CUBLAS: alloc failed";
      break;
    case CUBLAS_STATUS_INVALID_VALUE:
      reason = "CUBLAS: invalid value";
      break;
    case CUBLAS_STATUS_ARCH_MISMATCH:
      reason = "CUBLAS: arch mismatch";
      break;
    case CUBLAS_STATUS_MAPPING_ERROR:
      reason = "CUBLAS: mapping error";
      break;
    case CUBLAS_STATUS_EXECUTION_FAILED:
      reason = "CUBLAS: execution failed";
      break;
    case CUBLAS_STATUS_INTERNAL_ERROR:
      reason = "CUBLAS: internal error";
      break;
    case CUBLAS_STATUS_NOT_SUPPORTED:
      reason = "CUBLAS: not supported";
      break;
    case CUBLAS_STATUS_LICENSE_ERROR:
      reason = "CUBLAS: license error";
      break;
    default:
      break;
  }

  std::stringstream ss;
  ss << reason << ", " << message;
  throw std::runtime_error(ss.str());
}
// Convenience overload: checks a cuBLAS status with an empty user message by
// delegating to the (cublasStatus_t, const char*) overload.
inline void throw_on_error(cublasStatus_t stat) {
  throw_on_error(stat, "");
}
#endif // PADDLE_ONLY_CPU
// Throws std::runtime_error when `stat` is non-zero. The message is
// "<message>, stat = <stat>"; a zero `stat` returns normally.
inline void throw_on_error(int stat, const char* message) {
  if (stat == 0) {
    return;
  }
  std::string what(message);
  what += ", stat = ";
  what += std::to_string(stat);
  throw std::runtime_error(what);
}
} // namespace platform
} // namespace paddle
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/gpu_info.h" #include "paddle/platform/gpu_info.h"
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/platform/error.h" #include "paddle/platform/enforce.h"
DEFINE_double(fraction_of_gpu_memory_to_use, 0.95, DEFINE_double(fraction_of_gpu_memory_to_use, 0.95,
"Default use 95% of GPU memory for PaddlePaddle," "Default use 95% of GPU memory for PaddlePaddle,"
...@@ -25,7 +25,7 @@ namespace platform { ...@@ -25,7 +25,7 @@ namespace platform {
int GetDeviceCount() { int GetDeviceCount() {
int count; int count;
throw_on_error( PADDLE_ENFORCE(
cudaGetDeviceCount(&count), cudaGetDeviceCount(&count),
"cudaGetDeviceCount failed in paddle::platform::GetDeviceCount"); "cudaGetDeviceCount failed in paddle::platform::GetDeviceCount");
return count; return count;
...@@ -33,19 +33,19 @@ int GetDeviceCount() { ...@@ -33,19 +33,19 @@ int GetDeviceCount() {
int GetCurrentDeviceId() { int GetCurrentDeviceId() {
int device_id; int device_id;
throw_on_error( PADDLE_ENFORCE(
cudaGetDevice(&device_id), cudaGetDevice(&device_id),
"cudaGetDevice failed in paddle::platform::GetCurrentDeviceId"); "cudaGetDevice failed in paddle::platform::GetCurrentDeviceId");
return device_id; return device_id;
} }
void SetDeviceId(int id) { void SetDeviceId(int id) {
throw_on_error(cudaSetDevice(id), PADDLE_ENFORCE(cudaSetDevice(id),
"cudaSetDevice failed in paddle::platform::SetDeviceId"); "cudaSetDevice failed in paddle::platform::SetDeviceId");
} }
void GpuMemoryUsage(size_t& available, size_t& total) { void GpuMemoryUsage(size_t& available, size_t& total) {
throw_on_error(cudaMemGetInfo(&available, &total), PADDLE_ENFORCE(cudaMemGetInfo(&available, &total),
"cudaMemGetInfo failed in paddle::platform::GetMemoryUsage"); "cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
} }
......
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
add_op fc_op) add_op fc_op sgd_op)
...@@ -28,6 +28,7 @@ namespace pd = paddle::framework; ...@@ -28,6 +28,7 @@ namespace pd = paddle::framework;
USE_OP(add_two); USE_OP(add_two);
USE_OP_WITHOUT_KERNEL(fc); USE_OP_WITHOUT_KERNEL(fc);
USE_OP(sgd);
PYBIND11_PLUGIN(core) { PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of Paddle Paddle"); py::module m("core", "C++ core of Paddle Paddle");
......
...@@ -472,10 +472,16 @@ message LayerConfig { ...@@ -472,10 +472,16 @@ message LayerConfig {
// blank label used in ctc loss // blank label used in ctc loss
optional uint32 blank = 52 [default = 0]; optional uint32 blank = 52 [default = 0];
// stride parameter for seqlastins layer, AverageLayer, MaxLayer, which // stride parameter for seqlastins layer, AverageLayer, MaxLayer, which
// controls the scope of pooling operation. can be set > 0. // controls the scope of pooling operation. can be set > 0.
// leave empty or set to -1 to disable this stride pooling. // leave empty or set to -1 to disable this stride pooling.
optional int32 seq_pool_stride = 53 [default = -1]; optional int32 seq_pool_stride = 53 [default = -1];
// for crop layer
optional int32 axis = 54 [default = 2];
repeated uint32 offset = 55;
repeated uint32 shape = 56;
} }
message EvaluatorConfig { message EvaluatorConfig {
......
...@@ -1998,6 +1998,23 @@ class PadLayer(LayerBase): ...@@ -1998,6 +1998,23 @@ class PadLayer(LayerBase):
self.config.size = out_ch * out_h * out_w self.config.size = out_ch * out_h * out_w
@config_layer('crop')
class CropLayer(LayerBase):
    """Config-parser class registered for the 'crop' layer type.

    Stores ``axis``, ``offset`` and ``shape`` in the layer config and fills
    the image configuration of the first input from that input layer's
    width, height and size.
    """

    def __init__(self, name, inputs, axis, offset, shape, **xargs):
        super(CropLayer, self).__init__(name, 'crop', 0, inputs=inputs, **xargs)

        layer_conf = self.config
        layer_conf.axis = axis
        layer_conf.offset.extend(offset)
        layer_conf.shape.extend(shape)

        # Channel count, width and height are all derived from input 0.
        source = self.get_input_layer(0)
        image_conf = layer_conf.inputs[0].image_conf
        image_conf.img_size = source.width
        image_conf.img_size_y = source.height
        image_conf.channels = source.size / (source.width * source.height)
@config_layer('batch_norm') @config_layer('batch_norm')
class BatchNormLayer(LayerBase): class BatchNormLayer(LayerBase):
layer_type = 'batch_norm' layer_type = 'batch_norm'
......
...@@ -127,6 +127,7 @@ __all__ = [ ...@@ -127,6 +127,7 @@ __all__ = [
'dropout_layer', 'dropout_layer',
'prelu_layer', 'prelu_layer',
'gated_unit_layer', 'gated_unit_layer',
'crop_layer',
] ]
...@@ -218,6 +219,7 @@ class LayerType(object): ...@@ -218,6 +219,7 @@ class LayerType(object):
SMOOTH_L1 = 'smooth_l1' SMOOTH_L1 = 'smooth_l1'
PRELU = 'prelu' PRELU = 'prelu'
CROP_LAYER = 'crop'
@staticmethod @staticmethod
def is_layer_type(type_name): def is_layer_type(type_name):
...@@ -5970,3 +5972,52 @@ def gated_unit_layer(input, ...@@ -5970,3 +5972,52 @@ def gated_unit_layer(input,
name="%s_gated_act" % name, name="%s_gated_act" % name,
input=dotmul_operator(input_proj, gate), input=dotmul_operator(input_proj, gate),
layer_attr=layer_attr) layer_attr=layer_attr)
@wrap_name_default()
@layer_support()
def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
    """
    Crop the input image by ``offset`` and ``shape``. The crop shape is either
    given explicitly through ``shape`` or taken from a reference input layer.

    The example usage is:

    .. code-block:: python

        crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])

    :param input: The input layer. If two inputs are given, the second one is
                  regarded as the reference input.
    :type input: LayerOutput or Sequence
    :param offset: The crop offset.
    :type offset: Sequence
    :param axis: The first axis to be cropped. For an image input layer:
        - 0: batch size
        - 1: channels
        - 2: height
        - 3: width
    :type axis: int
    :param shape: The shape to be cropped. Default is None.
    :type shape: Sequence | None
    :param name: Name of this layer.
    :type name: basestring
    :param layer_attr: Extra layer attributes, converted with
                       ExtraLayerAttribute.to_kwargs and passed to Layer.
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    # Normalise a single layer into a one-element list; anything else must
    # already be a sequence of layers.
    if not isinstance(input, LayerOutput):
        assert isinstance(input, collections.Sequence)
    else:
        input = [input]

    parent_names = [x.name for x in input]
    extra_kwargs = ExtraLayerAttribute.to_kwargs(layer_attr)
    l = Layer(
        inputs=parent_names,
        axis=axis,
        offset=offset,
        shape=shape,
        name=name,
        type=LayerType.CROP_LAYER,
        **extra_kwargs)

    return LayerOutput(
        name=name,
        layer_type=LayerType.CROP_LAYER,
        parents=input,
        size=l.config.size)
from paddle.trainer_config_helpers import *

# Test network configuration for crop_layer: a conv + pool stack is cropped
# against a second data layer that acts as the reference input.
settings(batch_size=1000, learning_rate=1e-5)

data = data_layer(name='data', size=2016, height=48, width=42)
# The reference layer gets its own name so the two data layers do not share
# the name 'data'.
reference_data = data_layer(name='reference', size=768, height=16, width=16)

conv = img_conv_layer(
    input=data,
    filter_size=3,
    num_channels=1,
    num_filters=16,
    padding=1,
    act=LinearActivation(),
    bias_attr=True)

pool = img_pool_layer(input=conv, pool_size=2, stride=2, pool_type=MaxPooling())

# `offset` is a required argument of crop_layer; [2, 3] is the offset used in
# the crop_layer usage example and keeps a 16x16 crop inside the pooled image.
crop = crop_layer(input=[pool, reference_data], offset=[2, 3], axis=2)

# Output the crop layer itself (no layer named `pad` is defined in this file).
outputs(crop)
...@@ -217,6 +217,10 @@ def create_op_creation_method(op_proto): ...@@ -217,6 +217,10 @@ def create_op_creation_method(op_proto):
return core.Operator.create(opdesc.SerializeToString()) return core.Operator.create(opdesc.SerializeToString())
__impl__.__doc__ = get_docstring_from_op_proto(op_proto) __impl__.__doc__ = get_docstring_from_op_proto(op_proto)
__impl__.all_input_args = [var.name for var in op_proto.inputs]
__impl__.all_output_args = [var.name for var in op_proto.outputs]
__impl__.all_attr_args = [attr.name for attr in op_proto.attrs]
return __impl__ return __impl__
......
add_python_test(test_framework test_protobuf.py test_scope.py add_python_test(test_framework test_protobuf.py test_scope.py
test_default_scope_funcs.py test_op_creation_methods.py test_default_scope_funcs.py test_op_creation_methods.py
test_tensor.py test_fc_op.py) test_tensor.py test_fc_op.py test_add_two_op.py test_sgd_op.py)
import paddle.v2.framework.core as core
import unittest
import numpy
import paddle.v2.framework.create_op_creation_methods as creation
class OpTestMeta(type):
    """
    Metaclass for operator tests.

    It attaches a `test_all` method to the class being created so that the
    Python unittest module picks it up and runs it.

    `test_all` reads the values stored on `self`, uses them to create and run
    an operator, and compares the operator outputs with the expected values
    that are also stored on `self`.

    See `test_add_two_op` for example usage.
    """

    def __new__(cls, name, bases, attrs):
        obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)

        def test_all(self):
            # Look up the creation function for the operator named self.type.
            op_factory = getattr(creation.op_creations, self.type, None)
            self.assertIsNotNone(op_factory)

            scope = core.Scope(None)
            op_args = dict()

            # Inputs: copy each array found on self into a scope tensor;
            # inputs without a matching attribute are marked as empty.
            for in_name in op_factory.all_input_args:
                if not hasattr(self, in_name):
                    op_args[in_name] = "@EMPTY@"
                    continue
                op_args[in_name] = in_name
                tensor = scope.create_var(in_name).get_tensor()
                value = getattr(self, in_name)
                tensor.set_dims(value.shape)
                tensor.set(value)

            # Outputs: create a scope variable for each output that has an
            # expected value on self.
            for out_name in op_factory.all_output_args:
                if hasattr(self, out_name):
                    op_args[out_name] = out_name
                    scope.create_var(out_name).get_tensor()

            # Attributes: forwarded as-is when present on self.
            for attr_name in op_factory.all_attr_args:
                if hasattr(self, attr_name):
                    op_args[attr_name] = getattr(self, attr_name)

            op = op_factory(**op_args)
            op.infer_shape(scope)

            cpu_ctx = core.DeviceContext.cpu_context()
            op.run(scope, cpu_ctx)

            # Compare every declared output with the expectation on self.
            for out_name in op_factory.all_output_args:
                actual = numpy.array(scope.get_var(out_name).get_tensor())
                expect = getattr(self, out_name)
                numpy.testing.assert_almost_equal(actual, expect)

        obj.test_all = test_all
        return obj
import unittest
from op_test_util import OpTestMeta
import numpy
class TestAddOp(unittest.TestCase):
    """Test data for the "add_two" operator; `test_all` comes from OpTestMeta."""

    __metaclass__ = OpTestMeta

    def setUp(self):
        self.type = "add_two"
        dims = (342, 345)
        lhs = numpy.random.random(dims).astype("float32")
        rhs = numpy.random.random(dims).astype("float32")
        self.X = lhs
        self.Y = rhs
        # Expected output: element-wise sum of the two inputs.
        self.Out = lhs + rhs


if __name__ == '__main__':
    unittest.main()
import unittest
import numpy
from op_test_util import OpTestMeta
class TestSGD(unittest.TestCase):
    """Test data for the "sgd" operator; `test_all` comes from OpTestMeta."""

    __metaclass__ = OpTestMeta

    def setUp(self):
        self.type = "sgd"
        dims = (342, 345)
        param = numpy.random.random(dims).astype("float32")
        grad = numpy.random.random(dims).astype("float32")
        rate = 0.1
        self.param = param
        self.grad = grad
        self.learning_rate = rate
        # Expected output: param - learning_rate * grad.
        self.param_out = param - rate * grad


if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册