提交 7fdfe1a9 编写于 作者: D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into unit_test

...@@ -36,8 +36,8 @@ include(simd) ...@@ -36,8 +36,8 @@ include(simd)
################################ Configurations ####################################### ################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF)
option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF)
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
......
...@@ -187,7 +187,13 @@ function(cc_library TARGET_NAME) ...@@ -187,7 +187,13 @@ function(cc_library TARGET_NAME)
endif() endif()
# cpplint code style # cpplint code style
add_style_check_target(${TARGET_NAME} ${cc_library_SRCS}) foreach(source_file ${cc_library_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS})
else(cc_library_SRCS) else(cc_library_SRCS)
if (cc_library_DEPS) if (cc_library_DEPS)
...@@ -239,6 +245,14 @@ function(nv_library TARGET_NAME) ...@@ -239,6 +245,14 @@ function(nv_library TARGET_NAME)
add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) target_link_libraries(${TARGET_NAME} ${nv_library_DEPS})
endif() endif()
# cpplint code style
foreach(source_file ${nv_library_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS})
else(nv_library_SRCS) else(nv_library_SRCS)
if (nv_library_DEPS) if (nv_library_DEPS)
merge_static_libs(${TARGET_NAME} ${nv_library_DEPS}) merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
......
...@@ -25,18 +25,15 @@ limitations under the License. */ ...@@ -25,18 +25,15 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace {
typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
Dim<8>, Dim<9>>
DDimVar;
}
/** /**
* \brief A dynamically sized dimension. * \brief A dynamically sized dimension.
* *
* The number of dimensions must be between [1, 9]. * The number of dimensions must be between [1, 9].
*/ */
struct DDim { struct DDim {
typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
Dim<8>, Dim<9>>
DDimVar;
DDimVar var; DDimVar var;
DDim() : var(Dim<1>()) {} DDim() : var(Dim<1>()) {}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
......
...@@ -314,7 +314,7 @@ class OpRegistry { ...@@ -314,7 +314,7 @@ class OpRegistry {
static std::unordered_map<std::string, OpProto>& protos() { static std::unordered_map<std::string, OpProto>& protos() {
static std::unordered_map<std::string, OpProto> protos_; static std::unordered_map<std::string, OpProto> protos_;
return protos_; return protos_;
}; }
static std::unordered_map<std::string, std::string>& grad_ops() { static std::unordered_map<std::string, std::string>& grad_ops() {
static std::unordered_map<std::string, std::string> grad_ops_; static std::unordered_map<std::string, std::string> grad_ops_;
...@@ -336,7 +336,7 @@ class OpRegistry { ...@@ -336,7 +336,7 @@ class OpRegistry {
static std::unordered_map<std::string, OpAttrChecker>& op_checkers() { static std::unordered_map<std::string, OpAttrChecker>& op_checkers() {
static std::unordered_map<std::string, OpAttrChecker> op_checkers_; static std::unordered_map<std::string, OpAttrChecker> op_checkers_;
return op_checkers_; return op_checkers_;
}; }
static void GenerateTempVariableName(OperatorBase* op) { static void GenerateTempVariableName(OperatorBase* op) {
static std::atomic<size_t> gUniqId(0UL); static std::atomic<size_t> gUniqId(0UL);
...@@ -353,7 +353,7 @@ class OpRegistry { ...@@ -353,7 +353,7 @@ class OpRegistry {
template <typename OpType, typename ProtoMakerType> template <typename OpType, typename ProtoMakerType>
class OpRegisterHelper { class OpRegisterHelper {
public: public:
OpRegisterHelper(const char* op_type) { explicit OpRegisterHelper(const char* op_type) {
OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type); OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
} }
}; };
......
...@@ -284,7 +284,7 @@ class OperatorWithKernel : public OperatorBase { ...@@ -284,7 +284,7 @@ class OperatorWithKernel : public OperatorBase {
platform::Place place_; platform::Place place_;
OpKernelKey() = default; OpKernelKey() = default;
OpKernelKey(const platform::DeviceContext& dev_ctx) { explicit OpKernelKey(const platform::DeviceContext& dev_ctx) {
place_ = dev_ctx.GetPlace(); place_ = dev_ctx.GetPlace();
} }
......
...@@ -105,7 +105,16 @@ PYBIND11_PLUGIN(core) { ...@@ -105,7 +105,16 @@ PYBIND11_PLUGIN(core) {
.def("set", PyCUDATensorSetFromArray<float>) .def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>) .def("set", PyCUDATensorSetFromArray<int>)
#endif #endif
.def("shape", [](Tensor &self) { return vectorize(self.dims()); }); .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
.def("set_float_element",
[](Tensor &self, size_t offset, float f) {
// TODO(yuyang18): Only support GPU now.
self.data<float>()[offset] = f;
})
.def("get_float_element", [](Tensor &self, size_t offset) -> float {
// TODO(yuyang18): Only support GPU now.
return self.data<float>()[offset];
});
py::class_<Variable>(m, "Variable", R"DOC(Variable Class. py::class_<Variable>(m, "Variable", R"DOC(Variable Class.
......
...@@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() { ...@@ -967,8 +967,9 @@ void RecurrentGradientMachine::generateSequence() {
size_t numSequences = getGenBatchSize(); size_t numSequences = getGenBatchSize();
resizeBootFrame(numSequences); resizeBootFrame(numSequences);
// We create only two sub-network in generation for alternate use. // We create only two sub-network in generation, one stores states of all
// Thus, we can reduce total memory of output_ in layer forward. // layers in previous time step and the other storing the states at current
// time step.
resizeOrCreateFrames(2); resizeOrCreateFrames(2);
// outFrameLines_.size() > 1UL // outFrameLines_.size() > 1UL
...@@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() { ...@@ -1001,10 +1002,9 @@ void RecurrentGradientMachine::generateSequence() {
// init outArg // init outArg
size_t resultNum = generator_.config.num_results_per_sample(); size_t resultNum = generator_.config.num_results_per_sample();
IVector::resizeOrCreate( size_t maxGenWordCount =
generator_.outArg.ids, generator_.config.max_num_frames() * numSequences * resultNum;
generator_.config.max_num_frames() * numSequences * resultNum, IVector::resizeOrCreate(generator_.outArg.ids, maxGenWordCount, false);
false);
if (resultNum > 1) { if (resultNum > 1) {
CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size())); CHECK_LE(resultNum, static_cast<size_t>(generator_.config.beam_size()));
Matrix::resizeOrCreate(generator_.outArg.in, Matrix::resizeOrCreate(generator_.outArg.in,
...@@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() { ...@@ -1012,6 +1012,11 @@ void RecurrentGradientMachine::generateSequence() {
/* width */ resultNum, /* width */ resultNum,
false, false,
/* useGpu */ false); /* useGpu */ false);
Matrix::resizeOrCreate(generator_.outArg.value,
/* height */ maxGenWordCount,
/* width */ 1,
false,
/* useGpu */ false);
} }
ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
numSequences + 1, numSequences + 1,
...@@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1313,13 +1318,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
starts[0] = 0; starts[0] = 0;
if (numResults > 1) { if (numResults > 1) {
real* probs = generator_.outArg.in->getData(); real* probs = generator_.outArg.in->getData();
real* idsProb = generator_.outArg.value->getData();
size_t curPos = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
Path& path = finalPaths_[i][j]; Path& path = finalPaths_[i][j];
generator_.ids.push_back(path.ids.size()); // sequence size size_t genLen = path.ids.size();
generator_.ids.push_back(genLen); // sequence size
generator_.ids.insert( generator_.ids.insert(
generator_.ids.end(), path.ids.begin(), path.ids.end()); generator_.ids.end(), path.ids.begin(), path.ids.end());
generator_.ids.push_back(-1); // end of sequence generator_.ids.push_back(-1); // end of sequence
memcpy(idsProb + curPos, path.idsProb.data(), sizeof(real) * genLen);
curPos += genLen;
idsProb[curPos++] = -1.0;
probs[i * numResults + j] = path.logProb; probs[i * numResults + j] = path.logProb;
if (!j && dataArgsSize_) { if (!j && dataArgsSize_) {
......
...@@ -189,6 +189,11 @@ public: ...@@ -189,6 +189,11 @@ public:
*/ */
std::vector<int> ids; std::vector<int> ids;
/**
* @brief idsProb, log probability of each generated words.
*/
std::vector<real> idsProb;
/** /**
* @brief logProb, current probability of path. * @brief logProb, current probability of path.
*/ */
...@@ -228,11 +233,13 @@ public: ...@@ -228,11 +233,13 @@ public:
*/ */
Path(Path& old, int newId, real logProb, int machineId, int topIndex) Path(Path& old, int newId, real logProb, int machineId, int topIndex)
: ids(old.ids), : ids(old.ids),
idsProb(old.idsProb),
logProb(old.logProb + logProb), logProb(old.logProb + logProb),
machineId(machineId), machineId(machineId),
topIndex(topIndex), topIndex(topIndex),
seqId(old.seqId) { seqId(old.seqId) {
ids.push_back(newId); ids.push_back(newId);
idsProb.push_back(logProb);
if (!old.probHistory.empty()) { if (!old.probHistory.empty()) {
this->probHistory = old.probHistory; this->probHistory = old.probHistory;
// probHistory store current prob, not sum // probHistory store current prob, not sum
...@@ -411,8 +418,9 @@ protected: ...@@ -411,8 +418,9 @@ protected:
struct Generator { struct Generator {
GeneratorConfig config; GeneratorConfig config;
std::vector<int> ids; // store generated sequences std::vector<int> ids; // store generated sequences
Argument outArg; // final output argument std::vector<real> idsProb; // log probability of each generated word
Argument outArg; // final output argument
}; };
bool generating_; bool generating_;
Generator generator_; Generator generator_;
......
...@@ -39,7 +39,7 @@ class BuddyAllocator { ...@@ -39,7 +39,7 @@ class BuddyAllocator {
public: public:
void* Alloc(size_t unaligned_size); void* Alloc(size_t unaligned_size);
void Free(void*); void Free(void* ptr);
size_t Used(); size_t Used();
public: public:
......
...@@ -33,17 +33,17 @@ namespace detail { ...@@ -33,17 +33,17 @@ namespace detail {
*/ */
class MetadataCache { class MetadataCache {
public: public:
MetadataCache(bool uses_gpu); explicit MetadataCache(bool uses_gpu);
public: public:
/*! \brief Load the associated metadata for the specified memory block. */ /*! \brief Load the associated metadata for the specified memory block. */
Metadata load(const MemoryBlock*); Metadata load(const MemoryBlock* memory_block);
/*! \brief Store the associated metadata for the specified memory block. */ /*! \brief Store the associated metadata for the specified memory block. */
void store(MemoryBlock*, const Metadata&); void store(MemoryBlock* memory_block, const Metadata& meta_data);
/*! \brief Indicate that the specified metadata will no longer be used. */ /*! \brief Indicate that the specified metadata will no longer be used. */
void invalidate(MemoryBlock*); void invalidate(MemoryBlock* memory_block);
public: public:
MetadataCache(const MetadataCache&) = delete; MetadataCache(const MetadataCache&) = delete;
......
...@@ -68,7 +68,7 @@ class PODDeleter { ...@@ -68,7 +68,7 @@ class PODDeleter {
static_assert(std::is_pod<T>::value, "T must be POD"); static_assert(std::is_pod<T>::value, "T must be POD");
public: public:
PODDeleter(Place place) : place_(place) {} explicit PODDeleter(Place place) : place_(place) {}
void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); } void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }
private: private:
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/add_op.h" #include "paddle/operators/add_op.h"
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h" #include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>); ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/fill_zeros_like_op.h" #include "paddle/operators/fill_zeros_like_op.h"
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
fill_zeros_like, fill_zeros_like,
paddle::operators::FillZerosLikeKernel<paddle::platform::GPUPlace, float>); paddle::operators::FillZerosLikeKernel<paddle::platform::GPUPlace, float>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/mean_op.h" #include "paddle/operators/mean_op.h"
REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel<ops::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::GPUPlace, float>);
\ No newline at end of file
...@@ -15,4 +15,4 @@ ...@@ -15,4 +15,4 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/mul_op.h" #include "paddle/operators/mul_op.h"
REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>);
\ No newline at end of file
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using namespace paddle::framework; using namespace paddle::framework; // NOLINT
namespace rnn { namespace rnn {
...@@ -94,7 +94,7 @@ void InitArgument(const ArgumentName& name, Argument* arg); ...@@ -94,7 +94,7 @@ void InitArgument(const ArgumentName& name, Argument* arg);
}; // namespace rnn }; // namespace rnn
// The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now. // The sequence format in RecurrentOp is Tensor<seq_len, batch_size, dim> now.
// TODO: // TODO(Yan Chunwei):
// 1. No-padding computing for sequences with indifinite length in one batch. // 1. No-padding computing for sequences with indifinite length in one batch.
// 2. Hierarchical RNN for sequence with sub-sequence. // 2. Hierarchical RNN for sequence with sub-sequence.
// 3. Internal Memory. // 3. Internal Memory.
...@@ -172,12 +172,10 @@ public: ...@@ -172,12 +172,10 @@ public:
/** /**
* InferShape must be called before Run. * InferShape must be called before Run.
*/ */
virtual void InferShape(const Scope& scope) const override { void InferShape(const Scope& scope) const override { alg_.InferShape(scope); }
alg_.InferShape(scope);
}
virtual void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override { const platform::DeviceContext& dev_ctx) const override {
alg_.Run(scope, dev_ctx); alg_.Run(scope, dev_ctx);
} }
...@@ -194,12 +192,10 @@ public: ...@@ -194,12 +192,10 @@ public:
/** /**
* InferShape must be called before Run. * InferShape must be called before Run.
*/ */
virtual void InferShape(const Scope& scope) const override { void InferShape(const Scope& scope) const override { alg_.InferShape(scope); }
alg_.InferShape(scope);
}
virtual void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override { const platform::DeviceContext& dev_ctx) const override {
alg_.Run(scope, dev_ctx); alg_.Run(scope, dev_ctx);
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/rowwise_add_op.h" #include "paddle/operators/rowwise_add_op.h"
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/sgd_op.h" #include "paddle/operators/sgd_op.h"
REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/sigmoid_op.h" #include "paddle/operators/sigmoid_op.h"
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/softmax_op.h" #include "paddle/operators/softmax_op.h"
REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(softmax_grad, ops::SoftmaxGradKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(softmax_grad,
ops::SoftmaxGradKernel<ops::GPUPlace, float>);
...@@ -40,7 +40,7 @@ class DeviceContext { ...@@ -40,7 +40,7 @@ class DeviceContext {
class CPUDeviceContext : public DeviceContext { class CPUDeviceContext : public DeviceContext {
public: public:
CPUDeviceContext(); CPUDeviceContext();
CPUDeviceContext(CPUPlace); explicit CPUDeviceContext(CPUPlace);
virtual ~CPUDeviceContext() {} virtual ~CPUDeviceContext() {}
Eigen::DefaultDevice* eigen_device() const; Eigen::DefaultDevice* eigen_device() const;
...@@ -55,7 +55,7 @@ class CPUDeviceContext : public DeviceContext { ...@@ -55,7 +55,7 @@ class CPUDeviceContext : public DeviceContext {
class CUDADeviceContext : public DeviceContext { class CUDADeviceContext : public DeviceContext {
public: public:
explicit CUDADeviceContext(GPUPlace); CUDADeviceContext(GPUPlace); // NOLINT
virtual ~CUDADeviceContext(); virtual ~CUDADeviceContext();
/*! \brief Wait for all operations completion in the stream. */ /*! \brief Wait for all operations completion in the stream. */
...@@ -69,10 +69,10 @@ class CUDADeviceContext : public DeviceContext { ...@@ -69,10 +69,10 @@ class CUDADeviceContext : public DeviceContext {
// clang-format off // clang-format off
/*! \brief Return cublas handle in the device context. */ /*! \brief Return cublas handle in the device context. */
cublasHandle_t cublas_handle (); cublasHandle_t cublas_handle();
/*! \brief Return cudnn handle in the device context. */ /*! \brief Return cudnn handle in the device context. */
cudnnHandle_t cudnn_handle (); cudnnHandle_t cudnn_handle();
/*! \brief Return curand handle in the device context. */ /*! \brief Return curand handle in the device context. */
curandGenerator_t curand_generator(); curandGenerator_t curand_generator();
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/platform/dynload/cublas.h> #include <paddle/platform/dynload/cublas.h>
namespace paddle { namespace paddle {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/platform/dynload/cudnn.h> #include <paddle/platform/dynload/cudnn.h>
namespace paddle { namespace paddle {
...@@ -25,4 +39,4 @@ CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP); ...@@ -25,4 +39,4 @@ CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/platform/dynload/curand.h> #include <paddle/platform/dynload/curand.h>
namespace paddle { namespace paddle {
...@@ -10,6 +24,7 @@ void *curand_dso_handle; ...@@ -10,6 +24,7 @@ void *curand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP); CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
}
} } // namespace dynload
} } // namespace platform
\ No newline at end of file } // namespace paddle
...@@ -32,7 +32,7 @@ struct CPUPlace { ...@@ -32,7 +32,7 @@ struct CPUPlace {
struct GPUPlace { struct GPUPlace {
GPUPlace() : GPUPlace(0) {} GPUPlace() : GPUPlace(0) {}
GPUPlace(int d) : device(d) {} GPUPlace(int d) : device(d) {} // NOLINT
// needed for variant equality comparison // needed for variant equality comparison
inline bool operator==(const GPUPlace &o) const { return device == o.device; } inline bool operator==(const GPUPlace &o) const { return device == o.device; }
......
...@@ -39,8 +39,8 @@ public: ...@@ -39,8 +39,8 @@ public:
// size_ is 0. // size_ is 0.
Piece(); Piece();
Piece(const char* d, size_t n); Piece(const char* d, size_t n);
Piece(const char* d); Piece(const char* d); // NOLINT
Piece(const std::string& s); Piece(const std::string& s); // NOLINT
const char* data() const { return data_; } const char* data() const { return data_; }
size_t len() const { return size_; } size_t len() const { return size_; }
......
...@@ -13,4 +13,5 @@ add_python_test(test_framework ...@@ -13,4 +13,5 @@ add_python_test(test_framework
test_sigmoid_op.py test_sigmoid_op.py
test_softmax_op.py test_softmax_op.py
test_rowwise_add_op.py test_rowwise_add_op.py
test_network.py) test_network.py
gradient_checker.py)
import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations
import numpy
import unittest
__all__ = ['get_numeric_gradient']
def get_numeric_gradient(op,
input_values,
output_name,
input_to_check,
delta=1e-2,
local_scope=None):
"""
Get Numeric Gradient for an operator's input.
:param op: C++ operator instance, could be an network
:param input_values: The input variables. Should be an dictionary, key is
variable name. Value is numpy array.
:param output_name: The final output variable name.
:param input_to_check: The input variable need to get gradient.
:param delta: The perturbation value for numeric gradient method. The
smaller delta is, the more accurate result will get. But if that delta is
too small, it could occur numerical stability problem.
:param local_scope: The local scope used for get_numeric_gradient.
:return: The gradient array in numpy format.
"""
if local_scope is None:
local_scope = core.Scope()
# Create all input variable in local_scope
for var_name in input_values:
var = local_scope.new_var(var_name)
tensor = var.get_tensor()
tensor.set_dims(input_values[var_name].shape)
tensor.alloc_float(core.CPUPlace())
tensor.set(input_values[var_name], core.CPUPlace())
# Create all output variable in local_scope
for output in op.outputs():
if local_scope.find_var(output) is None:
local_scope.new_var(output).get_tensor()
op.infer_shape(local_scope)
# allocate output memory
for output in op.outputs():
local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace())
# TODO(yuyang18): Only CPU is support now.
cpu_ctx = core.DeviceContext.create(core.CPUPlace())
def get_output():
op.run(local_scope, cpu_ctx)
return numpy.array(local_scope.find_var(output_name).get_tensor()).sum()
def product(dim):
return reduce(lambda a, b: a * b, dim, 1)
tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
tensor_size = product(tensor_to_check.get_dims())
gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32')
for i in xrange(tensor_size):
origin = tensor_to_check.get_float_element(i)
x_pos = origin + delta
tensor_to_check.set_float_element(i, x_pos)
y_pos = get_output()
x_neg = origin - delta
tensor_to_check.set_float_element(i, x_neg)
y_neg = get_output()
tensor_to_check.set_float_element(i, origin) # restore old value
gradient_flat[i] = (y_pos - y_neg) / delta / 2
return gradient_flat.reshape(tensor_to_check.get_dims())
if __name__ == '__main__':
class GetNumericGradientTest(unittest.TestCase):
def test_add_op(self):
add_op = op_creations.add_two(X="X", Y="Y", Out="Z")
x = numpy.random.random((10, 1)).astype("float32")
y = numpy.random.random((10, 1)).astype("float32")
arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册