Commit 4de2b8e1 authored by liaogang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into environ

@@ -137,9 +137,9 @@ set(EXTERNAL_LIBS
 )
 if(WITH_GPU)
-  list(APPEND EXTERNAL_LIB ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
+  list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
   if(NOT WITH_DSO)
-    list(APPEND EXTERNAL_LIB ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+    list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
   endif(NOT WITH_DSO)
 endif(WITH_GPU)
...
@@ -32,9 +32,9 @@ class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
  public:
   RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "Input X of Add").AsNoGradient();
-    AddInput("b", "Bias of Add").AsNoGradient();
-    AddOutput("Out", "Out of Add").AsNoGradient();
+    AddInput("X", "Input X of Add").NotInGradient();
+    AddInput("b", "Bias of Add").NotInGradient();
+    AddOutput("Out", "Out of Add").NotInGradient();
     AddComment("Add Op");
   }
 };
...
@@ -60,7 +60,7 @@ message OpProto {
     optional bool duplicable = 3 [ default = false ];
     optional bool intermediate = 4 [ default = false ];
-    optional bool no_gradient = 5 [ default = false ];
+    optional bool not_in_gradient = 5 [ default = false ];
   }
   // AttrProto describes the C++ type Attribute.
...
@@ -28,7 +28,7 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
   const auto& src_arg_list =
       src_type == OpArgType::IN ? proto->inputs() : proto->outputs();
   for (const auto& arg : src_arg_list) {
-    if (arg.no_gradient() && !is_grad) continue;
+    if (arg.not_in_gradient() && !is_grad) continue;
     const std::string src_name = arg.name();
     std::string dst_name = is_grad ? GradVarName(src_name) : src_name;
     dst_inout[dst_name].reserve(src_inout.at(src_name).size());
...
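The hunk above only renames the proto field that TransOpArg consults. For readers unfamiliar with the builder, this is a schematic Python sketch of the skip rule, not the C++ implementation; the "@GRAD" suffix and the dictionary shapes are assumptions for illustration only.

def trans_op_arg(proto_args, src_inout, is_grad):
    # proto_args: e.g. [{"name": "X", "not_in_gradient": True}, ...]
    dst_inout = {}
    for arg in proto_args:
        # forward args marked not_in_gradient are skipped for the non-grad side
        if arg["not_in_gradient"] and not is_grad:
            continue
        src_name = arg["name"]
        dst_name = src_name + "@GRAD" if is_grad else src_name
        dst_inout[dst_name] = list(src_inout[src_name])
    return dst_inout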
@@ -26,10 +26,10 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker {
   IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("In1", "a single input");
-    AddInput("In2_mult", "a multiple input").AsDuplicable().AsNoGradient();
+    AddInput("In2_mult", "a multiple input").AsDuplicable().NotInGradient();
     AddInput("In3_mult", "another multiple input").AsDuplicable();
     AddOutput("Out1_mult", "a multiple output").AsDuplicable();
-    AddOutput("Out2", "a single output").AsNoGradient();
+    AddOutput("Out2", "a single output").NotInGradient();
     AddComment("op with inputs and outputs ignored in gradient calculating");
   }
 };
...
@@ -184,11 +184,8 @@ class OpProtoAndCheckerMaker {
       return *this;
     }
-    // TODO(FengJiayi, yuyang18): `AsNoGradient` is a very bad name, because it
-    // means that input/output is not needed when calculate gradient. It does
-    // not mean no gradient when backward. It should be changed soon.
-    VariableBuilder& AsNoGradient() {
-      var_->set_no_gradient(true);
+    VariableBuilder& NotInGradient() {
+      var_->set_not_in_gradient(true);
       return *this;
     }
   };
...
@@ -57,11 +57,14 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
 }
 void MKLDNNFcLayer::convertWeightsFromPaddle() {
-  if (FLAGS_use_mkldnn_wgt) {
+  if (hasInitedWgt_) {
     return;
   }
-  if (hasInitedWgt_) {
+  // TODO(TJ): dst format should get from wgtVal_
+  int dstFmt = PARAM_FORMAT_MKLDNN_OI;
+  int srcFmt = weight_->getParameterPtr()->getHeaderFormat();
+  if (srcFmt == dstFmt) {
     return;
   }
@@ -78,6 +81,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
   MatrixPtr paddleWgtT;
   paddleWgt->transpose(paddleWgtT, true);
   weight_->getW()->copyFrom(*paddleWgtT);
+  weight_->getParameterPtr()->setHeaderFormat(dstFmt);
   hasInitedWgt_ = true;
 }
...
@@ -330,9 +330,7 @@ void MKLDNNTester::run(const TestConfig& dnn,
   log_ = log;
   lvl_ = level;
-  // Firstly test FLAGS_use_mkldnn_wgt = false
-  FLAGS_use_mkldnn_wgt = false;
-  // reset and run once
+  // Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight
   reset(dnn, ref, batchSize);
   randomWgtDatas();
   clearWgtDiffs();
@@ -342,17 +340,32 @@ void MKLDNNTester::run(const TestConfig& dnn,
     runOnce();
   }
-  // Then test FLAGS_use_mkldnn_wgt = true
-  FLAGS_use_mkldnn_wgt = true;
-  // after run once the mkldnn weight has been stored in dnnlayer
+  if (parameters_[DNN].empty()) {
+    // has no paramters
+    return;
+  }
+  // After run some iterations, the mkldnn weight has been stored in dnnLayer
+  // and we can also get the mkldnn weight parameter header format.
+  // Weight parameter should always be index 0 (and bias index 1).
+  // TODO(TJ): should also consider mean and var format when batchnorm ready
+  int dnnWgtFmt = parameters_[DNN][0]->getHeaderFormat();
+  int refWgtFmt = parameters_[REF][0]->getHeaderFormat();
+  if (dnnWgtFmt == refWgtFmt) {
+    // weight format are equal, so no need check more
+    return;
+  }
   // then save the weights and restart again
   vector<VectorPtr> dnnWgts, refWgts;
   CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
   saveWgt(parameters_[DNN], dnnWgts);
   saveWgt(parameters_[REF], refWgts);
-  // restart again with flag true
+  // restart again with dnn weight format
   reset(dnn, ref, batchSize);
+  // TODO(TJ): should also considerate mean and var format when batchnorm ready
+  parameters_[DNN][0]->setHeaderFormat(dnnWgtFmt);
   // restore wgt
   restoreWgt(dnnWgts, parameters_[DNN]);
...
@@ -108,7 +108,7 @@ private:
    * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the
    * max(diff/ref)
    * else return sum(abs(a-b)) / sum(abs(b))
-   * The return value should smaller than eps when passing.
+   * The return value should be smaller than eps when passing.
   */
  double getDelta(const real* d1,
                  const real* d2,
...
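The doc comment above fully specifies the comparison rule. A minimal numpy sketch of the same rule follows; the threshold values and the divide-by-zero guard are assumptions, the actual numbers live in MKLDNNTester.

import numpy as np

def get_delta(dnn, ref, thres=1e-3, fail_rate=1e-3, eps=1e-12):
    diff = np.abs(dnn - ref)
    rel = diff / np.maximum(np.abs(ref), eps)        # abs(dnn-ref)/abs(ref)
    if np.mean(rel > thres) > fail_rate:             # too many "wrong" points
        return rel.max()                             # report max(diff/ref)
    return diff.sum() / max(np.abs(ref).sum(), eps)  # sum(abs(a-b))/sum(abs(b))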
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <algorithm>  // for transform
 #include <cstring>    // for memcpy
+#include <memory>     // for unique_ptr
 #include <mutex>      // for call_once
 #include "glog/logging.h"
...
@@ -34,7 +34,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
   MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of mean op");
-    AddOutput("Out", "The output of mean op").AsNoGradient();
+    AddOutput("Out", "The output of mean op").NotInGradient();
     AddComment("Mean Operator");
   }
 };
...
@@ -55,9 +55,10 @@ class MeanGradKernel : public framework::OpKernel {
     IG->mutable_data<T>(context.GetPlace());
     T ig_size = (T)framework::product(IG->dims());
+    Eigen::DSizes<int, 1> bcast(ig_size);
     EigenVector<T>::Flatten(*IG).device(context.GetEigenDevice<Place>()) =
-        EigenScalar<T>::From(*OG) / ig_size;
+        (EigenVector<T>::From(*OG) / ig_size).broadcast(bcast);
   }
 };
...
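What the new Eigen broadcast computes, written out in numpy: the gradient of a mean with respect to every input element is the single-element output gradient divided by the element count, replicated over the input shape. A small sketch, assuming an output gradient of ones:

import numpy as np

X = np.random.random((3, 4)).astype("float32")
OG = np.ones(1, dtype="float32")            # gradient flowing into Out
IG = np.broadcast_to(OG / X.size, X.shape)  # every element gets 1/12 here
assert np.allclose(IG, 1.0 / X.size)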
@@ -30,7 +30,7 @@ class SGDOpKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto param = ctx.Input<Tensor>("param");
     auto grad = ctx.Input<Tensor>("grad");
-    auto param_out = ctx.Output<Tensor>(0);
+    auto param_out = ctx.Output<Tensor>("param_out");
     float lr = ctx.op_.GetAttr<float>("learning_rate");
     param_out->mutable_data<T>(ctx.GetPlace());
...
@@ -44,7 +44,8 @@ class SigmoidOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
+    ctx.Output<Tensor>(framework::GradVarName("X"))
+        ->Resize(ctx.Input<Tensor>("Y")->dims());
   }
 };
...
@@ -37,7 +37,7 @@ class SigmoidKernel : public framework::OpKernel {
     auto Y = EigenVector<T>::Flatten(*output);
     auto place = context.GetEigenDevice<Place>();
-    Y.device(place) = 1.0 / (1.0 + (-1.0 * X).exp());
+    Y.device(place) = 1. / (1. + (-X).exp());
   }
 };
...
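A numpy sketch of the kernel's formula and of why the gradient op above takes its shape from Y: the sigmoid derivative can be written entirely in terms of the forward output, so the input gradient has exactly Y's dimensions. The dX formula is standard calculus, not code from this commit.

import numpy as np

X = np.random.uniform(0.1, 1, (11, 17)).astype("float32")
Y = 1.0 / (1.0 + np.exp(-X))   # same formula as the kernel
dY = np.ones_like(Y)           # incoming gradient on Y
dX = dY * Y * (1.0 - Y)        # sigmoid backward; shape == Y.shape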
@@ -48,7 +48,8 @@ Parameter::Parameter(const ParameterConfig& config, bool useGpu, bool doInit)
       deviceId_(-1),
       sharedCount_(0),
       updateCounter_(0),
-      updated_(false) {
+      updated_(false),
+      headerFormat_(PARAM_FORMAT_ORIGINAL) {
   setID(-1); /* capture uninitialized id */
   if (useGpu_ && FLAGS_parallel_nn) {
     /* gpu environment is specified by device property */
@@ -285,7 +286,7 @@ bool Parameter::save(const std::string& filename) const {
 bool Parameter::save(std::ostream& s) const {
   CpuVector vec(*bufs_[PARAMETER_VALUE].get());
   Header header;
-  header.version = kFormatVersion;
+  header.format = headerFormat_;
   header.valueSize = sizeof(real);
   header.size = getSize();
@@ -344,8 +345,9 @@ bool Parameter::load(std::istream& s) {
   Header header;
   CHECK(s.read(reinterpret_cast<char*>(&header), sizeof(header)))
       << "Fail to read parameter " << getName();
-  CHECK_EQ(header.version, kFormatVersion) << "Incorrect format version: "
-                                           << header.version;
+  CHECK(isHeaderFormatSupported(header.format)) << "Incorrect format version: "
+                                                << header.format;
+  headerFormat_ = header.format;
   CHECK_EQ(header.size, getSize())
       << "The size (" << header.size << ") in the file does not match the size "
      << "(" << getSize() << ") of the parameter: " << getName();
...
@@ -34,6 +34,20 @@ limitations under the License. */
 namespace paddle {
+typedef enum {
+  /// The paddle original basic format
+  PARAM_FORMAT_ORIGINAL = 0,
+  /// See mkldnn_memory_format_t in
+  /// https://github.com/01org/mkl-dnn/blob/master/include/mkldnn_types.h
+  /// for a detailed description.
+  /// 2D weights tensor in the format (output channels, input channels).
+  PARAM_FORMAT_MKLDNN_OI,
+  /// The total format items numbers
+  PARAM_FORMAT_ITEMS,
+} PARAM_FORMAT;
 class SparsePrefetchRowCpuMatrix;
 class Parameter;
@@ -242,14 +256,30 @@ public:
   /// Initialize the value to 0
   void zeroMem();
-  static const int kFormatVersion = 0;
   /// file header structure
   struct Header {
-    int32_t version;    // = 0, file format version
+    int32_t format;     // = PARAM_FORMAT
     uint32_t valueSize; // = sizeof(real)
     uint64_t size;      // = getSize()
   };
+  /**
+   * @brief Is the header format supported.
+   */
+  static bool isHeaderFormatSupported(int32_t fmt) {
+    return fmt < PARAM_FORMAT_ITEMS;
+  }
+  /**
+   * @brief Get the format in header.
+   */
+  int getHeaderFormat() { return headerFormat_; }
+  /**
+   * @brief Set the format in header.
+   */
+  void setHeaderFormat(int32_t fmt) { headerFormat_ = fmt; }
   /**
    * @brief Parameter Update Hook.
    *
@@ -321,6 +351,9 @@ protected:
   bool updated_;
   SparseFormat format_;
+  /// The header format for saving or loading param
+  int32_t headerFormat_;
   std::vector<std::shared_ptr<IParameterUpdaterHook>> updaterHooks_;
 public:
...
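For reference, the Header struct above is 16 bytes (int32 format + uint32 valueSize + uint64 size) with no padding needed. A hypothetical Python sketch of reading and validating it from a saved parameter file, assuming little-endian byte order and that the struct is written exactly as laid out:

import struct

PARAM_FORMAT_ORIGINAL, PARAM_FORMAT_MKLDNN_OI, PARAM_FORMAT_ITEMS = 0, 1, 2

def read_param_header(path):
    with open(path, "rb") as f:
        fmt, value_size, size = struct.unpack("<iIQ", f.read(16))
    if not fmt < PARAM_FORMAT_ITEMS:      # mirrors isHeaderFormatSupported
        raise ValueError("Incorrect format version: %d" % fmt)
    return fmt, value_size, size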
@@ -1032,8 +1032,8 @@ void ParameterServer2::loadValueVector(const LoadValueRequest& request,
     Parameter::Header header;
     CHECK(fs.read(reinterpret_cast<char*>(&header), sizeof(header)))
         << "Fail to read parameters in pserver";
-    CHECK_EQ(header.version, Parameter::kFormatVersion)
-        << "Incorrect format version: " << header.version;
+    CHECK(Parameter::isHeaderFormatSupported(header.format))
+        << "Incorrect format version: " << header.format;
     CHECK_EQ(header.size, (size_t)size_)
         << "The size (" << header.size << ") in the file does not match the size "
         << "(" << size_ << ") of the pserver: " << serverId_;
@@ -1063,7 +1063,8 @@ void ParameterServer2::saveValueVector(const SaveValueRequest& request,
     CpuVector& vec = vectors_[PARAMETER_APPLY] ? *vectors_[PARAMETER_APPLY]
                                                : *vectors_[PARAMETER_VALUE];
     Parameter::Header header;
-    header.version = Parameter::kFormatVersion;
+    // TODO(TJ): save param headerFormat_
+    header.format = PARAM_FORMAT_ORIGINAL;
     header.valueSize = sizeof(real);
     header.size = size_;
...
@@ -29,7 +29,6 @@ DECLARE_bool(with_gpu);
 DECLARE_bool(parallel_nn);
 DECLARE_string(config_args);
 DECLARE_bool(use_mkldnn);
-DECLARE_bool(use_mkldnn_wgt);
 const char *kConfigParserModuleName = "paddle.trainer.config_parser";
 const char *kConfigParserFuncName = "parse_config_and_serialize";
@@ -47,7 +46,6 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath)
              << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu
              << ",parallel_nn=" << FLAGS_parallel_nn
              << ",use_mkldnn=" << FLAGS_use_mkldnn
-             << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt
              << ",cudnn_version=" << hl_get_cudnn_lib_version();
   if (!FLAGS_config_args.empty()) {
     configArgs << "," << FLAGS_config_args;
...
@@ -27,7 +27,6 @@ DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training");
 DEFINE_bool(use_mkldnn, false, "Only support CPU training");
 #endif
-DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight");
 DEFINE_bool(parallel_nn,
             false,
             "Whether to use multi-threads to calculate one neural network."
...
@@ -41,4 +41,3 @@ DECLARE_string(predict_file);
 DECLARE_bool(prev_batch_state);
 DECLARE_string(init_model_path);
 DECLARE_bool(use_mkldnn);
-DECLARE_bool(use_mkldnn_wgt);
@@ -25,3 +25,5 @@ py_test(test_operator SRCS test_operator.py)
 # py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py)
 py_test(test_uniform_random_op SRCS test_uniform_random_op.py)
 py_test(test_recurrent_op SRCS test_recurrent_op.py)
+py_test(test_sgd_op SRCS test_sgd_op.py)
+py_test(test_gradient_checker SRCS test_gradient_checker.py)
...
 import unittest
 import numpy
+import itertools
 import paddle.v2.framework.core as core
 from paddle.v2.framework.op import Operator
@@ -8,6 +9,7 @@ __all__ = ['get_numeric_gradient']
 def create_op(op_type):
+    # TODO need to set attrs
     kwargs = dict()
     for in_name in Operator.get_op_input_names(op_type):
         kwargs[in_name] = in_name
@@ -66,7 +68,6 @@ def get_numeric_gradient(op,
     local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace(
     ))
-    # TODO(yuyang18): Only CPU is support now.
     cpu_ctx = core.DeviceContext.create(core.CPUPlace())
     def get_output():
@@ -109,12 +110,110 @@ class GradientChecker(unittest.TestCase):
 class GradientChecker(unittest.TestCase):
-    def assert_is_close(self, numeric_grads, scope, max_relative_error,
-                        msg_prefix):
-        for name in numeric_grads:
-            b = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
-            a = numeric_grads[name]
+    def __get_gradient(self, forward_op, backward_op, input_value, grad_names,
+                       place):
+        """Get the input gradients after running forward and backward operators
+        on the given places.
+        :param forward_op: forward operator
+        :type forward_op: Operator
+        :param backward_op: backward operator
+        :type backward_op: Operator
+        :param input_value: input values.
+        :type input_value: dict{string:numpy.array}
+        :param grad_names: the names of returned input gradients.
+        :type input_value: a list of string
+        :param place: the device type.
+        :type place: CPUPlace or GPUPlace
+        :return: the input grdients of given grad_names.
+        :rtype: a list of numpy.array
+        """
+        scope = core.Scope()
+        ctx = core.DeviceContext.create(place)
+        inputs = forward_op.inputs()
+        in_names = [item for k in inputs for item in inputs[k]]
+        outputs = forward_op.outputs()
+        out_names = [item for k in outputs for item in outputs[k]]
+        # create input var and set value
+        for name, value in input_value.iteritems():
+            if name not in in_names:
+                raise ValueError(name + "does not exist in Op's inputs.")
+            var = scope.new_var(name).get_tensor()
+            var.set_dims(value.shape)
+            var.set(value, place)
+        # run forward op
+        for out_name in out_names:
+            scope.new_var(out_name)
+        forward_op.infer_shape(scope)
+        forward_op.run(scope, ctx)
+        # set output var's shape
+        # set output grad to ones
+        for name in out_names:
+            out_tensor = scope.find_var(name).get_tensor()
+            grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
+            grad_tensor.set_dims(out_tensor.shape())
+            data = numpy.ones(out_tensor.shape(), dtype=numpy.float32)
+            grad_tensor.set(data, place)
+        # run backward op
+        for name in backward_op.outputs():
+            scope.new_var(name)
+        backward_op.infer_shape(scope)
+        backward_op.run(scope, ctx)
+        outs = [
+            numpy.array(scope.find_var(name).get_tensor())
+            for name in grad_names
+        ]
+        return outs
+    def compare_grad(self, forward_op, input_value):
+        """ Compare the input gradients between CPU and GPU for the given forward
+        operator.
+        :param forward_op: forward operator
+        :type forward_op: Operator
+        :param input_value: input values.
+        :type input_value: dict{string:numpy.array}
+        :raises: AssertionError, there is different gradient value.
+        """
+        backward_op = core.Operator.backward(forward_op, set())
+        # return if not compile with GPU or not implementing GPU kernel
+        if not (core.is_compile_gpu() and backward_op.support_gpu()):
+            return
+        outputs = backward_op.outputs()
+        out_names = [item for k in outputs for item in outputs[k]]
+        cpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
+                                        out_names, core.CPUPlace())
+        gpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
+                                        out_names, core.GPUPlace(0))
+        for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads,
+                                                   out_names):
+            self.assertTrue(
+                numpy.allclose(
+                    c_grad, g_grad, atol=1e-4),
+                "output name: " + name + " has diff")
+    def __assert_is_close(self, numeric_grads, analytic_grads, names,
+                          max_relative_error, msg_prefix):
+        """Use relative error for the comparison.
+        :param numeric_grads: the numerical graidents.
+        :type numeric_grads: a list of numpy.array
+        :param analytic_grads: the analytical graidents.
+        :type analytic_grads: a list of numpy.array
+        :param name: the names of gradients, used to print for debug.
+        :type names: a list of string
+        :param msg_prefix: string info, used to print for debug.
+        :type msf_prefix: string
+        """
+        for a, b, name in itertools.izip(numeric_grads, analytic_grads, names):
             abs_a = numpy.abs(a)
             # if abs_a is nearly zero, then use abs error for a, not relative
             # error.
@@ -159,105 +258,26 @@ class GradientChecker(unittest.TestCase):
         inputs = forward_op.inputs()
         in_names = [item for k in inputs for item in inputs[k]]
-        outputs = forward_op.outputs()
-        out_names = [item for k in outputs for item in outputs[k]]
         for no_grad in no_grad_set:
             if no_grad not in in_names:
                 raise ValueError("no_grad should be in in_names")
         backward_op = core.Operator.backward(forward_op, no_grad_set)
-        bwd_outputs = backward_op.outputs()
-        bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]]
         places = [core.CPUPlace()]
         if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
             places.append(core.GPUPlace(0))
-        numeric_grad = dict()
-        # get numeric gradient
-        for check_name in inputs_to_check:
-            numeric_grad[check_name] = \
-                get_numeric_gradient(forward_op, input_vars, output_name,
-                                     check_name)
-        # get operator gradient according to different device
+        # get numerical gradients
+        numeric_grads = [
+            get_numeric_gradient(forward_op, input_vars, output_name, name)
+            for name in inputs_to_check
+        ]
+        check_names = [grad_var_name(name) for name in inputs_to_check]
         for place in places:
-            scope = core.Scope()
-            ctx = core.DeviceContext.create(place)
-            # create input var and set value
-            for name, value in input_vars.iteritems():
-                if name not in in_names:
-                    raise ValueError(name + " not in op.inputs_")
-                var = scope.new_var(name).get_tensor()
-                var.set_dims(value.shape)
-                var.set(value, place)
-            # create output var
-            for out_name in out_names:
-                scope.new_var(out_name).get_tensor()
-            # infer the shape of output var and compute/set value of output var
-            forward_op.infer_shape(scope)
-            forward_op.run(scope, ctx)
-            # create output grad var
-            # set shape as the output var
-            # set value of this grad to ones
-            for name in out_names:
-                out_tensor = scope.find_var(name).get_tensor()
-                grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
-                grad_tensor.set_dims(out_tensor.shape())
-                data = 1.0 * numpy.ones(out_tensor.shape())
-                grad_tensor.set(data, place)
-            # create input grad var
-            for name in bwd_out_names:
-                scope.new_var(name).get_tensor()
-            # infer the shape of input gradient var and compute/set it's value
-            # with backward op
-            backward_op.infer_shape(scope)
-            backward_op.run(scope, ctx)
-            self.assert_is_close(numeric_grad, scope, max_relative_error,
+            # get analytical gradients according to different device
+            analytic_grads = self.__get_gradient(forward_op, backward_op,
+                                                 input_vars, check_names, place)
+            self.__assert_is_close(numeric_grads, analytic_grads, check_names,
+                                   max_relative_error,
                                    "Gradient Check On %s" % str(place))
-class GetNumericGradientTest(unittest.TestCase):
-    def test_add_op(self):
-        add_op = Operator('add_two', X="X", Y="Y", Out="Z")
-        x = numpy.random.random((10, 1)).astype("float32")
-        y = numpy.random.random((10, 1)).astype("float32")
-        arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
-        self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-2)
-    def test_softmax_op(self):
-        def stable_softmax(x):
-            """Compute the softmax of vector x in a numerically stable way."""
-            shiftx = x - numpy.max(x)
-            exps = numpy.exp(shiftx)
-            return exps / numpy.sum(exps)
-        def label_softmax_grad(Y, dY):
-            dX = Y * 0.0
-            for i in range(Y.shape[0]):
-                d = numpy.dot(Y[i, :], dY[i, :])
-                dX[i, :] = Y[i, :] * (dY[i, :] - d)
-            return dX
-        softmax_op = Operator("softmax", X="X", Y="Y")
-        X = numpy.random.random((2, 2)).astype("float32")
-        Y = numpy.apply_along_axis(stable_softmax, 1, X)
-        dY = numpy.ones(Y.shape)
-        dX = label_softmax_grad(Y, dY)
-        arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
-        numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
 if __name__ == '__main__':
     unittest.main()
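The numerical side of the checker above is a plain finite-difference estimate. Here is a self-contained numpy sketch of the idea, independent of the Paddle scope/operator API (get_numeric_gradient perturbs tensors through the scope instead):

import numpy as np

def numeric_gradient(f, x, delta=1e-5):
    """Central-difference estimate of d f(x) / d x for a scalar-valued f."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + delta
        f_plus = f(x)
        x[idx] = orig - delta
        f_minus = f(x)
        x[idx] = orig                                # restore the entry
        grad[idx] = (f_plus - f_minus) / (2 * delta)
        it.iternext()
    return grad

# e.g. for a mean op, every entry of the gradient should be 1 / x.size
x = np.random.random((3, 4))
assert np.allclose(numeric_gradient(lambda a: a.mean(), x), 1.0 / x.size)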
...
+import unittest
+import numpy
+from paddle.v2.framework.op import Operator
+from gradient_checker import GradientChecker
+from gradient_checker import get_numeric_gradient
+class GetNumericGradientTest(unittest.TestCase):
+    def test_add_op(self):
+        add_op = Operator('add_two', X="X", Y="Y", Out="Z")
+        x = numpy.random.random((10, 1)).astype("float32")
+        y = numpy.random.random((10, 1)).astype("float32")
+        arr = get_numeric_gradient(add_op, {'X': x, "Y": y}, 'Z', 'X')
+        self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4)
+    def test_softmax_op(self):
+        def stable_softmax(x):
+            """Compute the softmax of vector x in a numerically stable way."""
+            shiftx = x - numpy.max(x)
+            exps = numpy.exp(shiftx)
+            return exps / numpy.sum(exps)
+        def label_softmax_grad(Y, dY):
+            dX = Y * 0.0
+            for i in range(Y.shape[0]):
+                d = numpy.dot(Y[i, :], dY[i, :])
+                dX[i, :] = Y[i, :] * (dY[i, :] - d)
+            return dX
+        softmax_op = Operator("softmax", X="X", Y="Y")
+        X = numpy.random.random((2, 2)).astype("float32")
+        Y = numpy.apply_along_axis(stable_softmax, 1, X)
+        dY = numpy.ones(Y.shape)
+        dX = label_softmax_grad(Y, dY)
+        arr = get_numeric_gradient(softmax_op, {"X": X}, 'Y', 'X')
+        numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
+if __name__ == '__main__':
+    unittest.main()
...
 import unittest
 from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
 import numpy as np
@@ -12,5 +13,12 @@ class TestMeanOp(unittest.TestCase):
         self.outputs = {'Out': np.mean(self.inputs['X'])}
+class MeanGradOpTest(GradientChecker):
+    def test_normal(self):
+        op = create_op("mean")
+        inputs = {"X": np.random.random((10, 10)).astype("float32")}
+        self.check_grad(op, inputs, set("X"), "Out")
 if __name__ == '__main__':
     unittest.main()
...
 import unittest
-from op_test_util import OpTestMeta
 import numpy as np
+from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
 class TestSigmoidOp(unittest.TestCase):
@@ -8,12 +9,20 @@ class TestSigmoidOp(unittest.TestCase):
     def setUp(self):
         self.type = "sigmoid"
-        self.inputs = {'X': np.random.random((32, 100)).astype("float32")}
+        self.inputs = {'X': np.random.random((15, 31)).astype("float32")}
         self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
-#class TestSigmoidGradOp(unittest.TestCase):
-#TODO(qingqing) add unit test
+class TestSigmoidGradOp(GradientChecker):
+    def test_grad(self):
+        op = create_op("sigmoid")
+        inputs = {"X": np.random.uniform(0.1, 1, [11, 17]).astype("float32")}
+        # compare gpu and cpu results for backward op.
+        # this test will be skiped if only compiling CPU version.
+        self.compare_grad(op, inputs)
+        # check gradients
+        self.check_grad(op, inputs, set("X"), "Y", max_relative_error=0.007)
 if __name__ == '__main__':
     unittest.main()