Commit f191c820 authored by guosheng

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into fix-GRUUnitOp-dev

@@ -5,6 +5,7 @@ height = 224
 width = 224
 num_class = 1000
 batch_size = get_config_arg('batch_size', int, 128)
+use_gpu = get_config_arg('use_gpu', bool, True)
 args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
 define_py_data_sources2(
@@ -16,6 +17,8 @@ settings(
     learning_method=MomentumOptimizer(0.9),
     regularization=L2Regularization(0.0005 * batch_size))
+conv_projection = conv_projection if use_gpu else img_conv_layer
+
 def inception2(name, input, channels, \
         filter1,
         filter3R, filter3,
@@ -138,7 +141,7 @@ def inception(name, input, channels, \
     cat = concat_layer(
         name=name,
         input=[cov1, cov3, cov5, covprj],
-        bias_attr=True,
+        bias_attr=True if use_gpu else False,
         act=ReluActivation())
     return cat
...
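The new `use_gpu` switch above works by rebinding a name once at config-parse time: on CPU/MKLDNN runs, `conv_projection` is replaced wholesale by `img_conv_layer`, and `bias_attr` is only enabled on GPU, so the inception definitions below need no per-call branching. A minimal, self-contained Python sketch of that aliasing pattern (the `gpu_impl`/`cpu_impl` names are placeholders, not Paddle APIs):

    # Bind the implementation once, driven by a config flag;
    # every later call site just uses the chosen alias.
    def gpu_impl(x):
        return ('gpu', x)

    def cpu_impl(x):
        return ('cpu', x)

    use_gpu = False
    impl = gpu_impl if use_gpu else cpu_impl  # picked once, at "config" time
    print(impl(3))  # -> ('cpu', 3)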
@@ -40,6 +40,7 @@ fi
 for use_mkldnn in True False; do
   for batchsize in 64 128 256; do
     train vgg 19 $batchsize $use_mkldnn
     train resnet 50 $batchsize $use_mkldnn
+    train googlenet v1 $batchsize $use_mkldnn
   done
 done
@@ -212,6 +212,37 @@ Error __must_check backward(Argument& act) {
 }
 END_DEFINE_ACTIVATION(sequence_softmax)
+/*
+ * @brief SoftSign Activation.
+ * \f[
+ * f(z) = \frac{z}{1 + |z|}
+ * \f]
+ */
+BEGIN_DEFINE_ACTIVATION(softsign)
+private:
+MatrixPtr denominator_;
+
+Error __must_check forward(Argument& act) {
+  size_t height = act.value->getHeight();
+  size_t width = act.value->getWidth();
+  Matrix::resizeOrCreate(
+      denominator_, height, width, false, useGpu(act.deviceId));
+  denominator_->assign(*act.value);
+  denominator_->abs2();
+  denominator_->add(1.);
+
+  act.value->dotDiv(*act.value, *denominator_);
+  return Error();
+}
+
+Error __must_check backward(Argument& act) {
+  denominator_->square2();
+  denominator_->scalarDiv(*denominator_, 1.);
+  act.grad->dotMul(*act.grad, *denominator_);
+  return Error();
+}
+END_DEFINE_ACTIVATION(softsign)
+
 /**
  * @brief Relu Activation.
  * forward. y = max(0, z)
...
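A note on the math behind the backward pass added above: `forward` leaves `denominator_` holding 1 + |z|, and `backward` squares it and takes its reciprocal (`square2()` then `scalarDiv(..., 1.)`) because, using z sgn(z) = |z|,

    f(z) = \frac{z}{1+|z|}, \qquad
    f'(z) = \frac{(1+|z|) - z\,\operatorname{sgn}(z)}{(1+|z|)^2} = \frac{1}{(1+|z|)^2}

so the incoming gradient is scaled elementwise by 1/(1+|z|)^2, which is exactly the `dotMul` applied to `act.grad`.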
@@ -38,12 +38,13 @@ bool MKLDNNAddtoLayer::init(const LayerMap& layerMap,
 }
 void MKLDNNAddtoLayer::reshape(
-    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
+    int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
   CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed";
   reshapeInput(bs, ih, iw);
   ic = inputLayers_[0]->getSize() / ih / iw;
   CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize());
-  CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw);
+  CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(),
+           (size_t)bs * ic * ih * iw);
   for (size_t i = 0; i < inputLayers_.size(); i++) {
     CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize());
     CHECK_EQ(layerSize_, inputLayers_[i]->getSize());
@@ -57,47 +58,43 @@ void MKLDNNAddtoLayer::reshape(
 }
 void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
-                                MKLDNNMatrixPtr& in,
-                                MKLDNNMatrixPtr& wgt,
-                                MKLDNNMatrixPtr& bias,
+                                std::vector<MKLDNNMatrixPtr>& inputs,
                                 MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(inVals_, bias, out);
-  in = inVals_[0];
+  resetFwdBuffers(inputs, biasVal_, out);
   std::shared_ptr<sum::primitive_desc> fwdPD;
   std::shared_ptr<sum::primitive_desc> biasPD;
-  resetFwdPD(fwdPD, biasPD, inVals_, bias, out);
-  resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, bias, out);
+  resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out);
 }
 void MKLDNNAddtoLayer::resetBwd(std::vector<primitive>& pipeline,
-                                MKLDNNMatrixPtr& in,
-                                MKLDNNMatrixPtr& wgt,
-                                MKLDNNMatrixPtr& bias,
+                                std::vector<MKLDNNMatrixPtr>& inputs,
                                 MKLDNNMatrixPtr& out) {
-  resetBwdBuffers(inGrads_, bias, out);
-  in = inGrads_[0];
+  resetBwdBuffers(inputs, biasGrad_, out);
   // backward only need share output grad to input grad
-  for (size_t i = 0; i < inGrads_.size(); i++) {
-    if (inGrads_[i] != nullptr) {
-      inGrads_[i] = out;
-      inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData());
+  for (size_t i = 0; i < inputs.size(); i++) {
+    if (inputs[i] != nullptr) {
+      inputs[i] = out;
+      inputLayers_[i]->getOutputGrad()->setData(inputs[i]->getData());
     }
   }
   // backward bias
   bwdBias_ = nullptr;
-  if (bias) {
+  if (biasGrad_) {
     std::vector<float> scales(bs_, 1.0);
-    std::vector<memory::primitive_desc> srcPDs(bs_, bias->getPrimitiveDesc());
-    auto biasPD = sum::primitive_desc(bias->getMemoryDesc(), scales, srcPDs);
+    std::vector<memory::primitive_desc> srcPDs(bs_,
+                                               biasGrad_->getPrimitiveDesc());
+    auto biasPD =
+        sum::primitive_desc(biasGrad_->getMemoryDesc(), scales, srcPDs);
    std::vector<primitive::at> srcs;
     for (size_t i = 0; i < grads_.size(); ++i) {
       srcs.push_back(*(grads_[i]));
     }
-    bwdBias_.reset(new sum(biasPD, srcs, *bias));
+    bwdBias_.reset(new sum(biasPD, srcs, *biasGrad_));
     pipeline.push_back(*bwdBias_);
   }
 }
@@ -208,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   inputs.resize(inputLayers_.size());
   for (size_t i = 0; i < inputs.size(); i++) {
-    resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i);
+    resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
     CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc());
   }
...
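A side note on why the backward pass above can simply alias every input gradient to the output gradient buffer: for an elementwise sum z = sum_i x_i, dz/dx_i = 1, so each input receives the output gradient unchanged. A quick finite-difference check in plain Python (toy values, not Paddle types):

    # Verify d(sum)/dx_i == 1 for an add-to (elementwise sum) layer.
    def addto(xs):
        return sum(xs)

    xs = [0.5, -1.25, 2.0]
    eps = 1e-6
    for i in range(len(xs)):
        bumped = list(xs)
        bumped[i] += eps
        grad_i = (addto(bumped) - addto(xs)) / eps
        print(i, round(grad_i, 4))  # ~1.0 for every input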
@@ -26,9 +26,6 @@ namespace paddle {
  */
 class MKLDNNAddtoLayer : public MKLDNNLayer {
 protected:
-  std::vector<MKLDNNMatrixPtr> inVals_;
-  std::vector<MKLDNNMatrixPtr> inGrads_;
-
   // layer size == ic * ih * iw == oc * oh *ow, and can not be changed
   size_t layerSize_;
@@ -50,52 +47,19 @@ public:
                 const ParameterMap& parameterMap) override;
   void reshape(
-      int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
+      int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void updateWeights(const UpdateCallback& callback) override;
-  void printValueFormat() override {
-    for (size_t i = 0; i < inVals_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>";
-    }
-    if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
-    }
-  }
-
-  void printGradFormat() override {
-    if (extOutGrad_) {
-      VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
-    }
-    if (outGrad_) {
-      VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
-    }
-    for (size_t i = 0; i < inGrads_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<";
-    }
-  }
-
 protected:
-  /**
-   * Forward functions: reset buffers(inputs, output, bias),
-   * reset primitive descriptor,
-   * reset pipeline.
-   */
   void resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
@@ -110,17 +74,10 @@ protected:
                         std::vector<MKLDNNMatrixPtr>& inputs,
                         MKLDNNMatrixPtr& bias,
                         MKLDNNMatrixPtr& out);
-  /**
-   * Backward functions: reset buffers(inputs, output, bias)
-   */
   void resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
-  /**
-   * prepare for bias
-   */
   void prepareBias(MKLDNNMatrixPtr& bias,
                    const MatrixPtr& biasMat,
                    const MKLDNNMatrixPtr& out,
...
@@ -116,21 +116,20 @@ void MKLDNNBatchNormLayer::calMovingMeanAndVar() {
 }
 void MKLDNNBatchNormLayer::reshape(
-    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
+    int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
   reshapeInput(bs, ih, iw);
   oh = ih;
   ow = iw;
   // ic_ and oc can not be changed
-  CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic)
+  CHECK_EQ((size_t)ic,
+           inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw)
       << "Input channel can not be changed";
   reshapeOutput(oh, ow);
   resizeOutput(bs, oc * oh * ow);
 }
 void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
-                                    MKLDNNMatrixPtr& in,
-                                    MKLDNNMatrixPtr& wgt,
-                                    MKLDNNMatrixPtr& bias,
+                                    std::vector<MKLDNNMatrixPtr>& inputs,
                                     MKLDNNMatrixPtr& out) {
   // In training phase, it will always calculate mean and var,
   // so useGlobalStats must be false.
@@ -140,25 +139,23 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
     useGlobalStats_ = false;
   }
-  resetFwdBuffers(in, wgt, out);
-  resetFwdPD(fwdPD_, in, wgt, out);
-  resetFwdPipeline(pipeline, fwdPD_, in, wgt, out);
+  resetFwdBuffers(inputs[0], wgtVal_, out);
+  resetFwdPD(fwdPD_, inputs[0], wgtVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out);
 }
 void MKLDNNBatchNormLayer::resetBwd(std::vector<primitive>& pipeline,
-                                    MKLDNNMatrixPtr& in,
-                                    MKLDNNMatrixPtr& wgt,
-                                    MKLDNNMatrixPtr& bias,
+                                    std::vector<MKLDNNMatrixPtr>& inputs,
                                     MKLDNNMatrixPtr& out) {
   std::shared_ptr<bn_bwd::primitive_desc> pd;
-  resetBwdBuffers(in, wgt, out);
-  resetBwdPD(pd, in, wgt, out);
-  resetBwdPipeline(pipeline, pd, in, wgt, out);
+  resetBwdBuffers(inputs[0], wgtGrad_, out);
+  resetBwdPD(pd, inputs[0], wgtGrad_, out);
+  resetBwdPipeline(pipeline, pd, inputs[0], wgtGrad_, out);
 }
 void MKLDNNBatchNormLayer::forward(PassType passType) {
@@ -260,9 +257,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline(
 void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                            MKLDNNMatrixPtr& wgt,
                                            MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
   if (gradScaleShift_) {
     CHECK(wgtVal_);
     resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc());
@@ -297,11 +294,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline(
   if (pd == nullptr) {
     return;
   }
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   bwdData_.reset(
       wgt && wgtVal_
-          ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt)
-          : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in));
+          ? new bn_bwd(
+                *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt)
+          : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in));
   pipeline.push_back(*bwdData_);
 }
...
@@ -73,18 +73,14 @@ public:
   void forward(PassType passType) override;
   void reshape(
-      int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
+      int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void updateWeights(const UpdateCallback& callback) override;
@@ -98,11 +94,7 @@ protected:
   * moving = moving * AvgFraction + local * (1 - AvgFraction)
   */
   void calMovingMeanAndVar();
-  /**
-   * Forward functions: reset buffers(input, weight, output),
-   * reset primitive descriptor,
-   * reset pipeline.
-   */
   void resetFwdBuffers(MKLDNNMatrixPtr& in,
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& out);
@@ -115,12 +107,6 @@ protected:
                         MKLDNNMatrixPtr& in,
                         MKLDNNMatrixPtr& wgt,
                         MKLDNNMatrixPtr& out);
-  /**
-   * Backward functions: reset buffers(input, weight, output),
-   * reset primitive descriptor,
-   * reset pipeline.
-   */
   void resetBwdBuffers(MKLDNNMatrixPtr& in,
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& out);
...
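The `moving = moving * AvgFraction + local * (1 - AvgFraction)` comment kept above is the usual exponential moving average that `calMovingMeanAndVar()` maintains for inference-time statistics. Written out, with k the average fraction:

    \mu_{\text{moving}} \leftarrow k\,\mu_{\text{moving}} + (1-k)\,\mu_{\text{batch}}, \qquad
    \sigma^2_{\text{moving}} \leftarrow k\,\sigma^2_{\text{moving}} + (1-k)\,\sigma^2_{\text{batch}}

For example, k = 0.9 with a moving mean of 1.0 and a batch mean of 2.0 gives 0.9 * 1.0 + 0.1 * 2.0 = 1.1.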
@@ -32,17 +32,16 @@ bool MKLDNNConcatLayer::init(const LayerMap& layerMap,
 }
 void MKLDNNConcatLayer::reshape(
-    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
+    int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
   reshapeInput(bs, ih, iw);
   ic = inputLayers_[0]->getSize() / ih / iw;
   CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize());
-  CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw);
+  CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(),
+           (size_t)bs * ic * ih * iw);
   CHECK_GT(inputLayers_.size(), 1UL);
   channels_.resize(inputLayers_.size());
   channels_[0] = ic;
-  // need change the output channel, so use oc_ instead
-  // TODO(TJ): change API, use &oc
-  oc_ = ic;
+  oc = ic;
   for (size_t i = 1; i < inputLayers_.size(); i++) {
     int batchsize, height, witdh;
     reshapeInput(batchsize, height, witdh, i);
@@ -52,37 +51,31 @@ void MKLDNNConcatLayer::reshape(
     channels_[i] = inputLayers_[i]->getSize() / height / witdh;
     CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize());
-    oc_ += channels_[i];
+    oc += channels_[i];
   }
   oh = ih;
   ow = iw;
   reshapeOutput(oh, ow);
-  resizeOutput(bs, oc_ * oh * ow);
+  resizeOutput(bs, oc * oh * ow);
 }
 void MKLDNNConcatLayer::resetFwd(std::vector<primitive>& pipeline,
-                                 MKLDNNMatrixPtr& in,
-                                 MKLDNNMatrixPtr& wgt,
-                                 MKLDNNMatrixPtr& bias,
+                                 std::vector<MKLDNNMatrixPtr>& inputs,
                                  MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(inVals_, out);
-  in = inVals_[0];
+  resetFwdBuffers(inputs, out);
   std::shared_ptr<concat::primitive_desc> fwdPD;
-  resetFwdPD(fwdPD, inVals_, out);
-  resetFwdPipeline(pipeline, fwdPD, inVals_, out);
+  resetFwdPD(fwdPD, inputs, out);
+  resetFwdPipeline(pipeline, fwdPD, inputs, out);
 }
 void MKLDNNConcatLayer::resetBwd(std::vector<primitive>& pipeline,
-                                 MKLDNNMatrixPtr& in,
-                                 MKLDNNMatrixPtr& wgt,
-                                 MKLDNNMatrixPtr& bias,
+                                 std::vector<MKLDNNMatrixPtr>& inputs,
                                  MKLDNNMatrixPtr& out) {
-  resetBwdBuffers(inGrads_, out);
-  in = inGrads_[0];
-  resetBwdPipeline(pipeline, bwds_, inGrads_, out);
+  resetBwdBuffers(inputs, out);
+  resetBwdPipeline(pipeline, bwds_, inputs, out);
 }
 void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
@@ -90,10 +83,7 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   inputs.resize(inputLayers_.size());
   bool has8c = false, has16c = false, hasnc = false;
   for (size_t i = 0; i < inputs.size(); i++) {
-    // resetInValue will use ic_ so temporary change as current input's channel
-    // TODO(TJ): change ic_ as vector then can remove channels_
-    ic_ = channels_[i];
-    resetInValue(inputs[i], nullptr, i);
+    resetInValue(inputs[i], nullptr, i, channels_[i]);
     CHECK(inputs[i]);
     auto dm = inputs[i]->getDims();
     // inputs format can be different, but ndims must equal
@@ -114,8 +104,6 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
       has16c = true;
     }
   }
-  // change back, ic_ always save the input 0 size
-  ic_ = channels_[0];
   format outFmt;
   if (has16c && oc_ % 16 == 0) {
@@ -168,14 +156,9 @@ void MKLDNNConcatLayer::resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   inputs.resize(inputLayers_.size());
   for (size_t i = 0; i < inputs.size(); i++) {
     CHECK(inVals_[i]);
-    // resetInGrad will use inVal_
-    // TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_
-    inVal_ = inVals_[i];
     resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i);
     CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc());
   }
-  // change back, inVal_ always save the input 0
-  inVal_ = inVals_[0];
 }
 void MKLDNNConcatLayer::resetBwdPipeline(
...
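In the reshape above, the output channel count is now accumulated directly into the by-reference `oc` argument: every input must share input 0's spatial size, and each contributes its own channel count. A small Python illustration of that bookkeeping (the sizes are made-up example values):

    # Channel arithmetic performed by MKLDNNConcatLayer::reshape.
    ih = iw = 7
    input_sizes = [64 * ih * iw, 96 * ih * iw, 32 * ih * iw]  # getSize() per input
    channels = [s // (ih * iw) for s in input_sizes]
    oc = sum(channels)
    print(channels, oc)  # [64, 96, 32] 192 -> output is bs x 192 x 7 x 7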
@@ -26,8 +26,6 @@ namespace paddle {
  */
 class MKLDNNConcatLayer : public MKLDNNLayer {
 protected:
-  std::vector<MKLDNNMatrixPtr> inVals_;
-  std::vector<MKLDNNMatrixPtr> inGrads_;
   std::vector<std::shared_ptr<mkldnn::primitive>> bwds_;
   // input channel numbers
   std::vector<int> channels_;
@@ -47,18 +45,14 @@ public:
                 const ParameterMap& parameterMap) override;
   void reshape(
-      int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
+      int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void printSizeInfo() override {
@@ -72,38 +66,16 @@ public:
               << ", " << ow_;
   }
-  void printValueFormat() override {
-    for (size_t i = 0; i < inVals_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
-                        << ": " << inVals_[i]->getFormat() << " >>>";
-    }
-    if (outVal_) {
-      VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> ";
-    }
-    if (extOutVal_) {
-      VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
-    }
-  }
-
-  void printGradFormat() override {
-    if (extOutGrad_) {
-      VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
-    }
-    if (outGrad_) {
-      VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< ";
-    }
-    for (size_t i = 0; i < inGrads_.size(); ++i) {
-      VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
-                        << ": " << inGrads_[i]->getFormat() << "<<<";
-    }
-  }
+  size_t keepCondition() {
+    // reset when the total element size of all inputs changed
+    size_t totalSize = inputLayers_[0]->getOutputValue()->getElementCnt();
+    for (size_t i = 1; i < inputLayers_.size(); ++i) {
+      totalSize += inputLayers_[i]->getOutputValue()->getElementCnt();
+    }
+    return totalSize;
+  }
 protected:
-  /**
-   * Forward functions: reset buffers(inputs, output, bias),
-   * reset primitive descriptor,
-   * reset pipeline.
-   */
   void resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
                        MKLDNNMatrixPtr& out);
   void resetFwdPD(std::shared_ptr<mkldnn::concat::primitive_desc>& pd,
@@ -113,11 +85,6 @@ protected:
                         std::shared_ptr<mkldnn::concat::primitive_desc>& pd,
                         std::vector<MKLDNNMatrixPtr>& inputs,
                         MKLDNNMatrixPtr& out);
-  /**
-   * Backward functions: reset buffers(inputs, output, bias)
-   * reset primitives and pipeline
-   */
   void resetBwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
                        MKLDNNMatrixPtr& out);
   void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
...
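The new `keepCondition()` above changes the reset trigger for concat: the base-class default (see the MKLDNNLayer.h hunk later in this diff) fingerprints only the first input's element count, which would miss a resize of a later input whose first sibling kept its shape; summing all inputs' element counts closes that gap. A toy Python sketch of the same rebuild-on-fingerprint-change idea (all names illustrative, not Paddle APIs):

    # Rebuild expensive state only when a cheap size fingerprint changes.
    class ConcatLike:
        def __init__(self, inputs):
            self.inputs = inputs    # lists standing in for input buffers
            self.condition = None   # last fingerprint seen

        def keep_condition(self):
            # total element count across all inputs, like the override above
            return sum(len(x) for x in self.inputs)

        def forward(self):
            if self.condition != self.keep_condition():
                self.condition = self.keep_condition()
                print('rebuilding primitives')  # the expensive reset path
            return [v for x in self.inputs for v in x]

    layer = ConcatLike([[1, 2], [3]])
    layer.forward()            # first run: rebuilds
    layer.forward()            # same sizes: cached
    layer.inputs[1] = [3, 4]   # a non-first input grows
    layer.forward()            # total size changed: rebuilds again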
@@ -90,7 +90,7 @@ void MKLDNNConvLayer::convertWeightsToPaddle() {
 }
 void MKLDNNConvLayer::reshape(
-    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
+    int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
   reshapeInput(bs, ih, iw);
   // cal output sizes
@@ -105,21 +105,17 @@ void MKLDNNConvLayer::reshape(
 }
 void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
-                               MKLDNNMatrixPtr& in,
-                               MKLDNNMatrixPtr& wgt,
-                               MKLDNNMatrixPtr& bias,
+                               std::vector<MKLDNNMatrixPtr>& inputs,
                                MKLDNNMatrixPtr& out) {
   resetFwdPD(fwdPD_);
-  resetFwdBuffers(fwdPD_, in, wgt, bias, out);
-  resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
+  resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 }
 void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
-                               MKLDNNMatrixPtr& in,
-                               MKLDNNMatrixPtr& wgt,
-                               MKLDNNMatrixPtr& bias,
+                               std::vector<MKLDNNMatrixPtr>& inputs,
                                MKLDNNMatrixPtr& out) {
   std::shared_ptr<conv_bwdWgt::primitive_desc> bwdWgtPD;
   std::shared_ptr<conv_bwdData::primitive_desc> bwdDataPD;
@@ -128,9 +124,10 @@ void MKLDNNConvLayer::resetBwd(std::vector<primitive>& pipeline,
   resetBwdDataPD(bwdDataPD);
-  resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out);
-  resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
+  resetBwdBuffers(bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out);
+  resetBwdPipeline(
+      pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out);
 }
 void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) {
@@ -236,14 +233,14 @@ void MKLDNNConvLayer::resetBwdWgtPD(
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
   // create backward weight using input, output and weight value memory desc
-  CHECK(inVal_) << "Should have internal input value";
+  CHECK(inVals_[0]) << "Should have internal input value";
   CHECK(outVal_) << "Should have internal output value";
   CHECK(wgtVal_) << "Should have weight value";
   algorithm algo = algorithm::convolution_direct;
   padding_kind padKind = padding_kind::zero;
   auto bwdWgtDesc = biasVal_ != nullptr
                         ? conv_bwdWgt::desc(algo,
-                                            inVal_->getMemoryDesc(),
+                                            inVals_[0]->getMemoryDesc(),
                                             wgtVal_->getMemoryDesc(),
                                             biasVal_->getMemoryDesc(),
                                             outVal_->getMemoryDesc(),
@@ -252,7 +249,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
                                             padR,
                                             padKind)
                         : conv_bwdWgt::desc(algo,
-                                            inVal_->getMemoryDesc(),
+                                            inVals_[0]->getMemoryDesc(),
                                             wgtVal_->getMemoryDesc(),
                                             outVal_->getMemoryDesc(),
                                             strides,
@@ -260,7 +257,7 @@ void MKLDNNConvLayer::resetBwdWgtPD(
                                             padR,
                                             padKind);
   pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc());
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc());
   CHECK_PRIMITIVE_DESC_EQ(
       outVal_,
       pd->diff_dst_primitive_desc(),
@@ -280,12 +277,12 @@ void MKLDNNConvLayer::resetBwdDataPD(
   memory::dims wgtDims, biasDims, strides, dilations, padL, padR;
   loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR);
-  CHECK(inVal_) << "Should have internal input value";
+  CHECK(inVals_[0]) << "Should have internal input value";
   CHECK(outVal_) << "Should have internal output value";
   // create backward data using input and output value memory desc
   // but using weight memory desc with any format
   auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct,
-                                        inVal_->getMemoryDesc(),
+                                        inVals_[0]->getMemoryDesc(),
                                         MKLDNNMatrix::createMemoryDesc(wgtDims),
                                         outVal_->getMemoryDesc(),
                                         strides,
@@ -294,7 +291,7 @@ void MKLDNNConvLayer::resetBwdDataPD(
                                         padding_kind::zero);
   pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_));
   CHECK_PRIMITIVE_DESC_EQ(
-      inVal_,
+      inVals_[0],
       pd->diff_src_primitive_desc(),
       "primitive desc of in value and grad should be equal");
   CHECK_PRIMITIVE_DESC_EQ(
@@ -346,12 +343,12 @@ void MKLDNNConvLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   // add bwdWgt handle
   if (bias) {
-    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias));
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias));
   } else {
-    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt));
+    bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt));
   }
   pipeline.push_back(*bwdWgt_);
...
@@ -69,18 +69,14 @@ public:
                 const ParameterMap& parameterMap) override;
   void reshape(
-      int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
+      int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void updateWeights(const UpdateCallback& callback) override;
@@ -107,48 +103,26 @@ protected:
                         mkldnn::memory::dims& padL,
                         mkldnn::memory::dims& padR);
-  /**
-   * reset the forward primitive descriptor.
-   */
   void resetFwdPD(std::shared_ptr<conv_fwd::primitive_desc>& pd);
-  /**
-   * reset the MKLDNNMatrix buffers used in forward.
-   */
   void resetFwdBuffers(std::shared_ptr<conv_fwd::primitive_desc>& pd,
                        MKLDNNMatrixPtr& in,
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
-  /**
-   * reset the forward pipeline.
-   */
   void resetFwdPipeline(std::vector<mkldnn::primitive>& pipeline,
                         std::shared_ptr<conv_fwd::primitive_desc>& pd,
                         MKLDNNMatrixPtr& in,
                         MKLDNNMatrixPtr& wgt,
                         MKLDNNMatrixPtr& bias,
                         MKLDNNMatrixPtr& out);
-  /**
-   * reset the backward weight primitive descriptor.
-   */
   void resetBwdWgtPD(std::shared_ptr<conv_bwdWgt::primitive_desc>& pd);
-  /**
-   * reset the backward data primitive descriptor.
-   */
   void resetBwdDataPD(std::shared_ptr<conv_bwdData::primitive_desc>& pd);
-  /**
-   * reset the MKLDNNMatrix buffers used in backward.
-   */
   void resetBwdBuffers(std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
                        std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
                        MKLDNNMatrixPtr& in,
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& bias,
                        MKLDNNMatrixPtr& out);
-  /**
-   * reset the backward pipeline.
-   */
   void resetBwdPipeline(std::vector<mkldnn::primitive>& pipeline,
                         std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD,
                         std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
...
@@ -74,7 +74,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
 }
 void MKLDNNFcLayer::reshape(
-    int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
+    int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
   reshapeInput(bs, ih, iw);
   CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
@@ -87,32 +87,29 @@ void MKLDNNFcLayer::reshape(
 }
 void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
-                             MKLDNNMatrixPtr& in,
-                             MKLDNNMatrixPtr& wgt,
-                             MKLDNNMatrixPtr& bias,
+                             std::vector<MKLDNNMatrixPtr>& inputs,
                              MKLDNNMatrixPtr& out) {
-  resetFwdBuffers(in, wgt, bias, out);
-  resetFwdPD(fwdPD_, in, wgt, bias, out);
-  resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out);
+  resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out);
+  resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out);
+  resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out);
 }
 void MKLDNNFcLayer::resetBwd(std::vector<primitive>& pipeline,
-                             MKLDNNMatrixPtr& in,
-                             MKLDNNMatrixPtr& wgt,
-                             MKLDNNMatrixPtr& bias,
+                             std::vector<MKLDNNMatrixPtr>& inputs,
                              MKLDNNMatrixPtr& out) {
   std::shared_ptr<fc_bwdWgt::primitive_desc> bwdWgtPD;
   std::shared_ptr<fc_bwdData::primitive_desc> bwdDataPD;
-  resetBwdBuffers(in, wgt, bias, out);
-  resetBwdWgtPD(bwdWgtPD, wgt, bias, out);
-  resetBwdDataPD(bwdDataPD, in, out);
-  resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out);
+  resetBwdBuffers(inputs[0], wgtGrad_, biasGrad_, out);
+  resetBwdWgtPD(bwdWgtPD, wgtGrad_, biasGrad_, out);
+  resetBwdDataPD(bwdDataPD, inputs[0], out);
+  resetBwdPipeline(
+      pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out);
 }
 void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
@@ -193,9 +190,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
                                     MKLDNNMatrixPtr& wgt,
                                     MKLDNNMatrixPtr& bias,
                                     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_ && outVal_);
+  CHECK(inVals_[0] && outVal_);
   resetOutGrad(out, outVal_->getPrimitiveDesc());
-  resetInGrad(in, inVal_->getPrimitiveDesc());
+  resetInGrad(in, inVals_[0]->getPrimitiveDesc());
   CHECK(wgtVal_);
   resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc());
@@ -212,14 +209,15 @@ void MKLDNNFcLayer::resetBwdWgtPD(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
-  fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
-                                                      wgt->getMemoryDesc(),
-                                                      bias->getMemoryDesc(),
-                                                      out->getMemoryDesc())
-                                    : fc_bwdWgt::desc(inVal_->getMemoryDesc(),
-                                                      wgt->getMemoryDesc(),
-                                                      out->getMemoryDesc());
+  CHECK(inVals_[0]);
+  fc_bwdWgt::desc bwdWgtDesc =
+      bias ? fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
+                             wgt->getMemoryDesc(),
+                             bias->getMemoryDesc(),
+                             out->getMemoryDesc())
           : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(),
                             wgt->getMemoryDesc(),
                            out->getMemoryDesc());
   pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_));
 }
@@ -245,11 +243,11 @@ void MKLDNNFcLayer::resetBwdPipeline(
     MKLDNNMatrixPtr& wgt,
     MKLDNNMatrixPtr& bias,
     MKLDNNMatrixPtr& out) {
-  CHECK(inVal_);
+  CHECK(inVals_[0]);
   if (bias) {
-    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
+    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias));
   } else {
-    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt));
+    bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt));
   }
   pipeline.push_back(*bwdWgt_);
...
@@ -52,18 +52,14 @@ public:
                 const ParameterMap& parameterMap) override;
   void reshape(
-      int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
+      int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
   void resetFwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void resetBwd(std::vector<mkldnn::primitive>& pipeline,
-                MKLDNNMatrixPtr& in,
-                MKLDNNMatrixPtr& wgt,
-                MKLDNNMatrixPtr& bias,
+                std::vector<MKLDNNMatrixPtr>& inputs,
                 MKLDNNMatrixPtr& out) override;
   void updateWeights(const UpdateCallback& callback) override;
@@ -73,11 +69,6 @@ public:
   void convertWeightsToPaddle() override;
 protected:
-  /**
-   * Forward functions: reset buffers(input, output, weight and bias),
-   * reset primitive descriptor,
-   * reset pipeline.
-   */
   void resetFwdBuffers(MKLDNNMatrixPtr& in,
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& bias,
@@ -93,13 +84,6 @@ protected:
                         MKLDNNMatrixPtr& wgt,
                         MKLDNNMatrixPtr& bias,
                         MKLDNNMatrixPtr& out);
-  /**
-   * Backward functions: reset buffers(input, output, weight and bias),
-   * reset primitive descriptor for backward weight,
-   * reset primitive descriptor for backward data,
-   * reset pipeline.
-   */
   void resetBwdBuffers(MKLDNNMatrixPtr& in,
                        MKLDNNMatrixPtr& wgt,
                        MKLDNNMatrixPtr& bias,
...
@@ -48,31 +48,20 @@ void MKLDNNLayer::forward(PassType passType) {
   REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
   CHECK(!inputLayers_.empty());
   copySeqInfoToOutputs();
-  size_t elemenCnt = inputLayers_[0]->getOutputValue()->getElementCnt();
-  if (inputElemenCnt_ != elemenCnt) {
+  if (condition_ != keepCondition()) {
     VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
-    // reset when input total sizes changed, not only the batchsize
-    inputElemenCnt_ = elemenCnt;
-    pipelineFwd_.clear();
+    condition_ = keepCondition();
     reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
-    // all cpu device output grad or value share output's
-    // the output_.value and output_.grad are shared with CPU device
+    printSizeInfo();
     shareCPUDevice();
-    resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
-    // MKLDNNLayer output value should be MKLDNNMatrix
-    // so external output value is necessary.
-    // Then external input value is not necessary,
-    // since input may be mkldnn internal buffer.
-    CHECK(extOutVal_) << "external output value is necessary";
-    output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
-    CHECK(inVal_ && outVal_) << "internal memories are necessary";
-    if (cvtInVal_) {
-      pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_);
-    }
-    if (cvtOutVal_) {
-      pipelineFwd_.push_back(*cvtOutVal_);
-    }
+    pipelineFwd_.clear();
+    inVals_.resize(inputLayers_.size(), nullptr);
+    extInVals_.resize(inputLayers_.size(), nullptr);
+    cvtInVals_.resize(inputLayers_.size(), nullptr);
+    resetFwd(pipelineFwd_, inVals_, outVal_);
+    prepareValueConversions(pipelineFwd_);
     convertWeightsFromPaddle();
-    printSizeInfo();
     printValueFormat();
     needResetBwd_ = true;
   }
@@ -80,8 +69,8 @@ void MKLDNNLayer::forward(PassType passType) {
   if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) {
     // Update input value data when input layer is "data" type,
     // since the input value data address might be changed.
-    CHECK(extInVal_);
-    extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
+    CHECK(extInVals_[0]);
+    extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData());
   }
   if (!outputOnlyMKLDNN_) {
@@ -99,22 +88,13 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
   if (needResetBwd_) {
     VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
     pipelineBwd_.clear();
+    inGrads_.resize(inputLayers_.size(), nullptr);
+    extInGrads_.resize(inputLayers_.size(), nullptr);
+    cvtInGrads_.resize(inputLayers_.size(), nullptr);
     pipelineMergeGrad_.clear();
     mergeGrad_ = nullptr;
-    resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
-    // external output grad is not necessary
-    // since output may be mkldnn internal buffer or merge them directly.
-    CHECK(outGrad_) << "internal output grad is necessary";
-    if (extOutGrad_) {
-      CHECK_EQ(extOutGrad_->getData(), output_.grad->getData())
-          << "the external buffer should share the same data with output_.grad";
-    }
-    if (cvtOutGrad_) {
-      pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_);
-    }
-    if (cvtInGrad_) {
-      pipelineBwd_.push_back(*cvtInGrad_);
-    }
+    resetBwd(pipelineBwd_, inGrads_, outGrad_);
+    prepareGradConversions(pipelineBwd_);
     printGradFormat();
     needResetBwd_ = false;
   }
@@ -141,8 +121,8 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) {
 void MKLDNNLayer::reshapeInput(int& batchsize,
                                int& height,
                                int& width,
-                               size_t inputIdx) {
-  const Argument& input = inputLayers_[inputIdx]->getOutput();
+                               size_t idx) {
+  const Argument& input = inputLayers_[idx]->getOutput();
   batchsize = input.getBatchSize();
   int h = input.getFrameHeight();
   int w = input.getFrameWidth();
@@ -176,27 +156,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn,
 void MKLDNNLayer::resetInValue(
     MKLDNNMatrixPtr& in,
     const std::shared_ptr<memory::primitive_desc>& intPD,
-    size_t inputIdx) {
-  cvtInVal_ = nullptr;
-  extInVal_ = nullptr;
+    size_t idx,
+    int inputChannel) {
+  cvtInVals_[idx] = nullptr;
+  extInVals_[idx] = nullptr;
   in = nullptr;
-  CHECK_GT(bs_ * ic_ * ih_ * iw_, 0);
+  inputChannel = inputChannel == 0 ? ic_ : inputChannel;
+  CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0);
   auto extPD = MKLDNNMatrix::createPrimitiveDesc(
-      {bs_, ic_, ih_, iw_}, format::nchw, engine_);
-  const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue();
-  extInVal_ = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
-  CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr);
-  if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) {
-    extInVal_ = MKLDNNMatrix::create(extPD, inMat);
+      {bs_, inputChannel, ih_, iw_}, format::nchw, engine_);
+  const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue();
+  extInVals_[idx] = std::dynamic_pointer_cast<MKLDNNMatrix>(inMat);
+  CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr);
+  if (extInVals_[idx] == nullptr ||
+      extInVals_[idx]->getFormat() == format::nc) {
+    extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat);
   }
-  in = extInVal_;
+  in = extInVals_[idx];
   if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) {
     return;
   }
   // need create reorder
   in = MKLDNNMatrix::create(*intPD);
-  cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in);
-  CHECK(cvtInVal_) << "should not be emptry";
+  cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in);
+  CHECK(cvtInVals_[idx]) << "should not be emptry";
 }
 void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
@@ -218,11 +201,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out,
 void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
                               memory::primitive_desc intPD,
-                              size_t inputIdx) {
-  cvtInGrad_ = nullptr;
-  extInGrad_ = nullptr;
+                              size_t idx) {
+  cvtInGrads_[idx] = nullptr;
+  extInGrads_[idx] = nullptr;
   in = nullptr;
-  LayerPtr& input = inputLayers_[inputIdx];
+  LayerPtr& input = inputLayers_[idx];
   if (input->getOutputGrad() == nullptr) {
     // no need input grad
     return;
@@ -237,23 +220,25 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in,
   in = MKLDNNMatrix::create(intPD, inMat);
   Argument& arg = input->getOutput(this->getName());
   arg.grad = std::dynamic_pointer_cast<Matrix>(in);
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
   if (inputIsOnlyMKLDNN()) {
     return;
   }
-  extInGrad_ = in;
-  if (isPaddleFormat(extInGrad_->getFormat())) {
+  extInGrads_[idx] = in;
+  if (isPaddleFormat(extInGrads_[idx]->getFormat())) {
     return;
   }
   // need create reorder
-  CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat()))
+  CHECK(extInVals_[idx] != nullptr &&
+        isPaddleFormat(extInVals_[idx]->getFormat()))
      << "should have external input value and the format must be nchw(nc)";
-  extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat);
-  CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD);
+  extInGrads_[idx] =
+      MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat);
+  CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD);
   in = MKLDNNMatrix::create(intPD);
-  cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_);
-  CHECK(cvtInGrad_);
+  cvtInGrads_[idx] = MKLDNNMatrix::createReorder(in, extInGrads_[idx]);
+  CHECK(cvtInGrads_[idx]);
 }
 void MKLDNNLayer::resetOutGrad(MKLDNNMatrixPtr& out,
...
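In `resetInValue`/`resetInGrad` above, each input now gets its own external buffer (Paddle's plain nchw layout) paired with an internal buffer in whatever layout the MKLDNN primitive prefers, and a reorder is created only when the two primitive descriptors differ. A toy Python sketch of that convert-only-on-mismatch idea (layouts modeled as strings, a transpose standing in for `mkldnn::reorder`; all names illustrative):

    # Toy model of external/internal buffers with an on-demand reorder.
    def reorder(data, src_layout, dst_layout):
        # stand-in for mkldnn::reorder: convert only when layouts differ
        if src_layout == dst_layout:
            return data
        return [list(row) for row in zip(*data)]

    external = [[1, 2, 3], [4, 5, 6]]   # Paddle-side buffer
    ext_layout = 'nchw'
    int_layout = 'nChw8c'               # layout the primitive prefers
    internal = (external if ext_layout == int_layout
                else reorder(external, ext_layout, int_layout))
    print(internal)  # converted copy; no copy when layouts already match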
...@@ -34,15 +34,16 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr; ...@@ -34,15 +34,16 @@ typedef std::shared_ptr<MKLDNNLayer> MKLDNNLayerPtr;
*/ */
class MKLDNNLayer : public Layer { class MKLDNNLayer : public Layer {
protected: protected:
// input value element count
size_t inputElemenCnt_;
// batch size // batch size
int bs_; int bs_;
// they sizes are always from the first input layer
// input image channel, height and width // input image channel, height and width
int ic_, ih_, iw_; int ic_, ih_, iw_;
// output image channel, height and width // output image channel, height and width
int oc_, oh_, ow_; int oc_, oh_, ow_;
// the condition that forward need be reset
size_t condition_;
// backward also need reset after reset forward handle // backward also need reset after reset forward handle
bool needResetBwd_; bool needResetBwd_;
...@@ -67,18 +68,18 @@ protected: ...@@ -67,18 +68,18 @@ protected:
* When all layers are mkldnn layers, they could save internal data. * When all layers are mkldnn layers, they could save internal data.
*/ */
// below MKLDNNMatrix buffers are all internal buffers // below MKLDNNMatrix buffers are all internal buffers
MKLDNNMatrixPtr inVal_; std::vector<MKLDNNMatrixPtr> inVals_;
MKLDNNMatrixPtr inGrad_; std::vector<MKLDNNMatrixPtr> inGrads_;
MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outVal_;
MKLDNNMatrixPtr outGrad_; MKLDNNMatrixPtr outGrad_;
// below are external value and grad // below are external value and grad
MKLDNNMatrixPtr extInVal_; std::vector<MKLDNNMatrixPtr> extInVals_;
MKLDNNMatrixPtr extInGrad_; std::vector<MKLDNNMatrixPtr> extInGrads_;
MKLDNNMatrixPtr extOutVal_; MKLDNNMatrixPtr extOutVal_;
MKLDNNMatrixPtr extOutGrad_; MKLDNNMatrixPtr extOutGrad_;
// convert handle between external and internal buffers // convert handle between external and internal buffers
std::shared_ptr<mkldnn::reorder> cvtInVal_; std::vector<std::shared_ptr<mkldnn::reorder>> cvtInVals_;
std::shared_ptr<mkldnn::reorder> cvtInGrad_; std::vector<std::shared_ptr<mkldnn::reorder>> cvtInGrads_;
std::shared_ptr<mkldnn::reorder> cvtOutVal_; std::shared_ptr<mkldnn::reorder> cvtOutVal_;
std::shared_ptr<mkldnn::reorder> cvtOutGrad_; std::shared_ptr<mkldnn::reorder> cvtOutGrad_;
...@@ -102,14 +103,7 @@ protected: ...@@ -102,14 +103,7 @@ protected:
public: public:
explicit MKLDNNLayer(const LayerConfig& config) explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config), : Layer(config),
inputElemenCnt_(0), condition_(0),
bs_(0),
ic_(0),
ih_(0),
iw_(0),
oc_(0),
oh_(0),
ow_(0),
needResetBwd_(true), needResetBwd_(true),
outputOnlyMKLDNN_(false), outputOnlyMKLDNN_(false),
engine_(mkldnn::engine::cpu, 0), engine_(mkldnn::engine::cpu, 0),
...@@ -125,31 +119,28 @@ public: ...@@ -125,31 +119,28 @@ public:
virtual void backward(const UpdateCallback& callback); virtual void backward(const UpdateCallback& callback);
/** /**
* reshape the input image sizes * reshape the input and output channels and image sizes
* and reset output image and buffer size * and reset output buffer size
* output channel can not be changed
*/ */
virtual void reshape( virtual void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0; int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) = 0;
/** /**
* reset the mkldnn forward primitve and memories * reset the mkldnn forward primitve and memories
* only would be called when input size changes * only would be called when input size changes
* weight and bias buffers should be coverd by child class itself
*/ */
virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline, virtual void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in, std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) = 0; MKLDNNMatrixPtr& out) = 0;
/** /**
* reset the mkldnn backward primitve and memories * reset the mkldnn backward primitve and memories
* only would be called when needed * only would be called when needed
* weight and bias buffers should be coverd by child class itself
*/ */
virtual void resetBwd(std::vector<mkldnn::primitive>& pipeline, virtual void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in, std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) = 0; MKLDNNMatrixPtr& out) = 0;
/** /**
...@@ -175,13 +166,19 @@ public: ...@@ -175,13 +166,19 @@ public:
void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); }
protected: protected:
/**
* Some layers may have different condition to reset the forward.
* The function returns the condition that do not need reset forward.
*/
inline virtual size_t keepCondition() {
// reset when the first input element size changed, not only the batchsize
return inputLayers_[0]->getOutputValue()->getElementCnt();
}
/** /**
* reshape the input image sizes and input batchsize * reshape the input image sizes and input batchsize
*/ */
void reshapeInput(int& batchsize, void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0);
int& height,
int& width,
size_t inputIdx = 0);
/** /**
* reshape output image sizes * reshape output image sizes
...@@ -199,11 +196,13 @@ protected: ...@@ -199,11 +196,13 @@ protected:
/** /**
* reset input value from input MKLDNNMatrix and internal primitive desc. * reset input value from input MKLDNNMatrix and internal primitive desc.
* reset both internal and external buffer and create reorder if necessary. * reset both internal and external buffer and create reorder if necessary.
 * the input channel count may differ for concat.
*/ */
void resetInValue( void resetInValue(
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr, const std::shared_ptr<mkldnn::memory::primitive_desc>& intPD = nullptr,
size_t inputIdx = 0); size_t idx = 0,
int inputChannel = 0);
/** /**
* reset output value from internal primitive desc. * reset output value from internal primitive desc.
...@@ -218,7 +217,7 @@ protected: ...@@ -218,7 +217,7 @@ protected:
*/ */
void resetInGrad(MKLDNNMatrixPtr& in, void resetInGrad(MKLDNNMatrixPtr& in,
mkldnn::memory::primitive_desc intPD, mkldnn::memory::primitive_desc intPD,
size_t inputIdx = 0); size_t idx = 0);
/** /**
* reset output grad from internal primitive desc. * reset output grad from internal primitive desc.
...@@ -296,17 +295,19 @@ protected: ...@@ -296,17 +295,19 @@ protected:
* print the mkldnn memory format of value * print the mkldnn memory format of value
*/ */
virtual void printValueFormat() { virtual void printValueFormat() {
if (extInVal_) { for (size_t i = 0; i < inVals_.size(); ++i) {
VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> "; if (!inVals_[i]) {
} continue;
if (inVal_) { }
VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>"; VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
<< ": " << (extInVals_[i] ? extInVals_[i]->getFormat()
: inVals_[i]->getFormat())
<< " >>> " << inVals_[i]->getFormat() << " >>>";
} }
if (outVal_) { if (outVal_) {
VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "
} << (extOutVal_ ? extOutVal_->getFormat()
if (extOutVal_) { : outVal_->getFormat());
VLOG(MKLDNN_FMTS) << extOutVal_->getFormat();
} }
if (wgtVal_) { if (wgtVal_) {
VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat(); VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat();
...@@ -320,17 +321,19 @@ protected: ...@@ -320,17 +321,19 @@ protected:
* print the mkldnn memory format of grad * print the mkldnn memory format of grad
*/ */
virtual void printGradFormat() { virtual void printGradFormat() {
if (extOutGrad_) {
VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat();
}
if (outGrad_) { if (outGrad_) {
VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "
<< (extOutGrad_ ? extOutGrad_->getFormat()
: outGrad_->getFormat());
} }
if (inGrad_) { for (size_t i = 0; i < inGrads_.size(); ++i) {
VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<"; if (!inGrads_[i]) {
} continue;
if (extInGrad_) { }
VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< "; VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName()
<< ": " << (extInGrads_[i] ? extInGrads_[i]->getFormat()
: inGrads_[i]->getFormat())
<< " <<< " << inGrads_[i]->getFormat() << " <<<";
} }
if (wgtGrad_) { if (wgtGrad_) {
VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat(); VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat();
...@@ -437,6 +440,41 @@ private: ...@@ -437,6 +440,41 @@ private:
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
} }
} }
void prepareValueConversions(std::vector<mkldnn::primitive>& pipeline) {
    // The MKLDNNLayer output value should be an MKLDNNMatrix,
    // so an external output value is necessary.
    // An external input value, however, is not,
    // since the input may be an mkldnn internal buffer.
CHECK(extOutVal_) << "external output value is necessary";
output_.value = std::dynamic_pointer_cast<Matrix>(extOutVal_);
CHECK(inVals_[0] && outVal_) << "internal memories are necessary";
for (size_t i = 0; i < cvtInVals_.size(); ++i) {
if (cvtInVals_[i]) {
pipeline.insert(pipeline.begin(), *cvtInVals_[i]);
}
}
if (cvtOutVal_) {
pipeline.push_back(*cvtOutVal_);
}
}
void prepareGradConversions(std::vector<mkldnn::primitive>& pipeline) {
    // An external output grad is not necessary,
    // since the output may be an mkldnn internal buffer, or the grads may be merged directly.
CHECK(outGrad_) << "internal output grad is necessary";
if (extOutGrad_) {
CHECK_EQ(extOutGrad_->getData(), output_.grad->getData())
<< "the external buffer should share the same data with output_.grad";
}
if (cvtOutGrad_) {
pipeline.insert(pipeline.begin(), *cvtOutGrad_);
}
for (size_t i = 0; i < cvtInGrads_.size(); ++i) {
if (cvtInGrads_[i]) {
pipeline.push_back(*cvtInGrads_[i]);
}
}
}
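A minimal sketch of how these helpers are typically wired into the forward pass (illustrative only; member names such as pipelineFwd_ are assumed from the surrounding class):

  // Illustrative: rebuild primitives when keepCondition() changes, then
  // splice the value reorders around the computation pipeline.
  if (condition_ != keepCondition()) {
    condition_ = keepCondition();
    pipelineFwd_.clear();
    resetFwd(pipelineFwd_, inVals_, outVal_);
    prepareValueConversions(pipelineFwd_);
  }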
}; };
} // namespace paddle } // namespace paddle
...@@ -58,10 +58,11 @@ bool MKLDNNPoolLayer::init(const LayerMap& layerMap, ...@@ -58,10 +58,11 @@ bool MKLDNNPoolLayer::init(const LayerMap& layerMap,
} }
void MKLDNNPoolLayer::reshape( void MKLDNNPoolLayer::reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) {
reshapeInput(bs, ih, iw); reshapeInput(bs, ih, iw);
  // ic_ and oc cannot be changed // ic_ and oc cannot be changed
CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) CHECK_EQ((size_t)ic,
inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw)
<< "Input channel can not be changed"; << "Input channel can not be changed";
  // calculate output sizes // calculate output sizes
...@@ -74,29 +75,25 @@ void MKLDNNPoolLayer::reshape( ...@@ -74,29 +75,25 @@ void MKLDNNPoolLayer::reshape(
} }
void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline, void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in, std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
resetFwdBuffers(in, out); resetFwdBuffers(inputs[0], out);
resetFwdPD(fwdPD_, in, out); resetFwdPD(fwdPD_, inputs[0], out);
resetFwdPipeline(pipeline, fwdPD_, in, out); resetFwdPipeline(pipeline, fwdPD_, inputs[0], out);
} }
void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline, void MKLDNNPoolLayer::resetBwd(std::vector<primitive>& pipeline,
MKLDNNMatrixPtr& in, std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
std::shared_ptr<pool_bwd::primitive_desc> pd; std::shared_ptr<pool_bwd::primitive_desc> pd;
resetBwdBuffers(in, out); resetBwdBuffers(inputs[0], out);
resetBwdPD(pd, in, out); resetBwdPD(pd, inputs[0], out);
resetBwdPipeline(pipeline, pd, in, out); resetBwdPipeline(pipeline, pd, inputs[0], out);
} }
void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in,
...@@ -151,9 +148,9 @@ void MKLDNNPoolLayer::resetFwdPipeline( ...@@ -151,9 +148,9 @@ void MKLDNNPoolLayer::resetFwdPipeline(
void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
CHECK(inVal_ && outVal_); CHECK(inVals_[0] && outVal_);
resetOutGrad(out, outVal_->getPrimitiveDesc()); resetOutGrad(out, outVal_->getPrimitiveDesc());
resetInGrad(in, inVal_->getPrimitiveDesc()); resetInGrad(in, inVals_[0]->getPrimitiveDesc());
} }
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd, void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
......
...@@ -53,18 +53,14 @@ public: ...@@ -53,18 +53,14 @@ public:
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
void reshape( void reshape(
int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override;
void resetFwd(std::vector<mkldnn::primitive>& pipeline, void resetFwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in, std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override; MKLDNNMatrixPtr& out) override;
void resetBwd(std::vector<mkldnn::primitive>& pipeline, void resetBwd(std::vector<mkldnn::primitive>& pipeline,
MKLDNNMatrixPtr& in, std::vector<MKLDNNMatrixPtr>& inputs,
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) override; MKLDNNMatrixPtr& out) override;
void printSizeInfo() override { void printSizeInfo() override {
...@@ -75,11 +71,6 @@ public: ...@@ -75,11 +71,6 @@ public:
} }
protected: protected:
/**
* Forward functions: reset buffers(input, output),
* reset primitive descriptor,
* reset pipeline.
*/
void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
void resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd, void resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr in, MKLDNNMatrixPtr in,
...@@ -88,12 +79,6 @@ protected: ...@@ -88,12 +79,6 @@ protected:
std::shared_ptr<pool_fwd::primitive_desc>& pd, std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out); MKLDNNMatrixPtr& out);
/**
* Backward functions: reset buffers(input, output),
* reset primitive descriptor,
* reset pipeline.
*/
void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out);
void resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd, void resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
......
...@@ -184,6 +184,7 @@ set(DEPS_OPS ...@@ -184,6 +184,7 @@ set(DEPS_OPS
sequence_softmax_op sequence_softmax_op
sum_op sum_op
pool_op pool_op
maxout_op
pool_with_index_op pool_with_index_op
conv_op conv_op
conv_transpose_op conv_transpose_op
...@@ -210,6 +211,7 @@ op_library(sgd_op DEPS selected_rows_functor) ...@@ -210,6 +211,7 @@ op_library(sgd_op DEPS selected_rows_functor)
op_library(adagrad_op DEPS selected_rows_functor) op_library(adagrad_op DEPS selected_rows_functor)
op_library(conv_op DEPS vol2col) op_library(conv_op DEPS vol2col)
op_library(pool_op DEPS pooling) op_library(pool_op DEPS pooling)
op_library(maxout_op DEPS maxouting)
op_library(pool_with_index_op DEPS pooling) op_library(pool_with_index_op DEPS pooling)
op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table) op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op) op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op)
......
...@@ -40,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad, ...@@ -40,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::ConvOpGrad); ops::ConvOpGrad);
REGISTER_OP_CPU_KERNEL(conv_cudnn, REGISTER_OP_CPU_KERNEL(conv_cudnn,
ops::GemmConvKernel<paddle::platform::CPUPlace, float>); ops::GemmConvKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv_cudnn_grad, conv_cudnn_grad, ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>); ops::GemmConvGradKernel<paddle::platform::CPUPlace, double>);
...@@ -259,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> { ...@@ -259,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>); REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>,
paddle::operators::CudnnConvOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv_cudnn_grad, REGISTER_OP_GPU_KERNEL(conv_cudnn_grad,
paddle::operators::CudnnConvGradOpKernel<float>); paddle::operators::CudnnConvGradOpKernel<float>,
paddle::operators::CudnnConvGradOpKernel<double>);
...@@ -61,10 +61,12 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp, ...@@ -61,10 +61,12 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn, conv2d_transpose_cudnn,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv2d_transpose_cudnn_grad, conv2d_transpose_cudnn_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad, ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad,
...@@ -72,7 +74,9 @@ REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, ...@@ -72,7 +74,9 @@ REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv3d_transpose_cudnn, conv3d_transpose_cudnn,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
conv3d_transpose_cudnn_grad, conv3d_transpose_cudnn_grad,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>); ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, double>);
...@@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> { ...@@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn, REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn,
ops::CudnnConvTransposeOpKernel<float>); ops::CudnnConvTransposeOpKernel<float>,
ops::CudnnConvTransposeOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad, REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad,
ops::CudnnConvTransposeGradOpKernel<float>); ops::CudnnConvTransposeGradOpKernel<float>,
ops::CudnnConvTransposeGradOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn, REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn,
ops::CudnnConvTransposeOpKernel<float>); ops::CudnnConvTransposeOpKernel<float>,
ops::CudnnConvTransposeOpKernel<double>);
REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad, REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad,
ops::CudnnConvTransposeGradOpKernel<float>); ops::CudnnConvTransposeGradOpKernel<float>,
ops::CudnnConvTransposeGradOpKernel<double>);
...@@ -14,6 +14,7 @@ if(WITH_GPU) ...@@ -14,6 +14,7 @@ if(WITH_GPU)
nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context)
nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function)
nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context)
else() else()
cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto) cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto)
cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
...@@ -26,6 +27,7 @@ else() ...@@ -26,6 +27,7 @@ else()
cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context)
cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function) cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function)
cc_library(maxouting SRCS maxouting.cc DEPS device_context)
endif() endif()
cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/maxouting.h"
namespace paddle {
namespace operators {
namespace math {
// All tensors are in NCHW format, and the groups must be greater than 1
template <typename T>
class MaxOutFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * output,
int groups) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output->dims()[1];
int fea_size = input_height * input_width;
// c_size means the output size of each sample
int c_size = fea_size * output_channels;
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; ++i) {
int new_bindex = c_size * i;
for (int c = 0; c < output_channels; ++c) {
int new_cindex = fea_size * c;
for (int f = 0; f < fea_size; ++f) {
T ele = static_cast<T>(-FLT_MAX);
for (int ph = 0; ph < groups; ++ph) {
T x = input_data[(new_bindex + new_cindex) * groups
+ ph * fea_size + f];
ele = ele > x ? ele : x;
}
          output_data[new_bindex + new_cindex + f] = ele;
}
}
}
}
};
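As a standalone check of the index arithmetic above, a self-contained sketch using plain std containers (independent of the framework types):

// Standalone sketch: with 4 input channels and groups = 2, maxout yields
// 2 output channels; output channel c takes the element-wise max over
// input channels c*groups .. c*groups + groups - 1.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const int groups = 2, out_channels = 2, fea_size = 3;
  std::vector<float> in = {0, 1, 2, 3, 4, 5,    // input channels 0,1 -> out 0
                           9, 8, 7, 6, 5, 4};   // input channels 2,3 -> out 1
  std::vector<float> out(out_channels * fea_size);
  for (int c = 0; c < out_channels; ++c) {
    for (int f = 0; f < fea_size; ++f) {
      float ele = in[(c * groups) * fea_size + f];
      for (int g = 1; g < groups; ++g) {
        ele = std::max(ele, in[(c * groups + g) * fea_size + f]);
      }
      out[c * fea_size + f] = ele;
    }
  }
  for (float v : out) std::printf("%g ", v);  // prints: 3 4 5 9 8 7
  return 0;
}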
template <class T>
class MaxOutGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * input_grad,
const framework::Tensor& output,
const framework::Tensor& output_grad,
int groups) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
int fea_size = input_height * input_width;
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; ++i) {
int blen = fea_size * output_channels * i;
for (int c = 0; c < output_channels; ++c) {
int clen = fea_size * c;
for (int f = 0; f < fea_size; ++f) {
int input_idx0 = (blen + clen) * groups + f;
bool continue_match = true;
int output_idx = blen + clen + f;
for (int g = 0; g < groups && continue_match; ++g) {
int input_idx = input_idx0 + fea_size * g;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] += output_grad_data[output_idx];
continue_match = false;
}
}
}
}
}
}
};
template class MaxOutGradFunctor<platform::CPUPlace, float>;
template class MaxOutGradFunctor<platform::CPUPlace, double>;
template class MaxOutFunctor<platform::CPUPlace, float>;
template class MaxOutFunctor<platform::CPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/maxouting.h"
#include "paddle/platform/cuda_helper.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
__global__ void KernelMaxOut(const int nthreads, const T* input_data,
const int channels,
const int input_height, const int input_width,
                             int groups, T* output_data) {
const int size = input_height * input_width * channels / groups;
const int feat_len = input_height * input_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
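  // Grid-stride loop: each thread strides by the total thread count so any
  // launch configuration covers all nthreads output elements.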
for (int i = index; i < nthreads; i += offset) {
int batch_idx = i / size;
int batch_offset = i % size;
int channel_idx = batch_offset / feat_len;
int feat_idx = batch_offset % feat_len;
int data_idx =
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
T ele = static_cast<T>(-FLT_MAX);
for (int g = 0; g < groups; ++g) {
T x = input_data[data_idx + g * feat_len];
ele = ele > x ? ele : x;
}
output_data[i] = ele;
}
}
template <typename T>
__global__ void KernelMaxoutGrad(
const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels,
const int input_height, const int input_width, int groups) {
const int size = input_height * input_width * channels / groups;
const int feat_len = input_height * input_width;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int offset = blockDim.x * gridDim.x;
for (int i = index; i < nthreads; i += offset) {
int batch_idx = i / size;
int batch_offset = i % size;
int channel_idx = batch_offset / feat_len;
int feat_idx = batch_offset % feat_len;
int data_idx =
(batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
int max_index = -1;
    // Find the argmax position within the group, then route the gradient to it.
    for (int g = 0; g < groups; ++g) {
      if (input_data[data_idx + g * feat_len] == output_data[i]) {
        max_index = data_idx + g * feat_len;
        break;
      }
    }
    if (max_index != -1) {
      input_grad[max_index] += output_grad[i];
}
}
}
/*
* All tensors are in NCHW format.
*/
template <typename T>
class MaxOutFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor * output,
int groups) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = output->numel();
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelMaxOut<
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, input_channels,
input_height, input_width, groups,
output_data);
}
};
/*
* All tensors are in NCHW format.
*/
template <typename T>
class MaxOutGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * input_grad,
const framework::Tensor& output,
const framework::Tensor& output_grad,
int groups) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = output.numel();
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelMaxoutGrad<
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data,
input_channels, input_height, input_width, groups);
}
};
template class MaxOutGradFunctor<platform::GPUPlace, float>;
template class MaxOutGradFunctor<platform::GPUPlace, double>;
template class MaxOutFunctor<platform::GPUPlace, float>;
template class MaxOutFunctor<platform::GPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/hostdevice.h"
namespace paddle {
namespace operators {
namespace math {
#define FLT_MAX __FLT_MAX__
template <typename Place, typename T>
class MaxOutFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor * output,
int groups);
};
template <typename Place, class T>
class MaxOutGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input,
framework::Tensor * input_grad,
const framework::Tensor& output,
const framework::Tensor& output_grad, int groups);
};
} // namespace math
} // namespace operators
} // namespace paddle
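A hedged usage sketch of these functors (assuming a CPU device context and NCHW tensors shaped as described above; the variable names are illustrative):

  // Illustrative call site: forward maxout on CPU with float data.
  paddle::platform::CPUDeviceContext ctx;
  paddle::operators::math::MaxOutFunctor<paddle::platform::CPUPlace, float>
      maxout;
  maxout(ctx, input /* N x (Co*groups) x H x W */,
         &output /* N x Co x H x W */, groups);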
...@@ -498,8 +498,8 @@ template class Pool3dGradFunctor< ...@@ -498,8 +498,8 @@ template class Pool3dGradFunctor<
* Ksize, strides, paddings are two elements. These two elements represent * Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively. * height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { class MaxPool2dWithIndexFunctor<platform::CPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, std::vector<int>& ksize, const framework::Tensor& input, std::vector<int>& ksize,
...@@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const int input_stride = input_height * input_width; const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
const T* input_data = input.data<T>(); const T1* input_data = input.data<T1>();
T* output_data = output->mutable_data<T>(context.GetPlace()); T1* output_data = output->mutable_data<T1>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace()); T2* mask_data = mask->mutable_data<T2>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
T ele = static_cast<T>(-FLT_MAX); T1 ele = static_cast<T1>(-FLT_MAX);
int index = -1; int index = -1;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
...@@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
* Ksize, strides, paddings are two elements. These two elements represent * Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively. * height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
...@@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const int input_stride = input_height * input_width; const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
const T* mask_data = mask.data<T>(); const T2* mask_data = mask.data<T2>();
const T* output_grad_data = output_grad.data<T>(); const T1* output_grad_data = output_grad.data<T1>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace()); T1* input_grad_data = input_grad->mutable_data<T1>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
} }
}; };
template class MaxPool2dWithIndexFunctor<platform::CPUPlace, float>; template class MaxPool2dWithIndexFunctor<platform::CPUPlace, float, int>;
template class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, float>; template class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, float, int>;
template class MaxPool2dWithIndexFunctor<platform::CPUPlace, double>; template class MaxPool2dWithIndexFunctor<platform::CPUPlace, double, int>;
template class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, double>; template class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, double, int>;
/* /*
* All tensors are in NCDHW format. * All tensors are in NCDHW format.
* Ksize, strides, paddings are three elements. These three elements represent * Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively. * depth, height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { class MaxPool3dWithIndexFunctor<platform::CPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, std::vector<int>& ksize, const framework::Tensor& input, std::vector<int>& ksize,
...@@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const int input_stride = input_depth * input_height * input_width; const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width; const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>(); const T1* input_data = input.data<T1>();
T* output_data = output->mutable_data<T>(context.GetPlace()); T1* output_data = output->mutable_data<T1>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace()); T2* mask_data = mask->mutable_data<T2>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
int output_idx = (pd * output_height + ph) * output_width + pw; int output_idx = (pd * output_height + ph) * output_width + pw;
T ele = static_cast<T>(-FLT_MAX); T1 ele = static_cast<T1>(-FLT_MAX);
int index = -1; int index = -1;
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
* Ksize, strides, paddings are three elements. These three elements represent * Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively. * depth, height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
...@@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const int input_stride = input_depth * input_height * input_width; const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width; const int output_stride = output_depth * output_height * output_width;
const T* mask_data = mask.data<T>(); const T2* mask_data = mask.data<T2>();
const T* output_grad_data = output_grad.data<T>(); const T1* output_grad_data = output_grad.data<T1>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace()); T1* input_grad_data = input_grad->mutable_data<T1>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
} }
}; };
template class MaxPool3dWithIndexFunctor<platform::CPUPlace, float>; template class MaxPool3dWithIndexFunctor<platform::CPUPlace, float, int>;
template class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, float>; template class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, float, int>;
template class MaxPool3dWithIndexFunctor<platform::CPUPlace, double>; template class MaxPool3dWithIndexFunctor<platform::CPUPlace, double, int>;
template class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, double>; template class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, double, int>;
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -658,13 +658,13 @@ template class Pool3dGradFunctor< ...@@ -658,13 +658,13 @@ template class Pool3dGradFunctor<
template class Pool3dGradFunctor< template class Pool3dGradFunctor<
platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>; platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>;
template <typename T> template <typename T1, typename T2>
__global__ void KernelMaxPool2dWithIdx( __global__ void KernelMaxPool2dWithIdx(
const int nthreads, const T* input_data, const int channels, const int nthreads, const T1* input_data, const int channels,
const int input_height, const int input_width, const int output_height, const int input_height, const int input_width, const int output_height,
const int output_width, const int ksize_height, const int ksize_width, const int output_width, const int ksize_height, const int ksize_width,
const int stride_height, const int stride_width, const int padding_height, const int stride_height, const int stride_width, const int padding_height,
const int padding_width, T* output_data, T* mask_data) { const int padding_width, T1* output_data, T2* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx( ...@@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx(
wstart = max(wstart, 0); wstart = max(wstart, 0);
input_data += (batch_idx * channels + c) * input_height * input_width; input_data += (batch_idx * channels + c) * input_height * input_width;
T ele = -FLT_MAX; T1 ele = -FLT_MAX;
int max_index = -1; int max_index = -1;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
...@@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx( ...@@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx(
} }
} }
template <typename T> template <typename T1, typename T2>
__global__ void KernelMaxPool2DWithIdxGrad( __global__ void KernelMaxPool2DWithIdxGrad(
const int nthreads, const T* output_grad, const T* mask_data, const int nthreads, const T1* output_grad, const T2* mask_data,
const int channels, const int input_height, const int input_width, const int channels, const int input_height, const int input_width,
const int output_height, const int output_width, const int ksize_height, const int output_height, const int output_width, const int ksize_height,
const int ksize_width, const int stride_height, const int stride_width, const int ksize_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width, T* input_grad) { const int padding_height, const int padding_width, T1* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
...@@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad( ...@@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad(
int pw_end = int pw_end =
min((w_offset + padding_width) / stride_width + 1, output_width); min((w_offset + padding_width) / stride_width + 1, output_width);
T gradient = 0; T1 gradient = 0;
int input_current_featuremap_idx = h_offset * input_width + w_offset; int input_current_featuremap_idx = h_offset * input_width + w_offset;
int output_idx = int output_idx =
(batch_idx * channels + c_offset) * output_height * output_width; (batch_idx * channels + c_offset) * output_height * output_width;
...@@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad( ...@@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad(
* Ksize, strides, paddings are two elements. These two elements represent * Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively. * height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { class MaxPool2dWithIndexFunctor<platform::GPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, std::vector<int>& ksize, const framework::Tensor& input, std::vector<int>& ksize,
...@@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_height = paddings[0]; const int padding_height = paddings[0];
const int padding_width = paddings[1]; const int padding_width = paddings[1];
const T* input_data = input.data<T>(); const T1* input_data = input.data<T1>();
T* output_data = output->mutable_data<T>(context.GetPlace()); T1* output_data = output->mutable_data<T1>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace()); T2* mask_data = mask->mutable_data<T2>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
dim3 grid(blocks, 1); dim3 grid(blocks, 1);
KernelMaxPool2dWithIdx< KernelMaxPool2dWithIdx<
T><<<grid, threads, 0, T1, T2><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, input_channels, input_height, input_width, nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, ksize_width, stride_height, output_height, output_width, ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width, output_data, mask_data); stride_width, padding_height, padding_width, output_data, mask_data);
...@@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
* Ksize, strides, paddings are two elements. These two elements represent * Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively. * height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
...@@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
const int padding_height = paddings[0]; const int padding_height = paddings[0];
const int padding_width = paddings[1]; const int padding_width = paddings[1];
const T* mask_data = mask.data<T>(); const T2* mask_data = mask.data<T2>();
const T* output_grad_data = output_grad.data<T>(); const T1* output_grad_data = output_grad.data<T1>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace()); T1* input_grad_data = input_grad->mutable_data<T1>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width; int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
dim3 grid(blocks, 1); dim3 grid(blocks, 1);
KernelMaxPool2DWithIdxGrad< KernelMaxPool2DWithIdxGrad<
T><<<grid, threads, 0, T1, T2><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, output_grad_data, mask_data, .stream()>>>(
input_channels, input_height, input_width, nthreads, output_grad_data, mask_data, input_channels, input_height,
output_height, output_width, ksize_height, input_width, output_height, output_width, ksize_height, ksize_width,
ksize_width, stride_height, stride_width, stride_height, stride_width, padding_height, padding_width,
padding_height, padding_width, input_grad_data); input_grad_data);
} }
}; };
template class MaxPool2dWithIndexFunctor<platform::GPUPlace, float>; template class MaxPool2dWithIndexFunctor<platform::GPUPlace, float, int>;
template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, float>; template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, float, int>;
template class MaxPool2dWithIndexFunctor<platform::GPUPlace, double>; template class MaxPool2dWithIndexFunctor<platform::GPUPlace, double, int>;
template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>; template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double, int>;
template <typename T> template <typename T1, typename T2>
__global__ void KernelMaxPool3DWithIdx( __global__ void KernelMaxPool3DWithIdx(
const int nthreads, const T* input_data, const int channels, const int nthreads, const T1* input_data, const int channels,
const int input_depth, const int input_height, const int input_width, const int input_depth, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width, const int output_depth, const int output_height, const int output_width,
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width, const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width, const int padding_depth, const int padding_height, const int padding_width,
T* output_data, T* mask_data) { T1* output_data, T2* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx( ...@@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx(
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
T ele = -FLT_MAX; T1 ele = -FLT_MAX;
int max_index = -1; int max_index = -1;
input_data += input_data +=
(batch_idx * channels + c) * input_depth * input_height * input_width; (batch_idx * channels + c) * input_depth * input_height * input_width;
...@@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx( ...@@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx(
} }
} }
template <typename T> template <typename T1, typename T2>
__global__ void KernelMaxPool3DWithIdxGrad( __global__ void KernelMaxPool3DWithIdxGrad(
const int nthreads, const T* output_grad, const T* mask, const int channels, const int nthreads, const T1* output_grad, const T2* mask,
const int input_depth, const int input_height, const int input_width, const int channels, const int input_depth, const int input_height,
const int output_depth, const int output_height, const int output_width, const int input_width, const int output_depth, const int output_height,
const int ksize_depth, const int ksize_height, const int ksize_width, const int output_width, const int ksize_depth, const int ksize_height,
const int stride_depth, const int stride_height, const int stride_width, const int ksize_width, const int stride_depth, const int stride_height,
const int padding_depth, const int padding_height, const int padding_width, const int stride_width, const int padding_depth, const int padding_height,
T* input_grad) { const int padding_width, T1* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
...@@ -922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad( ...@@ -922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad(
int pw_end = int pw_end =
min((w_offset + padding_width) / stride_width + 1, output_width); min((w_offset + padding_width) / stride_width + 1, output_width);
T gradient = 0; T1 gradient = 0;
int input_current_feature_map_idx = int input_current_feature_map_idx =
(d_offset * input_height + h_offset) * input_width + w_offset; (d_offset * input_height + h_offset) * input_width + w_offset;
int output_idx = (batch_idx * channels + c_offset) * output_depth * int output_idx = (batch_idx * channels + c_offset) * output_depth *
...@@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad( ...@@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad(
* Ksize, strides, paddings are three elements. These three elements represent * Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively. * depth, height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { class MaxPool3dWithIndexFunctor<platform::GPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, std::vector<int>& ksize, const framework::Tensor& input, std::vector<int>& ksize,
...@@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_height = paddings[1]; const int padding_height = paddings[1];
const int padding_width = paddings[2]; const int padding_width = paddings[2];
const T* input_data = input.data<T>(); const T1* input_data = input.data<T1>();
T* output_data = output->mutable_data<T>(context.GetPlace()); T1* output_data = output->mutable_data<T1>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace()); T2* mask_data = mask->mutable_data<T2>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
dim3 grid(blocks, 1); dim3 grid(blocks, 1);
KernelMaxPool3DWithIdx< KernelMaxPool3DWithIdx<
T><<<grid, threads, 0, T1, T2><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, input_channels, input_depth, input_height, nthreads, input_data, input_channels, input_depth, input_height,
input_width, output_depth, output_height, output_width, ksize_depth, input_width, output_depth, output_height, output_width, ksize_depth,
ksize_height, ksize_width, stride_depth, stride_height, stride_width, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
...@@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
* Ksize, strides, paddings are three elements. These three elements represent * Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively. * depth, height and width, respectively.
*/ */
template <typename T> template <typename T1, typename T2>
class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T1, T2> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
...@@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
const int padding_height = paddings[1]; const int padding_height = paddings[1];
const int padding_width = paddings[2]; const int padding_width = paddings[2];
const T* output_grad_data = output_grad.data<T>(); const T1* output_grad_data = output_grad.data<T1>();
const T* mask_data = mask.data<T>(); const T2* mask_data = mask.data<T2>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace()); T1* input_grad_data = input_grad->mutable_data<T1>(context.GetPlace());
int nthreads = int nthreads =
batch_size * input_channels * input_depth * input_height * input_width; batch_size * input_channels * input_depth * input_height * input_width;
...@@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
dim3 grid(blocks, 1); dim3 grid(blocks, 1);
KernelMaxPool3DWithIdxGrad< KernelMaxPool3DWithIdxGrad<
T><<<grid, threads, 0, T1, T2><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, output_grad_data, mask_data, input_channels, input_depth, nthreads, output_grad_data, mask_data, input_channels, input_depth,
input_height, input_width, output_depth, output_height, output_width, input_height, input_width, output_depth, output_height, output_width,
ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height,
...@@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
} }
}; };
template class MaxPool3dWithIndexFunctor<platform::GPUPlace, float>; template class MaxPool3dWithIndexFunctor<platform::GPUPlace, float, int>;
template class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, float>; template class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, float, int>;
template class MaxPool3dWithIndexFunctor<platform::GPUPlace, double>; template class MaxPool3dWithIndexFunctor<platform::GPUPlace, double, int>;
template class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, double>; template class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, double, int>;
} // namespace math } // namespace math
} // namespace operators } // namespace operators
......
...@@ -153,7 +153,7 @@ class MaxPool3dGradFunctor { ...@@ -153,7 +153,7 @@ class MaxPool3dGradFunctor {
* In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in * In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in
* NCDHW format. * NCDHW format.
*/ */
template <typename Place, typename T> template <typename Place, typename T1, typename T2>
class MaxPool2dWithIndexFunctor { class MaxPool2dWithIndexFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
...@@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor { ...@@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor {
framework::Tensor* output, framework::Tensor* mask); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename Place, typename T> template <typename Place, typename T1, typename T2>
class MaxPool2dWithIndexGradFunctor { class MaxPool2dWithIndexGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
...@@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor { ...@@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor {
framework::Tensor* input_grad); framework::Tensor* input_grad);
}; };
template <typename Place, typename T> template <typename Place, typename T1, typename T2>
class MaxPool3dWithIndexFunctor { class MaxPool3dWithIndexFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
...@@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor { ...@@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor {
framework::Tensor* output, framework::Tensor* mask); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename Place, typename T> template <typename Place, typename T1, typename T2>
class MaxPool3dWithIndexGradFunctor { class MaxPool3dWithIndexGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/operators/maxout_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MaxOutOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor) The input tensor of maxout operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of feature.");
AddOutput("Out",
"(Tensor) The output tensor of maxout operator."
"The format of output tensor is also NCHW."
"Where N is batch size, C is "
"the number of channels, H and W is the height and "
"width of feature.");
AddAttr<int>(
"groups",
R"DOC("Specifies how many groups the input tensor will be split"
"in the channel dimension. And the number of output channel is "
"the number of channels divided by groups.."
)DOC");
AddComment(R"DOC(
Assume the input shape is (N, Ci, H, W) and the output shape is (N, Co, H, W);
then Co = Ci / groups.
math:
y_{si+j} = \max_k x_{gsi + sk + j}
g = groups
s = input.size / num_channels
0 \le i < num_channels / groups
0 \le j < s
0 \le k < groups
Please refer to the papers:
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
)DOC");
}
};
class MaxOutOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MaxoutOp"
"should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of MaxoutOp should not be null.");
auto in_x_dims = ctx->GetInputDim("X");
int groups = ctx->Attrs().Get<int>("groups");
// check groups > 1
PADDLE_ENFORCE_GT(
groups, 1,
"groups should be larger than 1 in maxoutop");
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1] / groups});
output_shape.push_back(in_x_dims[2]);
output_shape.push_back(in_x_dims[3]);
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
}
};
class MaxOutOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad,
ops::MaxOutOpGrad);
REGISTER_OP_CPU_KERNEL(maxout, ops::MaxOutKernel<paddle::platform::CPUPlace,
float>);
REGISTER_OP_CPU_KERNEL(maxout_grad,
ops::MaxOutGradKernel<paddle::platform::CPUPlace,
float>);
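As a sanity check on the formula in the DOC comment above, here is a small NumPy sketch of the maxout mapping (an illustration only, not the operator's kernel; the shapes are made up):

import numpy as np

# Maxout groups every `groups` consecutive input channels into one output
# channel and keeps the element-wise maximum, so (N, Ci, H, W) becomes
# (N, Ci / groups, H, W), matching MaxOutOp::InferShape above.
def maxout_forward(x, groups):
    n, ci, h, w = x.shape
    assert ci % groups == 0, "channels must be divisible by groups"
    # input channel index is i * groups + k, with k ranging over the group
    return x.reshape(n, ci // groups, groups, h, w).max(axis=2)

x = np.random.randn(2, 6, 4, 4).astype(np.float32)
y = maxout_forward(x, groups=3)
assert y.shape == (2, 2, 4, 4)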
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/maxout_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(maxout,
ops::MaxOutKernel<paddle::platform::GPUPlace, float>,
ops::MaxOutKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(maxout_grad,
ops::MaxOutGradKernel<paddle::platform::GPUPlace,
float>,
ops::MaxOutGradKernel<paddle::platform::GPUPlace,
double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/maxouting.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename Place, typename T>
class MaxOutKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
Tensor* out = context.Output<Tensor>("Out");
int groups = context.template Attr<int>("groups");
math::MaxOutFunctor<Place, T> maxout_forward;
maxout_forward(context.device_context(), *in_x, out, groups);
}
};
template <typename Place, typename T>
class MaxOutGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X");
const Tensor* out = context.Input<Tensor>("Out");
const Tensor* out_grad =
context.Input<Tensor>(framework::GradVarName("Out"));
Tensor* in_x_grad = context.Output<Tensor>(framework::GradVarName("X"));
int groups = context.template Attr<int>("groups");
auto& device_ctx = context.device_context();
math::SetConstant<Place, T> zero;
if (in_x_grad) {
in_x_grad->mutable_data<T>(context.GetPlace());
zero(device_ctx, in_x_grad, static_cast<T>(0.0));
math::MaxOutGradFunctor<Place, T> maxout_backward;
maxout_backward(context.device_context(), *in_x, in_x_grad, *out,
*out_grad, groups);
}
}
};
} // namespace operators
} // namespace paddle
...@@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad, ...@@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
ops::PoolOpGrad); ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(pool2d_cudnn, REGISTER_OP_CPU_KERNEL(pool2d_cudnn,
ops::PoolKernel<paddle::platform::CPUPlace, float>); ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad, REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>) ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>)
REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad,
ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(pool3d_cudnn,
ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>)
...@@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> { ...@@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor input_desc; ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc; ScopedTensorDescriptor output_desc;
ScopedPoolingDescriptor pool_desc; ScopedPoolingDescriptor pool_desc;
DataLayout layout = DataLayout::kNCHW; DataLayout layout;
if (strides.size() == 2U) {
layout = DataLayout::kNCHW;
} else {
layout = DataLayout::kNCDHW;
}
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>( cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims())); layout, framework::vectorize2int(input->dims()));
...@@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> { ...@@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
ScopedTensorDescriptor input_desc; ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc; ScopedTensorDescriptor output_desc;
ScopedPoolingDescriptor pool_desc; ScopedPoolingDescriptor pool_desc;
DataLayout layout = DataLayout::kNCHW; DataLayout layout;
if (strides.size() == 2U) {
layout = DataLayout::kNCHW;
} else {
layout = DataLayout::kNCDHW;
}
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>( cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims())); layout, framework::vectorize2int(input->dims()));
...@@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> { ...@@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>); REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>,
REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>); ops::PoolCudnnOpKernel<double>);
REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
ops::PoolCudnnGradOpKernel<double>);
REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel<float>,
ops::PoolCudnnOpKernel<double>);
REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
ops::PoolCudnnGradOpKernel<double>);
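The layout switch added to the two cuDNN pooling kernels above keys off the rank of `strides`: two stride values mean a 4-D NCHW tensor (pool2d), three mean a 5-D NCDHW tensor (pool3d). A one-line sketch of that dispatch (illustrative Python, not the C++ code):

def pooling_layout(strides):
    # 2 spatial strides -> pool2d on NCHW; 3 -> pool3d on NCDHW
    return 'NCHW' if len(strides) == 2 else 'NCDHW'

assert pooling_layout([1, 1]) == 'NCHW'
assert pooling_layout([2, 2, 2]) == 'NCDHW'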
...@@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad, ...@@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad,
ops::PoolOpGrad); ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(pool2d, REGISTER_OP_CPU_KERNEL(pool2d,
ops::PoolKernel<paddle::platform::CPUPlace, float>); ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool2d_grad, REGISTER_OP_CPU_KERNEL(pool2d_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>) ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>)
REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad, REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad,
ops::PoolOpGrad); ops::PoolOpGrad);
REGISTER_OP_CPU_KERNEL(pool3d, REGISTER_OP_CPU_KERNEL(pool3d,
ops::PoolKernel<paddle::platform::CPUPlace, float>); ops::PoolKernel<paddle::platform::CPUPlace, float>,
ops::PoolKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(pool3d_grad, REGISTER_OP_CPU_KERNEL(pool3d_grad,
ops::PoolGradKernel<paddle::platform::CPUPlace, float>); ops::PoolGradKernel<paddle::platform::CPUPlace, float>,
ops::PoolGradKernel<paddle::platform::CPUPlace, double>);
...@@ -17,11 +17,15 @@ limitations under the License. */ ...@@ -17,11 +17,15 @@ limitations under the License. */
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(pool2d, REGISTER_OP_GPU_KERNEL(pool2d,
ops::PoolKernel<paddle::platform::GPUPlace, float>); ops::PoolKernel<paddle::platform::GPUPlace, float>,
ops::PoolKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(pool2d_grad, REGISTER_OP_GPU_KERNEL(pool2d_grad,
ops::PoolGradKernel<paddle::platform::GPUPlace, float>); ops::PoolGradKernel<paddle::platform::GPUPlace, float>,
ops::PoolGradKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(pool3d, REGISTER_OP_GPU_KERNEL(pool3d,
ops::PoolKernel<paddle::platform::GPUPlace, float>); ops::PoolKernel<paddle::platform::GPUPlace, float>,
ops::PoolKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(pool3d_grad, REGISTER_OP_GPU_KERNEL(pool3d_grad,
ops::PoolGradKernel<paddle::platform::GPUPlace, float>); ops::PoolGradKernel<paddle::platform::GPUPlace, float>,
ops::PoolGradKernel<paddle::platform::GPUPlace, double>);
...@@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ...@@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"X(Input) of Pooling should not be null."); "Input(X) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Out(Output) of Pooling should not be null."); "Output(Out) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Mask"), PADDLE_ENFORCE(ctx->HasOutput("Mask"),
"Mask(Output) of Pooling should not be null."); "Output(Mask) of Pooling should not be null.");
auto in_x_dims = ctx->GetInputDim("X"); auto in_x_dims = ctx->GetInputDim("X");
...@@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ...@@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->SetOutputDim("Mask", framework::make_ddim(output_shape)); ctx->SetOutputDim("Mask", framework::make_ddim(output_shape));
} }
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::Tensor>("X")->type()),
ctx.device_context());
}
}; };
class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
...@@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { ...@@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
"Input(X@GRAD) should not be null."); "Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
} }
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::Tensor>("X")->type()),
ctx.device_context());
}
}; };
class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
...@@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<bool>( AddAttr<bool>(
"global_pooling", "global_pooling",
"(bool, default false) Whether to use the global pooling. " "(bool, default:false) Whether to use the global pooling. "
"If global_pooling = true, ksize and paddings will be ignored.") "If global_pooling = true, ksize and paddings will be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
...@@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"(vector<int>, defalut {0, 0}), paddings(height, width) of pooling " "(vector<int>, defalut:{0, 0}), paddings(height, width) of pooling "
"operator. " "operator. "
"If global_pooling = true, paddings and will be ignored.") "If global_pooling = true, paddings and will be ignored.")
.SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently,
...@@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp, ...@@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
max_pool2d_with_index, max_pool2d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float>); ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float, int>,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, double, int>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
max_pool2d_with_index_grad, max_pool2d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float>) ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float, int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, double, int>)
REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad, ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad,
...@@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, ...@@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
max_pool3d_with_index, max_pool3d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float>); ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, float, int>,
ops::MaxPoolWithIndexKernel<paddle::platform::CPUPlace, double, int>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
max_pool3d_with_index_grad, max_pool3d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float>) ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, float, int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUPlace, double, int>)
...@@ -18,14 +18,18 @@ namespace ops = paddle::operators; ...@@ -18,14 +18,18 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
max_pool2d_with_index, max_pool2d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float>); ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float, int>,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, double, int>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
max_pool2d_with_index_grad, max_pool2d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float>) ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float, int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, double, int>)
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
max_pool3d_with_index, max_pool3d_with_index,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float>); ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, float, int>,
ops::MaxPoolWithIndexKernel<paddle::platform::GPUPlace, double, int>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
max_pool3d_with_index_grad, max_pool3d_with_index_grad,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float>) ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, float, int>,
ops::MaxPoolWithIndexGradKernel<paddle::platform::GPUPlace, double, int>)
...@@ -24,8 +24,8 @@ namespace operators { ...@@ -24,8 +24,8 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
template <typename Place, typename T> template <typename Place, typename T1, typename T2>
class MaxPoolWithIndexKernel : public framework::OpKernel<T> { class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* in_x = context.Input<Tensor>("X"); const Tensor* in_x = context.Input<Tensor>("X");
...@@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> { ...@@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
switch (ksize.size()) { switch (ksize.size()) {
case 2: { case 2: {
paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T> paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T1, T2>
pool2d_forward; pool2d_forward;
pool2d_forward(context.device_context(), *in_x, ksize, strides, pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, out, mask); paddings, out, mask);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T> paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T1, T2>
pool3d_forward; pool3d_forward;
pool3d_forward(context.device_context(), *in_x, ksize, strides, pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, out, mask); paddings, out, mask);
...@@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> { ...@@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
} }
}; };
template <typename Place, typename T> template <typename Place, typename T1, typename T2>
class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> { class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
const Tensor* mask = context.Input<Tensor>("Mask"); const Tensor* mask = context.Input<Tensor>("Mask");
...@@ -80,19 +80,19 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> { ...@@ -80,19 +80,19 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
} }
if (in_x_grad) { if (in_x_grad) {
in_x_grad->mutable_data<T>(context.GetPlace()); in_x_grad->mutable_data<T1>(context.GetPlace());
auto& device_ctx = context.device_context(); auto& device_ctx = context.device_context();
math::set_constant(device_ctx, in_x_grad, 0); math::set_constant(device_ctx, in_x_grad, 0);
switch (ksize.size()) { switch (ksize.size()) {
case 2: { case 2: {
paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T> paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T1, T2>
pool2d_backward; pool2d_backward;
pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides, pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides,
paddings, in_x_grad); paddings, in_x_grad);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T> paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T1, T2>
pool3d_backward; pool3d_backward;
pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides, pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides,
paddings, in_x_grad); paddings, in_x_grad);
......
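The T1/T2 split above separates the value type of the tensor (T1, float or double) from the type used for the saved argmax mask (T2, int). A minimal NumPy sketch of the forward/backward pair, reduced to a 1-D window with stride equal to ksize for brevity (an illustration of the mask semantics, not the CUDA kernels):

import numpy as np

def maxpool1d_with_index(x, ksize):
    n = len(x) // ksize
    out = np.empty(n, dtype=x.dtype)    # pooled values, element type "T1"
    mask = np.empty(n, dtype=np.int64)  # flat argmax indices, type "T2"
    for i in range(n):
        window = x[i * ksize:(i + 1) * ksize]
        mask[i] = i * ksize + int(np.argmax(window))
        out[i] = x[mask[i]]
    return out, mask

def maxpool1d_with_index_grad(out_grad, mask, input_len):
    x_grad = np.zeros(input_len, dtype=out_grad.dtype)
    np.add.at(x_grad, mask, out_grad)   # scatter gradients via saved indices
    return x_grad

x = np.array([1., 5., 2., 4.])
out, mask = maxpool1d_with_index(x, ksize=2)
assert out.tolist() == [5., 4.] and mask.tolist() == [1, 3]
assert maxpool1d_with_index_grad(np.ones(2), mask, 4).tolist() == [0., 1., 0., 1.]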
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_slice_op.h"
namespace paddle {
namespace operators {
class SequenceSliceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Offset"),
"Input(Offset) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Length"),
"Input(Length) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SequenceSliceOp should not be null.");
auto input_dims = ctx->GetInputDim("X");
auto offset_dim = ctx->GetInputDim("Offset");
auto length_dim = ctx->GetInputDim("Length");
PADDLE_ENFORCE_EQ(
offset_dim.size(), 2UL,
"Only support one level sequence now, The rank of offset must be 2.");
PADDLE_ENFORCE_EQ(
length_dim.size(), 2UL,
"Only support one level sequence now, The rank of Length must be 2.");
// Initialize the output's dims to the maximum;
// the real dims are reset from the values of Offset and Length in the kernel
ctx->SetOutputDim("Out", input_dims);
}
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
class SequenceSliceGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"The gradient of Out should not be null.");
PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")),
"The gradient of X should not be null.");
ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
}
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceSliceOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor), "
"the input of SequenceSliceOp.");
AddInput("Offset",
"(Tensor), "
"a vector<int> to describe the offset of every input sequence for "
"sub sequence item.");
AddInput("Length",
"(Tensor), "
"a vector<int> to describe the length of every input sequence for "
"sub sequence item.");
AddOutput("Out",
"(LoDTensor), the output of SequenceSliceOp.");
AddComment(R"DOC(
Sequence slice operator
The operator crops a sub-sequence from each input sequence, given a per-sequence start offset and sub-sequence length.
It only supports one-level sequences (LoD tensors whose LoD level is 1).
- Case:
X = [[a1, a2;
b1, b2;
c1, c2]
[d1, d2;
e1, e2]]
LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2)
Offset = [[0], [1]]; Length = [[2], [1]]
Out = [[a1, a2;
b1, b2]
[e1, e2]]
LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2)
NOTE: The number of sequences in the input, the size of Offset, and the size of Length should all be equal. Offsets start from 0.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker,
sequence_slice_grad, ops::SequenceSliceGradOp);
REGISTER_OP_CPU_KERNEL(
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUPlace, float>);
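The worked case in the DOC comment above can be reproduced with plain NumPy (a sketch of the slice semantics and of the output LoD recomputation, not of the kernel itself):

import numpy as np

x = np.arange(10, dtype=np.float32).reshape(5, 2)  # Dims(X) = (5, 2)
lod = [0, 3, 5]      # LoD(X): two sequences of lengths 3 and 2
offset = [0, 1]      # per-sequence start offsets
length = [2, 1]      # per-sequence slice lengths

rows, out_lod = [], [0]
for i in range(len(lod) - 1):
    start = lod[i] + offset[i]
    rows.append(x[start:start + length[i]])
    out_lod.append(out_lod[-1] + length[i])

out = np.concatenate(rows)
assert out.shape == (3, 2)        # Dims(Out) = (3, 2)
assert out_lod == [0, 2, 3]       # LoD(Out) = {{0, 2, 3}}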
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_slice_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;
template <typename T>
inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data,
const int64_t* length_data) {
auto out_lod = in.lod();
size_t lod_offset = 0;
auto n = in.lod()[0].size() - 1;
out_lod[0][0] = 0;
for (size_t i = 0; i < n; ++i) {
lod_offset += length_data[i];
out_lod[0][i + 1] = lod_offset;
}
return out_lod;
}
template <typename Place, typename T>
class SequenceSliceOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<LoDTensor>("X");
auto* offset = ctx.Input<Tensor>("Offset");
auto* length = ctx.Input<Tensor>("Length");
auto* out = ctx.Output<LoDTensor>("Out");
auto lod = in->lod();
auto n = lod[0].size() - 1;
PADDLE_ENFORCE_EQ(lod.size(), 1UL,
"Only support one level sequence now.");
PADDLE_ENFORCE_EQ(
n, static_cast<size_t>(length->dims()[0]),
"The size of input-sequence and length-array should be the same")
PADDLE_ENFORCE_EQ(
n, static_cast<size_t>(offset->dims()[0]),
"The size of input-sequence and offset-array should be the same")
const int64_t* offset_data = offset->data<int64_t>();
const int64_t* length_data = length->data<int64_t>();
framework::Tensor offset_cpu;
framework::Tensor length_cpu;
if (platform::is_gpu_place(ctx.GetPlace())) {
// Offset holds int64_t values, so copy it to CPU as int64_t, not T.
offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
length_data = length_cpu.data<int64_t>();
}
for (size_t i = 0; i < n; ++i) {
PADDLE_ENFORCE_LE(0, offset_data[i],
"The offset[%d] must be greater than or equal to zero.", i);
PADDLE_ENFORCE_LT(0, length_data[i],
"The length[%d] must be greater than zero.", i);
PADDLE_ENFORCE_LE(
lod[0][i] + offset_data[i] + length_data[i],
lod[0][i + 1],
"The target tensor's length overflows its source sequence.");
}
out->mutable_data<T>(ctx.GetPlace());
auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
auto out_dims = in->dims();
out_dims[0] = out_lod[0][out_lod[0].size() - 1];
out->Resize(out_dims);
out->set_lod(out_lod);
auto in_stride = framework::stride(in->dims());
auto out_stride = framework::stride(out->dims());
size_t out_offset = 0;
for (size_t i = 0; i < n; ++i) {
Tensor in_t =
in->Slice(static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] +
length_data[i]));
StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(),
in_stride, in_t.dims(), out_stride,
out->data<T>() + out_offset);
out_offset += length_data[i] * in_stride[0];
}
}
};
template <typename Place, typename T>
class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<LoDTensor>("X");
auto* offset = ctx.Input<Tensor>("Offset");
auto* length = ctx.Input<Tensor>("Length");
auto* out_grad =
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
auto* x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
const int64_t* offset_data = offset->data<int64_t>();
const int64_t* length_data = length->data<int64_t>();
framework::Tensor offset_cpu;
framework::Tensor length_cpu;
if (platform::is_gpu_place(ctx.GetPlace())) {
// As in the forward kernel, Offset holds int64_t values.
offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
length_data = length_cpu.data<int64_t>();
}
auto lod = in->lod();
auto out_lod = out_grad->lod();
if (x_grad) {
x_grad->mutable_data<T>(ctx.GetPlace());
x_grad->set_lod(in->lod());
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), x_grad, static_cast<T>(0));
for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
Tensor out_grad_t =
out_grad->Slice(static_cast<int>(out_lod[0][i]),
static_cast<int>(out_lod[0][i + 1]));
auto out_grad_stride = framework::stride(out_grad_t.dims());
auto x_grad_stride = framework::stride(x_grad->dims());
Tensor x_grad_t = x_grad->Slice(
static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));
StridedMemcpy<T>(ctx.device_context(), out_grad_t.data<T>(),
out_grad_stride, out_grad_t.dims(), x_grad_stride,
x_grad_t.data<T>());
}
}
}
};
} // namespace operators
} // namespace paddle
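The gradient kernel above mirrors the forward copy: x_grad is zero-filled and each out_grad segment is copied back to the slice it came from. The same idea in NumPy, continuing the example from sequence_slice_op.cc (a sketch, not the strided-memcpy implementation):

import numpy as np

def sequence_slice_grad(out_grad, lod, out_lod, offset, x_shape):
    x_grad = np.zeros(x_shape, dtype=out_grad.dtype)
    for i in range(len(lod) - 1):
        src = out_grad[out_lod[i]:out_lod[i + 1]]
        start = lod[i] + offset[i]
        x_grad[start:start + len(src)] = src  # rows outside the slice stay 0
    return x_grad

g = sequence_slice_grad(np.ones((3, 2), np.float32),
                        lod=[0, 3, 5], out_lod=[0, 2, 3],
                        offset=[0, 1], x_shape=(5, 2))
assert g.sum() == 6.0 and g[2].sum() == 0.0  # unsliced rows get no gradient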
...@@ -224,13 +224,15 @@ class ScopedConvolutionDescriptor { ...@@ -224,13 +224,15 @@ class ScopedConvolutionDescriptor {
PADDLE_ENFORCE_EQ(pads.size(), strides.size()); PADDLE_ENFORCE_EQ(pads.size(), strides.size());
PADDLE_ENFORCE_EQ(pads.size(), dilations.size()); PADDLE_ENFORCE_EQ(pads.size(), dilations.size());
#if CUDNN_VERSION < 6000 #if !CUDNN_VERSION_MIN(6, 0, 0)
// cudnn v5 does not support dilation conv, the argument is called upscale // cudnn v5 does not support dilation conv, the argument is called upscale
// instead of dilations and it is must be one. // instead of dilations and it is must be one.
for (size_t i = 0; i < dilations.size(); ++i) { for (size_t i = 0; i < dilations.size(); ++i) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
dilations[i], 1, dilations[i], 1,
"Dilations conv is not supported in this cuDNN version"); "Dilations conv is not supported in this cuDNN version(%d.%d.%d).",
CUDNN_VERSION / 1000, CUDNN_VERSION % 1000 / 100,
CUDNN_VERSION % 100);
} }
#endif #endif
......
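The enriched error message above decodes the packed CUDNN_VERSION integer, which cuDNN defines as major * 1000 + minor * 100 + patch. The same arithmetic as a quick check:

def decode_cudnn_version(v):
    # CUDNN_VERSION = major * 1000 + minor * 100 + patch, e.g. 5110 = 5.1.10
    return v // 1000, v % 1000 // 100, v % 100

assert decode_cudnn_version(5110) == (5, 1, 10)
assert decode_cudnn_version(6021) == (6, 0, 21)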
...@@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { ...@@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) {
EXPECT_EQ(strides[2], 6); EXPECT_EQ(strides[2], 6);
EXPECT_EQ(strides[1], 36); EXPECT_EQ(strides[1], 36);
EXPECT_EQ(strides[0], 144); EXPECT_EQ(strides[0], 144);
// test tensor5d: ScopedTensorDescriptor
ScopedTensorDescriptor tensor5d_desc;
std::vector<int> shape_5d = {2, 4, 6, 6, 6};
auto desc_5d = tensor5d_desc.descriptor<float>(DataLayout::kNCDHW, shape_5d);
std::vector<int> dims_5d(5);
std::vector<int> strides_5d(5);
paddle::platform::dynload::cudnnGetTensorNdDescriptor(
desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data());
EXPECT_EQ(nd, 5);
for (size_t i = 0; i < dims_5d.size(); ++i) {
EXPECT_EQ(dims_5d[i], shape_5d[i]);
}
EXPECT_EQ(strides_5d[4], 1);
EXPECT_EQ(strides_5d[3], 6);
EXPECT_EQ(strides_5d[2], 36);
EXPECT_EQ(strides_5d[1], 216);
EXPECT_EQ(strides_5d[0], 864);
} }
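The stride expectations in the new 5-D test are just the packed row-major strides of shape {2, 4, 6, 6, 6}: each stride is the product of the dimension sizes to its right. A small sketch of that derivation:

def packed_strides(dims):
    # stride of dimension i = product of all dimension sizes after i
    strides = [1] * len(dims)
    for i in range(len(dims) - 2, -1, -1):
        strides[i] = strides[i + 1] * dims[i + 1]
    return strides

assert packed_strides([2, 4, 6, 6, 6]) == [864, 216, 36, 6, 1]  # 5-D test
assert packed_strides([2, 4, 6, 6]) == [144, 36, 6, 1]          # 4-D test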
TEST(CudnnHelper, ScopedFilterDescriptor) { TEST(CudnnHelper, ScopedFilterDescriptor) {
...@@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { ...@@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) {
for (size_t i = 0; i < shape.size(); ++i) { for (size_t i = 0; i < shape.size(); ++i) {
EXPECT_EQ(kernel[i], shape[i]); EXPECT_EQ(kernel[i], shape[i]);
} }
ScopedFilterDescriptor filter_desc_4d;
std::vector<int> shape_4d = {2, 3, 3, 3};
auto desc_4d = filter_desc_4d.descriptor<float>(DataLayout::kNCDHW, shape_4d);
std::vector<int> kernel_4d(4);
paddle::platform::dynload::cudnnGetFilterNdDescriptor(
desc_4d, 4, &type, &format, &nd, kernel_4d.data());
EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format);
EXPECT_EQ(nd, 4);
for (size_t i = 0; i < shape_4d.size(); ++i) {
EXPECT_EQ(kernel_4d[i], shape_4d[i]);
}
} }
TEST(CudnnHelper, ScopedConvolutionDescriptor) { TEST(CudnnHelper, ScopedConvolutionDescriptor) {
......
...@@ -144,7 +144,7 @@ function gen_dockerfile() { ...@@ -144,7 +144,7 @@ function gen_dockerfile() {
DOCKERFILE_GPU_ENV="" DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO="" DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:\${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so" DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi fi
......
...@@ -11,7 +11,6 @@ add_unittest_without_exec(test_Trainer ...@@ -11,7 +11,6 @@ add_unittest_without_exec(test_Trainer
test_Trainer.cpp) test_Trainer.cpp)
add_test(NAME test_Trainer add_test(NAME test_Trainer
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py &&
${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_Trainer ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
......
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
TrainData(ProtoData(
files = 'trainer/tests/train_files.txt',
usage_ratio = 1.0,
))
TestData(ProtoData(
files = 'trainer/tests/test_files.txt'
))
default_initial_std(1)
default_decay_rate(4e-4)
default_device(0)
Inputs("features", "word", "pos", "chunk")
Outputs("crf")
Layer(
name = "features",
type = "data",
size = 4339,
)
Layer(
name = "word",
type = "data",
size = 478,
)
Layer(
name = "pos",
type = "data",
size = 45
)
Layer(
name = "chunk",
type = "data",
size = 23
)
Layer(
name = "output",
type = "mixed",
size = 23,
bias = False,
device = -1,
inputs = [
FullMatrixProjection("features", parameter_name="feature_weights"),
# TableProjection("word"),
# TableProjection("pos"),
],
)
Layer(
name = "crf",
type = "crf",
size = 23,
device = -1,
inputs = [
Input("output", parameter_name="crfw"),
"chunk"
]
)
Layer(
name = "crf_decoding",
type = "crf_decoding",
size = 23,
device = -1,
inputs = [
Input("output", parameter_name="crfw"),
"chunk"
]
)
Evaluator(
name = "error",
type = "sum",
inputs = "crf_decoding",
)
'''
# chunk evaluator cannot be used for GPU training
Evaluator(
name = "chunk_f1",
type = "chunk",
inputs = ["crf_decoding", "chunk"],
chunk_scheme = "IOB",
num_chunk_types = 11,
)
'''
Settings(
algorithm = 'sgd',
batch_size = 100,
average_window = 0.5,
max_average_window = 2500,
learning_rate = 1e-1,
learning_rate_decay_a = 5e-7,
learning_rate_decay_b = 0.75,
l1weight = 0,
l2weight = 1,
c1 = 0.0001,
backoff = 0.5,
owlqn_steps = 100,
max_backoff = 5,
)
(Diff too large to display as a source diff; view the blob instead.)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from cStringIO import StringIO
import paddle.proto.DataFormat_pb2 as DataFormat
from google.protobuf.internal.encoder import _EncodeVarint
import logging
import pprint
logging.basicConfig(
format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
logger = logging.getLogger('paddle')
logger.setLevel(logging.INFO)
OOV_POLICY_IGNORE = 0
OOV_POLICY_USE = 1
OOV_POLICY_ERROR = 2
num_original_columns = 3
# Feature combination patterns.
# [[-1,0], [0,0]] means previous token at column 0 and current token at
# column 0 are combined as one feature.
patterns = [
[[-2, 0]],
[[-1, 0]],
[[0, 0]],
[[1, 0]],
[[2, 0]],
[[-1, 0], [0, 0]],
[[0, 0], [1, 0]],
[[-2, 1]],
[[-1, 1]],
[[0, 1]],
[[1, 1]],
[[2, 1]],
[[-2, 1], [-1, 1]],
[[-1, 1], [0, 1]],
[[0, 1], [1, 1]],
[[1, 1], [2, 1]],
[[-2, 1], [-1, 1], [0, 1]],
[[-1, 1], [0, 1], [1, 1]],
[[0, 1], [1, 1], [2, 1]],
]
def make_features(sequence):
length = len(sequence)
num_features = len(sequence[0])
def get_features(pos):
if pos < 0:
return ['#B%s' % -pos] * num_features
if pos >= length:
return ['#E%s' % (pos - length + 1)] * num_features
return sequence[pos]
for i in xrange(length):
for pattern in patterns:
fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
sequence[i].append(fname)
'''
Source file format:
Each line is for one timestep. The features are separated by spaces.
An empty line indicates the end of a sequence.
cutoff: a list of numbers. If the count of a feature is smaller than this,
it will be ignored.
If oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of
the i-th column.
Returns a list of dicts, one for each column.
'''
def create_dictionaries(filename, cutoff, oov_policy):
def add_to_dict(sequence, dicts):
num_features = len(dicts)
for features in sequence:
l = len(features)
assert l == num_features, "Wrong number of features " + line
for i in xrange(l):
if features[i] in dicts[i]:
dicts[i][features[i]] += 1
else:
dicts[i][features[i]] = 1
num_features = len(cutoff)
dicts = []
for i in xrange(num_features):
dicts.append(dict())
f = open(filename, 'rb')
sequence = []
for line in f:
line = line.strip()
if not line:
make_features(sequence)
add_to_dict(sequence, dicts)
sequence = []
continue
features = line.split(' ')
sequence.append(features)
for i in xrange(num_features):
dct = dicts[i]
n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
todo = []
for k, v in dct.iteritems():
if v < cutoff[i]:
todo.append(k)
else:
dct[k] = n
n += 1
if oov_policy[i] == OOV_POLICY_USE:
# placeholder so that len(dct) will be the number of features
# including OOV
dct['#OOV#'] = 0
logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo)))
for k in todo:
del dct[k]
f.close()
return dicts
def encode_varint(v):
out = StringIO()
_EncodeVarint(out.write, v)
return out.getvalue()
def write_proto(file, message):
s = message.SerializeToString()
packed_len = encode_varint(len(s))
file.write(packed_len + s)
'''
If oov_policy[i] == OOV_POLICY_USE, features in the i-th column that do not
exist in dicts[i] will be assigned id 0.
If oov_policy[i] == OOV_POLICY_ERROR, all features in the i-th column MUST
exist in dicts[i].
'''
def gen_proto_file(input_file, dicts, oov_policy, output_file):
def write_sequence(out, sequence):
num_features = len(dicts)
is_beginning = True
for features in sequence:
assert len(features) == num_features, \
"Wrong number of features: " + line
sample = DataFormat.DataSample()
for i in xrange(num_original_columns):
id = dicts[i].get(features[i], -1)
if id != -1:
sample.id_slots.append(id)
elif oov_policy[i] == OOV_POLICY_IGNORE:
sample.id_slots.append(0xffffffff)
elif oov_policy[i] == OOV_POLICY_ERROR:
logger.fatal("Unknown token: %s" % features[i])
else:
sample.id_slots.append(0)
if patterns:
dim = 0
vec = sample.vector_slots.add()
for i in xrange(num_original_columns, num_features):
id = dicts[i].get(features[i], -1)
if id != -1:
vec.ids.append(dim + id)
elif oov_policy[i] == OOV_POLICY_IGNORE:
pass
elif oov_policy[i] == OOV_POLICY_ERROR:
logger.fatal("Unknown token: %s" % features[i])
else:
vec.ids.append(dim + 0)
dim += len(dicts[i])
sample.is_beginning = is_beginning
is_beginning = False
write_proto(out, sample)
num_features = len(dicts)
f = open(input_file, 'rb')
out = open(output_file, 'wb')
header = DataFormat.DataHeader()
if patterns:
slot_def = header.slot_defs.add()
slot_def.type = DataFormat.SlotDef.VECTOR_SPARSE_NON_VALUE
slot_def.dim = sum(
[len(dicts[i]) for i in xrange(num_original_columns, len(dicts))])
logger.info("feature_dim=%s" % slot_def.dim)
for i in xrange(num_original_columns):
slot_def = header.slot_defs.add()
slot_def.type = DataFormat.SlotDef.INDEX
slot_def.dim = len(dicts[i])
write_proto(out, header)
num_sequences = 0
sequence = []
for line in f:
line = line.strip()
if not line:
make_features(sequence)
write_sequence(out, sequence)
sequence = []
num_sequences += 1
continue
features = line.split(' ')
sequence.append(features)
f.close()
out.close()
logger.info("num_sequences=%s" % num_sequences)
dict2 = {
'B-ADJP': 0,
'I-ADJP': 1,
'B-ADVP': 2,
'I-ADVP': 3,
'B-CONJP': 4,
'I-CONJP': 5,
'B-INTJ': 6,
'I-INTJ': 7,
'B-LST': 8,
'I-LST': 9,
'B-NP': 10,
'I-NP': 11,
'B-PP': 12,
'I-PP': 13,
'B-PRT': 14,
'I-PRT': 15,
'B-SBAR': 16,
'I-SBAR': 17,
'B-UCP': 18,
'I-UCP': 19,
'B-VP': 20,
'I-VP': 21,
'O': 22
}
if __name__ == '__main__':
cutoff = [3, 1, 0]
cutoff += [3] * len(patterns)
oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR]
oov_policy += [OOV_POLICY_IGNORE] * len(patterns)
dicts = create_dictionaries('trainer/tests/train.txt', cutoff, oov_policy)
dicts[2] = dict2
gen_proto_file('trainer/tests/train.txt', dicts, oov_policy,
'trainer/tests/train_proto.bin')
gen_proto_file('trainer/tests/test.txt', dicts, oov_policy,
'trainer/tests/test_proto.bin')
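Two details of the script above are easy to sanity-check in isolation: make_features pads out-of-range positions with #B/#E boundary markers before combining columns, and write_proto frames each message with a protobuf varint length prefix. A self-contained sketch of both (written for Python 3, while the script itself is Python 2):

# Boundary padding as in make_features' get_features helper.
def get_features(sequence, pos, num_features):
    if pos < 0:
        return ['#B%s' % -pos] * num_features
    if pos >= len(sequence):
        return ['#E%s' % (pos - len(sequence) + 1)] * num_features
    return sequence[pos]

seq = [['Confidence', 'NN'], ['in', 'IN']]
assert get_features(seq, -1, 2) == ['#B1', '#B1']
assert get_features(seq, 3, 2) == ['#E2', '#E2']

# Protobuf varint: 7 payload bits per byte, MSB set while more bytes follow.
def encode_varint(v):
    out = bytearray()
    while True:
        byte = v & 0x7f
        v >>= 7
        out.append(byte | (0x80 if v else 0))
        if not v:
            return bytes(out)

assert encode_varint(1) == b'\x01'
assert encode_varint(300) == b'\xac\x02'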
Confidence NN B-NP
in IN B-PP
the DT B-NP
pound NN I-NP
is VBZ B-VP
widely RB I-VP
expected VBN I-VP
to TO I-VP
take VB I-VP
another DT B-NP
sharp JJ I-NP
dive NN I-NP
if IN B-SBAR
trade NN B-NP
figures NNS I-NP
for IN B-PP
September NNP B-NP
, , O
due JJ B-ADJP
for IN B-PP
release NN B-NP
tomorrow NN B-NP
, , O
fail VB B-VP
to TO I-VP
show VB I-VP
a DT B-NP
substantial JJ I-NP
improvement NN I-NP
from IN B-PP
July NNP B-NP
and CC I-NP
August NNP I-NP
's POS B-NP
near-record JJ I-NP
deficits NNS I-NP
. . O
Chancellor NNP O
of IN B-PP
the DT B-NP
Exchequer NNP I-NP
Nigel NNP B-NP
Lawson NNP I-NP
's POS B-NP
restated VBN I-NP
commitment NN I-NP
to TO B-PP
a DT B-NP
firm NN I-NP
monetary JJ I-NP
policy NN I-NP
has VBZ B-VP
helped VBN I-VP
to TO I-VP
prevent VB I-VP
a DT B-NP
freefall NN I-NP
in IN B-PP
sterling NN B-NP
over IN B-PP
the DT B-NP
past JJ I-NP
week NN I-NP
. . O
But CC O
analysts NNS B-NP
reckon VBP B-VP
underlying VBG B-NP
support NN I-NP
for IN B-PP
sterling NN B-NP
has VBZ B-VP
been VBN I-VP
eroded VBN I-VP
by IN B-PP
the DT B-NP
chancellor NN I-NP
's POS B-NP
failure NN I-NP
to TO B-VP
announce VB I-VP
any DT B-NP
new JJ I-NP
policy NN I-NP
measures NNS I-NP
in IN B-PP
his PRP$ B-NP
Mansion NNP I-NP
House NNP I-NP
speech NN I-NP
last JJ B-NP
Thursday NNP I-NP
. . O
This DT B-NP
has VBZ B-VP
increased VBN I-VP
the DT B-NP
risk NN I-NP
of IN B-PP
the DT B-NP
government NN I-NP
being VBG B-VP
forced VBN I-VP
to TO I-VP
increase VB I-VP
base NN B-NP
rates NNS I-NP
to TO B-PP
16 CD B-NP
% NN I-NP
from IN B-PP
their PRP$ B-NP
current JJ I-NP
15 CD I-NP
% NN I-NP
level NN I-NP
to TO B-VP
defend VB I-VP
the DT B-NP
pound NN I-NP
, , O
economists NNS B-NP
and CC O
foreign JJ B-NP
exchange NN I-NP
market NN I-NP
analysts NNS I-NP
say VBP B-VP
. . O
`` `` O
The DT B-NP
risks NNS I-NP
for IN B-PP
sterling NN B-NP
of IN B-PP
a DT B-NP
bad JJ I-NP
trade NN I-NP
figure NN I-NP
are VBP B-VP
very RB B-ADVP
heavily RB I-ADVP
on IN B-PP
the DT B-NP
down JJ I-NP
side NN I-NP
, , O
'' '' O
said VBD B-VP
Chris NNP B-NP
Dillow NNP I-NP
, , O
senior JJ B-NP
U.K. NNP I-NP
economist NN I-NP
at IN B-PP
Nomura NNP B-NP
Research NNP I-NP
Institute NNP I-NP
. . O
`` `` O
If IN B-SBAR
there EX B-NP
is VBZ B-VP
another DT B-NP
bad JJ I-NP
trade NN I-NP
number NN I-NP
, , O
there EX B-NP
could MD B-VP
be VB I-VP
an DT B-NP
awful JJ I-NP
lot NN I-NP
of IN B-PP
pressure NN B-NP
, , O
'' '' O
noted VBD B-VP
Simon NNP B-NP
Briscoe NNP I-NP
, , O
U.K. NNP B-NP
economist NN I-NP
for IN B-PP
Midland NNP B-NP
Montagu NNP I-NP
, , O
a DT B-NP
unit NN I-NP
of IN B-PP
Midland NNP B-NP
Bank NNP I-NP
PLC NNP I-NP
. . O
Forecasts NNS B-NP
for IN B-PP
the DT B-NP
trade NN I-NP
figures NNS I-NP
range VBP B-VP
widely RB B-ADVP
, , O
but CC O
few JJ B-NP
economists NNS I-NP
expect VBP B-VP
the DT B-NP
data NNS I-NP
to TO B-VP
show VB I-VP
a DT B-NP
very RB I-NP
marked VBN I-NP
improvement NN I-NP
from IN B-PP
the DT O
# # O
2 CD O
billion CD O
-LRB- ( O
$ $ B-ADJP
3.2 CD O
billion CD O
-RRB- ) O
deficit NN B-NP
in IN B-PP
the DT B-NP
current JJ I-NP
account NN I-NP
reported VBD B-VP
for IN B-PP
August NNP B-NP
. . O
The DT B-NP
August NNP I-NP
deficit NN I-NP
and CC O
the DT B-NP
# # I-NP
2.2 CD I-NP
billion CD I-NP
gap NN I-NP
registered VBN B-VP
in IN B-PP
July NNP B-NP
are VBP B-VP
topped VBN I-VP
only RB B-ADVP
by IN B-PP
the DT B-NP
# # I-NP
2.3 CD I-NP
billion CD I-NP
deficit NN I-NP
of IN B-PP
October NNP B-NP
1988 CD I-NP
. . O
Sanjay NNP B-NP
Joshi NNP I-NP
, , O
European JJ B-NP
economist NN I-NP
at IN B-PP
Baring NNP B-NP
Brothers NNPS I-NP
& CC I-NP
Co. NNP I-NP
, , O
said VBD B-VP
there EX B-NP
is VBZ B-VP
no DT B-NP
sign NN I-NP
that IN B-SBAR
Britain NNP B-NP
's POS B-NP
manufacturing NN I-NP
industry NN I-NP
is VBZ B-VP
transforming VBG I-VP
itself PRP B-NP
to TO B-VP
boost VB I-VP
exports NNS B-NP
. . O
At IN B-PP
the DT B-NP
same JJ I-NP
time NN I-NP
, , O
he PRP B-NP
remains VBZ B-VP
fairly RB B-ADJP
pessimistic JJ I-ADJP
about IN B-PP
the DT B-NP
outlook NN I-NP
for IN B-PP
imports NNS B-NP
, , O
given VBN B-PP
continued VBD B-NP
high JJ I-NP
consumer NN I-NP
and CC I-NP
capital NN I-NP
goods NNS I-NP
inflows NNS I-NP
. . O
He PRP B-NP
reckons VBZ B-VP
the DT B-NP
current JJ I-NP
account NN I-NP
deficit NN I-NP
will MD B-VP
narrow VB I-VP
to TO B-PP
only RB B-NP
# # I-NP
1.8 CD I-NP
billion CD I-NP
in IN B-PP
September NNP B-NP
. . O
However RB B-ADVP
, , O
Mr. NNP B-NP
Dillow NNP I-NP
said VBD B-VP
he PRP B-NP
believes VBZ B-VP
that IN B-SBAR
a DT B-NP
reduction NN I-NP
in IN B-PP
raw JJ B-NP
material NN I-NP
stockbuilding VBG I-NP
by IN B-PP
industry NN B-NP
could MD B-VP
lead VB I-VP
to TO B-PP
a DT B-NP
sharp JJ I-NP
drop NN I-NP
in IN B-PP
imports NNS B-NP
. . O
Combined VBN B-PP
with IN B-PP
at IN B-ADVP
least JJS I-ADVP
some DT B-NP
rebound NN I-NP
in IN B-PP
exports NNS B-NP
after IN B-PP
August NNP B-NP
's POS B-NP
unexpected JJ I-NP
decline NN I-NP
, , O
the DT B-NP
deficit NN I-NP
could MD B-VP
narrow VB I-VP
to TO B-PP
as RB B-NP
little JJ I-NP
as IN I-NP
# # I-NP
1.3 CD I-NP
billion CD I-NP
. . O
Mr. NNP B-NP
Briscoe NNP I-NP
, , O
who WP B-NP
also RB B-ADVP
forecasts VBZ B-VP
a DT B-NP
# # I-NP
1.3 CD I-NP
billion CD I-NP
current JJ I-NP
account NN I-NP
gap NN I-NP
, , O
warns VBZ B-VP
that IN B-SBAR
even RB B-SBAR
if IN I-SBAR
the DT B-NP
trade NN I-NP
figures NNS I-NP
are VBP B-VP
bullish JJ B-ADJP
for IN B-PP
sterling NN B-NP
, , O
the DT B-NP
currency NN I-NP
wo MD B-VP
n't RB I-VP
advance VB I-VP
much JJ B-NP
because IN B-SBAR
investors NNS B-NP
will MD B-VP
want VB I-VP
to TO I-VP
see VB I-VP
further JJ B-NP
evidence NN I-NP
of IN B-PP
the DT B-NP
turnaround NN I-NP
before IN B-PP
adjusting VBG B-VP
positions NNS B-NP
. . O
Nevertheless RB B-ADVP
, , O
he PRP B-NP
noted VBD B-VP
, , O
`` `` O
No DT B-NP
one PRP I-NP
will MD B-VP
want VB I-VP
to TO I-VP
go VB I-VP
into IN B-PP
the DT B-NP
trade NN I-NP
figures NNS I-NP
without IN B-PP
a DT B-NP
flat JJ I-NP
position NN I-NP
'' '' O
in IN B-PP
the DT B-NP
pound NN I-NP
. . O
Meanwhile RB B-ADVP
, , O
overall JJ B-NP
evidence NN I-NP
on IN B-PP
the DT B-NP
economy NN I-NP
remains VBZ B-VP
fairly RB B-ADJP
clouded VBN I-ADJP
. . O
In IN B-PP
his PRP$ B-NP
Mansion NNP I-NP
House NNP I-NP
speech NN I-NP
, , O
Mr. NNP B-NP
Lawson NNP I-NP
warned VBD B-VP
that IN B-SBAR
a DT B-NP
further JJ I-NP
slowdown NN I-NP
can MD B-VP
be VB I-VP
expected VBN I-VP
as IN B-SBAR
the DT B-NP
impact NN I-NP
of IN B-PP
the DT B-NP
last JJ I-NP
rise NN I-NP
in IN B-PP
interest NN B-NP
rates NNS I-NP
earlier RBR B-NP
this DT I-NP
month NN I-NP
takes VBZ B-VP
effect NN B-NP
. . O
U.K. JJ B-NP
base NN I-NP
rates NNS I-NP
are VBP B-VP
at IN B-PP
their PRP$ B-NP
highest JJS I-NP
level NN I-NP
in IN B-PP
eight CD B-NP
years NNS I-NP
. . O
But CC O
consumer NN B-NP
expenditure NN I-NP
data NNS I-NP
released VBD B-VP
Friday NNP B-NP
do VBP B-VP
n't RB I-VP
suggest VB I-VP
that IN B-SBAR
the DT B-NP
U.K. NNP I-NP
economy NN I-NP
is VBZ B-VP
slowing VBG I-VP
that DT B-ADVP
quickly RB I-ADVP
. . O
The DT B-NP
figures NNS I-NP
show VBP B-VP
that DT O
spending NN B-NP
rose VBD B-VP
0.1 CD B-NP
% NN I-NP
in IN B-PP
the DT B-NP
third JJ I-NP
quarter NN I-NP
from IN B-PP
the DT B-NP
second JJ I-NP
quarter NN I-NP
and CC O
was VBD B-VP
up IN B-ADVP
3.8 CD B-NP
% NN I-NP
from IN B-PP
a DT B-NP
year NN I-NP
ago RB B-ADVP
. . O
This DT B-NP
compares VBZ B-VP
with IN B-PP
a DT B-NP
1.6 CD I-NP
% NN I-NP
rise NN I-NP
in IN B-PP
the DT B-NP
second NN I-NP
from IN B-PP
the DT B-NP
first JJ I-NP
quarter NN I-NP
and CC O
a DT B-NP
5.4 CD I-NP
% NN I-NP
increase NN I-NP
from IN B-PP
the DT B-NP
second JJ I-NP
quarter NN I-NP
of IN B-PP
1988 CD B-NP
. . O
Mr. NNP B-NP
Dillow NNP I-NP
said VBD B-VP
the DT B-NP
data NNS I-NP
show VBP B-VP
the DT B-NP
economy NN I-NP
`` `` O
is VBZ B-VP
still RB B-ADVP
quite RB B-ADJP
strong JJ I-ADJP
, , O
'' '' O
but CC O
suggestions NNS B-NP
that IN B-SBAR
much NN B-NP
of IN B-PP
the DT B-NP
spending NN I-NP
went VBD B-VP
on IN B-PP
services NNS B-NP
rather RB B-PP
than IN I-PP
consumer NN B-NP
goods NNS I-NP
should MD B-VP
reduce VB I-VP
fears NNS B-NP
of IN B-PP
more JJR B-NP
import NN I-NP
rises NNS I-NP
. . O
Certainly RB B-ADVP
, , O
the DT B-NP
chancellor NN I-NP
has VBZ B-VP
made VBN I-VP
it PRP B-NP
clear JJ B-ADJP
that IN B-SBAR
he PRP B-NP
is VBZ B-VP
prepared VBN I-VP
to TO I-VP
increase VB I-VP
interest NN B-NP
rates NNS I-NP
again RB B-ADVP
if IN B-SBAR
necessary JJ B-ADJP
to TO B-VP
both DT I-VP
ensure VB I-VP
that IN B-SBAR
a DT B-NP
substantial JJ I-NP
slowdown NN I-NP
does VBZ B-VP
take VB I-VP
place NN B-NP
and CC O
that DT O
sterling NN B-NP
does VBZ B-VP
n't RB I-VP
decline VB I-VP
further JJ B-ADVP
. . O
Thursday NNP B-NP
, , O
he PRP B-NP
reminded VBD B-VP
his PRP$ B-NP
audience NN I-NP
that IN B-SBAR
the DT B-NP
government NN I-NP
`` `` O
can MD B-VP
not RB I-VP
allow VB I-VP
the DT B-NP
necessary JJ I-NP
rigor NN I-NP
of IN B-PP
monetary JJ B-NP
policy NN I-NP
to TO B-VP
be VB I-VP
undermined VBN I-VP
by IN B-PP
exchange NN B-NP
rate NN I-NP
weakness NN I-NP
. . O
'' '' O
Analysts NNS B-NP
agree VBP B-VP
there EX B-NP
is VBZ B-VP
little JJ B-NP
holding NN B-VP
sterling NN B-NP
firm NN B-ADJP
at IN B-PP
the DT B-NP
moment NN I-NP
other JJ B-ADJP
than IN B-PP
Mr. NNP B-NP
Lawson NNP I-NP
's POS B-NP
promise NN I-NP
that IN B-SBAR
rates NNS B-NP
will MD B-VP
be VB I-VP
pushed VBN I-VP
higher JJR B-ADJP
if IN B-SBAR
necessary JJ B-ADJP
. . O
And CC O
, , O
they PRP B-NP
warn VBP B-VP
, , O
any DT B-NP
further JJ I-NP
drop NN I-NP
in IN B-PP
the DT B-NP
government NN I-NP
's POS B-NP
popularity NN I-NP
could MD B-VP
swiftly RB I-VP
make VB I-VP
this DT B-NP
promise NN I-NP
sound NN B-VP
hollow JJ B-ADJP
. . O
Sterling NNP B-NP
was VBD B-VP
already RB I-VP
showing VBG I-VP
some DT B-NP
signs NNS I-NP
of IN B-PP
a DT B-NP
lack NN I-NP
of IN B-PP
confidence NN B-NP
in IN B-PP
Mr. NNP B-NP
Lawson NNP I-NP
's POS B-NP
promise NN I-NP
Friday NNP B-NP
. . O
In IN B-PP
European JJ B-NP
trading NN I-NP
it PRP B-NP
declined VBD B-VP
to TO B-PP
$ $ B-NP
1.5890 CD I-NP
and CC O
2.9495 CD B-NP
marks NNS I-NP
from IN B-PP
$ $ B-NP
1.5940 CD I-NP
and CC O
2.9429 CD B-NP
marks NNS I-NP
late JJ B-NP
Thursday NNP I-NP
. . O
Economists NNS B-NP
suggested VBD B-VP
that IN B-SBAR
if IN B-SBAR
the DT B-NP
pound NN I-NP
falls VBZ B-VP
much JJ B-NP
below IN B-PP
2.90 CD B-NP
marks NNS I-NP
, , O
the DT B-NP
government NN I-NP
will MD B-VP
be VB I-VP
forced VBN I-VP
to TO I-VP
increase VB I-VP
rates NNS B-NP
to TO B-PP
16 CD B-NP
% NN I-NP
, , O
both DT B-VP
to TO I-VP
halt VB B-VP
any DT B-NP
further JJ I-NP
decline NN I-NP
and CC O
ensure VB B-VP
that IN B-SBAR
the DT B-NP
balance NN I-NP
of IN B-PP
monetary JJ B-NP
policy NN I-NP
remains VBZ B-VP
unchanged JJ B-ADJP
. . O
Friday NNP B-NP
's POS B-NP
Market NNP I-NP
Activity NN I-NP
The DT B-NP
dollar NN I-NP
posted VBD B-VP
gains NNS B-NP
in IN B-PP
quiet JJ B-NP
trading NN I-NP
as IN B-SBAR
concerns NNS B-NP
about IN B-PP
equities NNS B-NP
abated VBN B-VP
. . O
Foreign JJ B-NP
exchange NN I-NP
dealers NNS I-NP
said VBD B-VP
that IN B-SBAR
the DT B-NP
currency NN I-NP
market NN I-NP
has VBZ B-VP
begun VBN I-VP
to TO I-VP
distance VB I-VP
itself PRP B-NP
from IN B-PP
the DT B-NP
volatile JJ I-NP
stock NN I-NP
exchange NN I-NP
, , O
which WDT B-NP
has VBZ B-VP
preoccupied VBN I-VP
the DT B-NP
market NN I-NP
since IN B-PP
Oct. NNP B-NP
13 CD I-NP
, , O
when WRB B-ADVP
the DT B-NP
Dow NNP I-NP
Jones NNP I-NP
Industrial NNP I-NP
Average NNP I-NP
plunged VBD B-VP
more JJR B-NP
than IN I-NP
190 CD I-NP
points NNS I-NP
. . O
Currency NN B-NP
analysts NNS I-NP
predict VBP B-VP
that IN B-SBAR
in IN B-PP
the DT B-NP
coming VBG I-NP
week NN I-NP
the DT B-NP
foreign JJ I-NP
exchange NN I-NP
market NN I-NP
will MD B-VP
shift VB I-VP
its PRP$ B-NP
focus NN I-NP
back RB B-ADVP
to TO B-PP
economic JJ B-NP
fundamentals NNS I-NP
, , O
keeping VBG B-VP
a DT B-NP
close NN I-NP
eye NN I-NP
out IN B-ADVP
for IN B-PP
any DT B-NP
signs NNS I-NP
of IN B-PP
monetary JJ B-NP
easing NN I-NP
by IN B-PP
U.S. NNP B-NP
Federal NNP I-NP
Reserve NNP I-NP
. . O
Late RB B-ADVP
in IN B-PP
the DT B-NP
New NNP I-NP
York NNP I-NP
trading NN I-NP
day NN I-NP
, , O
the DT B-NP
dollar NN I-NP
was VBD B-VP
quoted VBN I-VP
at IN B-PP
1.8578 CD B-NP
marks NNS I-NP
, , O
up IN B-ADVP
from IN B-PP
1.8470 CD B-NP
marks NNS I-NP
late JJ B-NP
Thursday NNP I-NP
in IN B-PP
New NNP B-NP
York NNP I-NP
. . O
The DT B-NP
U.S. NNP I-NP
currency NN I-NP
was VBD B-VP
also RB I-VP
changing VBG I-VP
hands NNS B-NP
at IN B-PP
142.43 CD B-NP
yen NN I-NP
, , O
up IN B-ADVP
from IN B-PP
141.70 CD B-NP
yen NN I-NP
in IN B-PP
New NNP B-NP
York NNP I-NP
late JJ B-NP
Thursday NNP I-NP
. . O
In IN B-PP
Tokyo NNP B-NP
on IN B-PP
Monday NNP B-NP
, , O
the DT B-NP
U.S. NNP I-NP
currency NN I-NP
opened VBD B-VP
for IN B-PP
trading NN B-NP
at IN B-PP
141.95 CD B-NP
yen NN I-NP
, , O
up IN B-ADVP
from IN B-PP
Friday NNP B-NP
's POS B-NP
Tokyo NNP I-NP
...@@ -24,7 +24,6 @@ using namespace std; // NOLINT ...@@ -24,7 +24,6 @@ using namespace std; // NOLINT
static const string& configFile1 = "trainer/tests/sample_trainer_config.conf"; static const string& configFile1 = "trainer/tests/sample_trainer_config.conf";
static const string& configFile2 = static const string& configFile2 =
"trainer/tests/sample_trainer_config_hsigmoid.conf"; "trainer/tests/sample_trainer_config_hsigmoid.conf";
static const string& configFile3 = "trainer/tests/chunking.conf";
static const string& configFile4 = static const string& configFile4 =
"trainer/tests/sample_trainer_config_parallel.conf"; "trainer/tests/sample_trainer_config_parallel.conf";
...@@ -95,13 +94,6 @@ TEST(checkGradient, multi) { ...@@ -95,13 +94,6 @@ TEST(checkGradient, multi) {
TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); } TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false);
#ifdef PADDLE_WITH_CUDA
checkGradientTest(configFile3, true, true);
#endif
}
TEST(checkGradient, non_parallel) { TEST(checkGradient, non_parallel) {
checkGradientTest(configFile4, false, false); checkGradientTest(configFile4, false, false);
} }
......
...@@ -15,12 +15,7 @@ ...@@ -15,12 +15,7 @@
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
TrainData(ProtoData( TrainData(SimpleData(
files = "dummy_list",
constant_slots = [1.0],
async_load_data = True))
TestData(SimpleData(
files = "trainer/tests/sample_filelist.txt", files = "trainer/tests/sample_filelist.txt",
feat_dim = 3, feat_dim = 3,
context_len = 0, context_len = 0,
......
This diff is collapsed.
...@@ -1116,35 +1116,6 @@ def PyData(files=None, ...@@ -1116,35 +1116,6 @@ def PyData(files=None,
return data_config return data_config
@config_func
def ProtoData(files=None,
type=None,
file_group_queue_capacity=None,
load_file_count=None,
constant_slots=None,
load_thread_num=None,
**xargs):
data_config = create_data_config_proto(**xargs)
if type is None:
data_config.type = 'proto'
else:
data_config.type = type
data_config.files = files
# When type="proto_group", one data provider contains at most
# load_file_count files, and there are at most
# (queue_capacity + load_thread_num + 1) data providers in memory
if file_group_queue_capacity is not None:
data_config.file_group_conf.queue_capacity = file_group_queue_capacity
if load_file_count is not None:
data_config.file_group_conf.load_file_count = load_file_count
if load_thread_num is not None:
data_config.file_group_conf.load_thread_num = load_thread_num
if constant_slots:
data_config.constant_slots.extend(constant_slots)
return data_config
#real data for training is actually provided by "sub_data" data providers. #real data for training is actually provided by "sub_data" data providers.
@config_func @config_func
def MultiData(sub_data=[]): def MultiData(sub_data=[]):
...@@ -2714,7 +2685,7 @@ Usage: ...@@ -2714,7 +2685,7 @@ Usage:
max_sort_size = -1, inputs = ["output", "score"]) max_sort_size = -1, inputs = ["output", "score"])
Input data: Samples of the same query should be loaded as a sequence, Input data: Samples of the same query should be loaded as a sequence,
by ProtoDataProvider or PyDataProvider etc.. User should provide by PyDataProvider etc. User should provide
scores for each sample. The score slot should be the 2nd scores for each sample. The score slot should be the 2nd
input of lambdaRank layer. input of lambdaRank layer.
......
...@@ -17,7 +17,8 @@ __all__ = [ ...@@ -17,7 +17,8 @@ __all__ = [
"IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation', "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation',
'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation", 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation",
"STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation", "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation",
"LogActivation", "SqrtActivation", "ReciprocalActivation" "LogActivation", "SqrtActivation", "ReciprocalActivation",
"SoftSignActivation"
] ]
...@@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation): ...@@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation):
Reciprocal Activation. Reciprocal Activation.
.. math:: .. math::
f(z) = 1/z f(z)=\\frac{1}{z}
""" """
def __init__(self): def __init__(self):
BaseActivation.__init__(self, 'reciprocal', False) BaseActivation.__init__(self, 'reciprocal', False)
class SoftSignActivation(BaseActivation):
"""
SoftSign Activation.
.. math::
f(z)=\\frac{z}{1 + |z|}
"""
def __init__(self):
BaseActivation.__init__(self, 'softsign', False)
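For context, the new softsign activation is bounded to (-1, 1) and plugs in wherever an activation object is accepted. A minimal sketch in the v1 config style, with illustrative layer names and sizes:

from paddle.trainer_config_helpers import *

# Sketch only, not part of this diff; names and sizes are illustrative.
data = data_layer(name='input', size=128)
# softsign: f(z) = z / (1 + |z|), a bounded, smooth alternative to tanh
hidden = fc_layer(input=data, size=64, act=SoftSignActivation())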
...@@ -2507,12 +2507,12 @@ def img_conv_layer(input, ...@@ -2507,12 +2507,12 @@ def img_conv_layer(input,
input is raw pixels of image(mono or RGB), or it may be the previous layer's input is raw pixels of image(mono or RGB), or it may be the previous layer's
num_filters * num_group. num_filters * num_group.
There are several group of filter in PaddlePaddle implementation. There are several groups of filters in PaddlePaddle implementation.
Each group will process some channel of the inputs. For example, if an input Each group will process some channels of the input. For example, if
num_channel = 256, group = 4, num_filter=32, the PaddlePaddle will create num_channel = 256, group = 4, num_filter=32, the PaddlePaddle will create
32*4 = 128 filters to process inputs. The channels will be split into 4 32*4 = 128 filters to process the input. The channels will be split into 4
pieces. First 256/4 = 64 channels will process by first 32 filters. The pieces. First 256/4 = 64 channels will be processed by the first 32 filters. The
rest channels will be processed by rest group of filters. remaining channels will be processed by the remaining groups of filters.
The example usage is: The example usage is:
...@@ -2528,53 +2528,68 @@ def img_conv_layer(input, ...@@ -2528,53 +2528,68 @@ def img_conv_layer(input,
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. Or input a tuple for :param filter_size: The dimensions of the filter kernel. If the parameter is
two image dimension. set to one integer, the two dimensions on the x and y axes
will be the same when filter_size_y is not set. If it is set
to a list, the first element indicates the dimension on
the x axis, and the second is used to specify the dimension
on the y axis when filter_size_y is not provided.
:type filter_size: int | tuple | list :type filter_size: int | tuple | list
:param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
currently supports rectangular filters, the filter's is not set, it will be set automatically according to filter_size.
shape will be (filter_size, filter_size_y). :type filter_size_y: int
:type filter_size_y: int | None
:param num_filters: Each filter group's number of filter :param num_filters: The number of filters in each group.
:param act: Activation type. ReluActivation is the default activation. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: Group size of filters. :param groups: The group number. 1 is the default group number.
:type groups: int :type groups: int
:param stride: The x dimension of the stride. Or input a tuple for two image :param stride: The strides. If the parameter is set to one integer, the strides
dimension. on the x and y axes will be the same when stride_y is not set. If it is
set to a list, the first element indicates the stride on the x axis,
and the second is used to specify the stride on the y axis when
stride_y is not provided. 1 is the default value.
:type stride: int | tuple | list :type stride: int | tuple | list
:param stride_y: The y dimension of the stride. :param stride_y: The stride on the y axis.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of the padding. Or input a tuple for two :param padding: The padding sizes. If the parameter is set to one integer, the padding
image dimension sizes on the x and y axes will be the same when padding_y is not set. If it
is set to a list, the first element indicates the padding size on the
x axis, and the second is used to specify the padding size on the y axis
when padding_y is not provided. 0 is the default padding size.
:type padding: int | tuple | list :type padding: int | tuple | list
:param padding_y: The y dimension of the padding. :param padding_y: The padding size on the y axis.
:type padding_y: int :type padding_y: int
:param dilation: The x dimension of the dilation. Or input a tuple for two :param dilation: The dimensions of the dilation. If the parameter is set to one integer,
image dimension the two dimensions on the x and y axes will be the same when dilation_y is not
set. If it is set to a list, the first element indicates the dimension
on the x axis, and the second is used to specify the dimension on the y
axis when dilation_y is not provided. 1 is the default dimension.
:type dilation: int | tuple | list :type dilation: int | tuple | list
:param dilation_y: The y dimension of the dilation. :param dilation_y: The dimension of the dilation on the y axis.
:type dilation_y: int :type dilation_y: int
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero. parameter is set to True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: number of input channels. If None will be set :param num_channels: The number of input channels. If the parameter is not set or
automatically from previous output. set to None, its actual value will be automatically set to
the channel number of the input.
:type num_channels: int :type num_channels: int
:param param_attr: Convolution param attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param shared_biases: Is biases will be shared between filters or not. :param shared_biases: Whether biases will be shared between filters or not.
:type shared_biases: bool :type shared_biases: bool
:param layer_attr: Layer Extra Attribute. :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param trans: true if it is a convTransLayer, false if it is a convLayer :param trans: True if it is a convTransLayer, False if it is a convLayer
:type trans: bool :type trans: bool
:param layer_type: specify the layer_type, default is None. If trans=True, :param layer_type: Specify the layer type. If the dilation's dimension on one axis is
layer_type has to be "exconvt" or "cudnn_convt", larger than 1, layer_type has to be "cudnn_conv" or "cudnn_convt".
otherwise layer_type has to be either "exconv" or If trans=True, layer_type has to be "exconvt" or "cudnn_convt",
"cudnn_conv" otherwise layer_type has to be either "exconv" or "cudnn_conv".
:type layer_type: String :type layer_type: basestring
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -2679,7 +2694,7 @@ def img_pool_layer(input, ...@@ -2679,7 +2694,7 @@ def img_pool_layer(input,
""" """
Image pooling Layer. Image pooling Layer.
The details of pooling layer, please refer ufldl's pooling_ . For the details of the pooling layer, please refer to ufldl's pooling_ .
.. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
...@@ -2711,32 +2726,37 @@ def img_pool_layer(input, ...@@ -2711,32 +2726,37 @@ def img_pool_layer(input,
padding_y=2, padding_y=2,
pool_type=MaxPooling()) pool_type=MaxPooling())
:param padding: pooling padding width. :param padding: The padding size on the x axis. 0 is the default padding size.
:type padding: int :type padding: int
:param padding_y: pooling padding height. It's equal to padding by default. :param padding_y: The padding size on the y axis. If the parameter is not set
:type padding_y: int | None or set to None, it will be set to 'padding' automatically.
:param name: name of pooling layer :param name: The name of this layer. It is optional.
:type name: basestring. :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pool_size: pooling window width :param pool_size: The pooling window length on the x axis.
:type pool_size: int :type pool_size: int
:param pool_size_y: pooling window height. It's eaqual to pool_size by default. :param pool_size_y: The pooling window length on the y axis. If the parameter is
:type pool_size_y: int | None not set or set to None, its actual value will be automatically
:param num_channels: number of input channel. set to pool_size.
:type pool_size_y: int
:param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to
the channel number of the input.
:type num_channels: int :type num_channels: int
:param pool_type: pooling type. MaxPooling or AvgPooling. Default is :param pool_type: Pooling type. MaxPooling is the default pooling.
MaxPooling.
:type pool_type: BasePoolingType :type pool_type: BasePoolingType
:param stride: stride width of pooling. :param stride: The stride on the x axis. 1 is the default value.
:type stride: int :type stride: int
:param stride_y: stride height of pooling. It is equal to stride by default. :param stride_y: The stride on the y axis. If the parameter is not set or set to
:type stride_y: int | None None, its actual value will be automatically set to 'stride'.
:param layer_attr: Extra Layer attribute. :type stride_y: int
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param ceil_mode: Wether to use ceil mode to calculate output height and with. :param ceil_mode: Whether to use the ceil function to calculate output height and width.
Defalut is True. If set false, Otherwise use floor. True is the default. If it is set to False, the floor function will
be used.
:type ceil_mode: bool :type ceil_mode: bool
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
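Since the hunk above truncates the docstring's own example, a complete call is sketched here with illustrative values; conv is hypothetical:

# Sketch: 3x3 max pooling with stride 2 and padding 1 on both axes.
pool = img_pool_layer(
    input=conv,            # hypothetical previous LayerOutput
    pool_size=3,           # pool_size_y defaults to pool_size
    stride=2,              # stride_y defaults to stride
    padding=1,             # padding_y defaults to padding
    num_channels=64,
    pool_type=MaxPooling(),
    ceil_mode=True)        # ceil rounding when computing output height/width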
...@@ -2842,24 +2862,32 @@ def img_pool3d_layer(input, ...@@ -2842,24 +2862,32 @@ def img_pool3d_layer(input,
:param padding: pooling padding width. :param padding: pooling padding width.
:type padding: int | tuple | list :type padding: int | tuple | list
:param name: name of pooling layer :param name: The name of this layer. It is optional.
:type name: basestring. :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pool_size: pooling window width :param pool_size: The pooling window lengths along the three axes. If the parameter
is set to one integer, the three lengths will be the same.
:type pool_size: int | tuple | list :type pool_size: int | tuple | list
:param num_channels: number of input channel. :param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to
the channel number of the input.
:type num_channels: int :type num_channels: int
:param pool_type: pooling type. MaxPooling or AvgPooling. Default is :param pool_type: Pooling type. MaxPooling is the default pooling.
MaxPooling.
:type pool_type: BasePoolingType :type pool_type: BasePoolingType
:param stride: stride width of pooling. :param stride: The strides of the pooling along the three axes. If the parameter
is set to one integer, the three strides will be the same. 1 is the
default value.
:type stride: int | tuple | list :type stride: int | tuple | list
:param layer_attr: Extra Layer attribute. :param padding: The sizes of padding along the three axes. If the parameter is set to
one integer, they will be the same. 0 is the default padding size.
:type padding: int | tuple | list
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param ceil_mode: Wether to use ceil mode to calculate output height and with. :param ceil_mode: Whether to use the ceil function to calculate output height and width.
Defalut is True. If set false, Otherwise use floor. True is the default. If it is set to False, the floor function will
be used.
:type ceil_mode: bool :type ceil_mode: bool
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
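A matching sketch for the 3D variant, again with hypothetical names; as described above, a single integer is broadcast to all three axes:

# Sketch: 2x2x2 max pooling with stride 2 and no padding.
pool3d = img_pool3d_layer(
    input=feat3d,          # hypothetical LayerOutput carrying depth info
    pool_size=2,           # one integer: same window length on all three axes
    stride=2,
    padding=0,
    num_channels=16,
    pool_type=MaxPooling())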
...@@ -2938,9 +2966,11 @@ def spp_layer(input, ...@@ -2938,9 +2966,11 @@ def spp_layer(input,
pyramid_height=None, pyramid_height=None,
layer_attr=None): layer_attr=None):
""" """
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. A layer that performs spatial pyramid pooling.
The details please refer to
`Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_. Reference:
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition
https://arxiv.org/abs/1406.4729
The example usage is: The example usage is:
...@@ -2955,13 +2985,16 @@ def spp_layer(input, ...@@ -2955,13 +2985,16 @@ def spp_layer(input,
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: number of input channel. :param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to
the channel number of the input.
:type num_channels: int :type num_channels: int
:param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling. :param pool_type: Pooling type. MaxPooling is the default pooling.
:type scale: BasePoolingType :type pool_type: BasePoolingType
:param pyramid_height: pyramid height. :param pyramid_height: The pyramid height of this pooling.
:type pyramid_height: int :type pyramid_height: int
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
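For context, a hedged sketch of an spp_layer call; with pyramid_height=3 the input is max-pooled over 1x1, 2x2 and 4x4 grids (21 bins per channel), so the output size is independent of the input image size:

# Sketch only; conv is a hypothetical LayerOutput.
spp = spp_layer(
    input=conv,
    num_channels=128,
    pyramid_height=3,
    pool_type=MaxPooling())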
...@@ -4694,7 +4727,7 @@ def conv_projection(input, ...@@ -4694,7 +4727,7 @@ def conv_projection(input,
will be the same when filter_size_y is not set. If it is set will be the same when filter_size_y is not set. If it is set
to a list, the first element indicates the dimension on to a list, the first element indicates the dimension on
the x axis, and the second is used to specify the dimension the x axis, and the second is used to specify the dimension
on the y axis when filter_size is not provided. on the y axis when filter_size_y is not provided.
:type filter_size: int | tuple | list :type filter_size: int | tuple | list
:param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
is not set, it will be set automatically according to filter_size. is not set, it will be set automatically according to filter_size.
...@@ -7076,7 +7109,7 @@ def img_conv3d_layer(input, ...@@ -7076,7 +7109,7 @@ def img_conv3d_layer(input,
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param trans: True if it is a convTransLayer, False if it is a convLayer :param trans: True if it is a convTransLayer, False if it is a convLayer
:type trans: bool :type trans: bool
:param layer_type: Specify the layer_type. If the parameter is set, it must be "deconv3d" :param layer_type: Specify the layer type. If the parameter is set, it must be "deconv3d"
when trans=True. If not set, it will be automatically set to "deconv3d" when trans=True. If not set, it will be automatically set to "deconv3d"
when trans=True and "conv3d" when trans=False. when trans=True and "conv3d" when trans=False.
:type layer_type: basestring :type layer_type: basestring
......
...@@ -11,7 +11,6 @@ test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_l ...@@ -11,7 +11,6 @@ test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_l
test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer test_seq_slice_layer test_cross_entropy_over_beam test_roi_pool_layer test_pooling3D_layer
test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer
test_conv3d_layer test_deconv3d_layer test_BatchNorm3D test_resize_layer
test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer) test_scale_sub_region_layer test_dot_prod_layer test_l2_distance_layer)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
...@@ -15,6 +15,37 @@ def unique_name(prefix): ...@@ -15,6 +15,37 @@ def unique_name(prefix):
return "_".join([prefix, str(uid)]) return "_".join([prefix, str(uid)])
def convert_np_dtype_to_dtype_(np_dtype):
dtype = np.dtype(np_dtype)
if dtype == np.float32:
return core.DataType.FP32
elif dtype == np.float64:
return core.DataType.FP64
elif dtype == np.float16:
return core.DataType.FP16
elif dtype == np.int32:
return core.DataType.INT32
elif dtype == np.int16:
return core.DataType.INT16
elif dtype == np.int64:
return core.DataType.INT64
elif dtype == np.bool:
return core.DataType.BOOL
else:
raise ValueError("Not supported numpy dtype " + str(dtype))
def dtype_is_floating(dtype):
if not isinstance(dtype, core.DataType):
dtype = convert_np_dtype_to_dtype_(dtype)
if (dtype == core.DataType.FP16 or dtype == core.DataType.FP32 or
dtype == core.DataType.FP64):
return True
else:
return False
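A quick sketch of how these module-level helpers are expected to behave, mirroring the branches above (assumed usage):

import numpy as np
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import convert_np_dtype_to_dtype_, dtype_is_floating

# numpy dtypes map onto the framework's DataType enum values
assert convert_np_dtype_to_dtype_(np.float32) == core.DataType.FP32
assert convert_np_dtype_to_dtype_(np.int64) == core.DataType.INT64
# only FP16/FP32/FP64 count as floating-point types
assert dtype_is_floating(np.float64)
assert not dtype_is_floating(np.int32)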
def _debug_string_(proto, throw_on_error=True): def _debug_string_(proto, throw_on_error=True):
error_fields = list() error_fields = list()
if not proto.IsInitialized(error_fields) and throw_on_error: if not proto.IsInitialized(error_fields) and throw_on_error:
...@@ -66,7 +97,7 @@ class Variable(object): ...@@ -66,7 +97,7 @@ class Variable(object):
"matched.".format(self.name, old_shape, shape)) "matched.".format(self.name, old_shape, shape))
if dtype is not None: if dtype is not None:
if not isinstance(dtype, core.DataType): if not isinstance(dtype, core.DataType):
dtype = Variable._convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
if is_new_var: if is_new_var:
self.desc.set_data_type(dtype) self.desc.set_data_type(dtype)
else: else:
...@@ -148,26 +179,6 @@ class Variable(object): ...@@ -148,26 +179,6 @@ class Variable(object):
uid = core.unique_integer(prefix) # unique during whole process. uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)]) return "_".join([prefix, str(uid)])
@staticmethod
def _convert_np_dtype_to_dtype_(np_dtype):
dtype = np.dtype(np_dtype)
if dtype == np.float32:
return core.DataType.FP32
elif dtype == np.float64:
return core.DataType.FP64
elif dtype == np.float16:
return core.DataType.FP16
elif dtype == np.int32:
return core.DataType.INT32
elif dtype == np.int16:
return core.DataType.INT16
elif dtype == np.int64:
return core.DataType.INT64
elif dtype == np.bool:
return core.DataType.BOOL
else:
raise ValueError("Not supported numpy dtype " + str(dtype))
def get_all_op_protos(): def get_all_op_protos():
""" """
......
This diff is collapsed.
...@@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core ...@@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.evaluator as evaluator
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.initializer import XavierInitializer from paddle.v2.fluid.initializer import XavierInitializer
from paddle.v2.fluid.optimizer import AdamOptimizer from paddle.v2.fluid.optimizer import AdamOptimizer
...@@ -103,12 +104,13 @@ net = vgg16_bn_drop(images) ...@@ -103,12 +104,13 @@ net = vgg16_bn_drop(images)
predict = layers.fc(input=net, size=classdim, act='softmax') predict = layers.fc(input=net, size=classdim, act='softmax')
cost = layers.cross_entropy(input=predict, label=label) cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(input=predict, label=label)
# optimizer = SGDOptimizer(learning_rate=0.001) # optimizer = SGDOptimizer(learning_rate=0.001)
optimizer = AdamOptimizer(learning_rate=0.001) optimizer = AdamOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost) opts = optimizer.minimize(avg_cost)
accuracy, acc_out = evaluator.accuracy(input=predict, label=label)
BATCH_SIZE = 128 BATCH_SIZE = 128
PASS_NUM = 1 PASS_NUM = 1
...@@ -124,6 +126,7 @@ exe.run(framework.default_startup_program()) ...@@ -124,6 +126,7 @@ exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
batch_id = 0 batch_id = 0
accuracy.reset(exe)
for data in train_reader(): for data in train_reader():
img_data = np.array(map(lambda x: x[0].reshape(data_shape), img_data = np.array(map(lambda x: x[0].reshape(data_shape),
data)).astype("float32") data)).astype("float32")
...@@ -141,12 +144,14 @@ for pass_id in range(PASS_NUM): ...@@ -141,12 +144,14 @@ for pass_id in range(PASS_NUM):
outs = exe.run(framework.default_main_program(), outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, acc_out])
loss = np.array(outs[0]) loss = np.array(outs[0])
acc = np.array(outs[1]) acc = np.array(outs[1])
pass_acc = accuracy.eval(exe)
print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
" loss:" + str(loss) + " acc:" + str(acc)) " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
pass_acc))
batch_id = batch_id + 1 batch_id = batch_id + 1
if batch_id > 1: if batch_id > 1:
......
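The switch above from a plain accuracy layer to the stateful evaluator API follows this pattern, condensed here with a hypothetical feed_data helper and reusing the names defined in the script above:

# Condensed restatement of the diff above; feed_data is hypothetical.
accuracy, acc_out = evaluator.accuracy(input=predict, label=label)

for pass_id in range(PASS_NUM):
    accuracy.reset(exe)                        # clear the pass-level counters
    for data in train_reader():
        tensor_img, tensor_y = feed_data(data)     # hypothetical helper
        outs = exe.run(framework.default_main_program(),
                       feed={"pixel": tensor_img, "label": tensor_y},
                       fetch_list=[avg_cost, acc_out])
        batch_acc = np.array(outs[1])          # accuracy on this batch only
    pass_acc = accuracy.eval(exe)              # accumulated accuracy for the pass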
This diff is collapsed.
This diff is collapsed.