diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 9a86aafcb2fa4d4354d1dd9443c1b73ddcda980b..49c0ff600c40e0222093ff0a8a2f7e8e38ccba29 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -126,7 +126,7 @@ class ImageClassifier():
         # For oversampling, average predictions across crops.
         # If not, the shape of output[name]: (1, class_number),
         # the mean is also applicable.
-        return output[output_layer].mean(0)
+        return output[output_layer]['value'].mean(0)
 
     def predict(self, image=None, output_layer=None):
         assert isinstance(image, basestring)
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 4631816c43ef48839df1863a0a86c3ab00924d3f..6074cc1d3a85e13e3e8d336d81e22104f9d8e7cf 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -156,7 +156,7 @@ class ImageClassifier():
             # For oversampling, average predictions across crops.
            # If not, the shape of output[name]: (1, class_number),
            # the mean is also applicable.
-            res[name] = output[name].mean(0)
+            res[name] = output[name]['value'].mean(0)
 
         return res
 
diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp
index 41beed38a87601cb57072c8966cd0fd2ea156524..a3f4bfffc9f074900ebcc52876c04bbfc0e570b2 100644
--- a/paddle/api/Arguments.cpp
+++ b/paddle/api/Arguments.cpp
@@ -38,6 +38,13 @@ Arguments* Arguments::createByPaddleArgumentVector(void* ptr) {
   return args;
 }
 
+Arguments* Arguments::createByPaddleArgument(const void* ptr) {
+  auto p = (paddle::Argument*)(ptr);
+  auto args = new Arguments();
+  args->m->outputs.push_back(*p);
+  return args;
+}
+
 Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) {
   auto& a = m->getArg(idx);
   return Matrix::createByPaddleMatrixPtr(&a.value);
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index a44763bfa53ed5689d665cbd78f09417ec7de6da..a64e70a6bd563a20f1da3549aeaa76454dfe3219 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -144,12 +144,11 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
 
 void GradientMachine::randParameters() { m->machine->randParameters(); }
 
-Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
+Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
   auto nn = m->machine;
   if (nn) {
-    auto mat = nn->getLayerOutput(layerName);
-    return Matrix::createByPaddleMatrixPtr(&mat);
+    return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
   } else {
     throw UnsupportError();
   }
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index f5af8b0035b44d97832dd90ca2eeba079503715c..10569a71708463a37c77b519f8022b732dcf8ddb 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -454,6 +454,7 @@ public:
 
 private:
   static Arguments* createByPaddleArgumentVector(void* ptr);
+  static Arguments* createByPaddleArgument(const void* ptr);
   void* getInternalArgumentsPtr() const;
 
 private:
@@ -769,7 +770,7 @@ public:
 
   void randParameters();
 
-  Matrix* getLayerOutput(const std::string& layerName) const
+  Arguments* getLayerOutput(const std::string& layerName) const
       throw(UnsupportError);
 
   /**
@@ -952,7 +953,7 @@ public:
 
   Arguments* getForwardOutput();
 
-  Matrix* getLayerOutput(const std::string& layerName);
+  Arguments* getLayerOutput(const std::string& layerName);
 };
 
 /// the N-Best results generated from one input sequence.
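The two demo changes above follow from the new return shape: each entry of the dict returned by `getLayerOutputs` is now itself a dict, with the activation matrix under the `'value'` key, rather than a bare matrix. The `.mean(0)` averaging itself is untouched; below is a minimal numpy sketch of why it covers both the oversampling and the single-image case (array shapes are illustrative, not taken from the diff):

```python
import numpy as np

# With 10-crop oversampling, the network emits one row of class
# probabilities per crop: shape (num_crops, class_number).
crop_probs = np.random.rand(10, 1000)
averaged = crop_probs.mean(0)  # -> (1000,): one prediction averaged over crops

# Without oversampling the output is (1, class_number); mean(0) still
# applies and simply squeezes the single row down to (class_number,).
single_prob = np.random.rand(1, 1000)
assert single_prob.mean(0).shape == (1000,)
```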
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index d83dc380beeec3747451a483f4811eb833e8c226..c742614aff97f26e1a10be607f30c0fe61530b09 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -131,12 +131,10 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) {
 void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); }
 void Trainer::finishTestPeriod() { m->finishTestPeriod(); }
 
-Matrix* Trainer::getLayerOutput(const std::string& layerName) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(
-      this->m->getGradientMachine());
+Arguments* Trainer::getLayerOutput(const std::string& layerName) {
+  auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
-  auto m = nn->getLayerOutput(layerName);
-  return Matrix::createByPaddleMatrixPtr(&m);
+  return Arguments::createByPaddleArgument(&nn->getLayerOutput(layerName));
 }
 
 void Trainer::forwardOneBatch(size_t batchSize) {
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 5469c0d89f49648743f3a7e851694e4e1e736fb2..ae39783c6b7ea64a5306d08f665ed7e53f2b58ca 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,8 +134,8 @@ public:
     backward(callback);
   }
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) const {
-    return nullptr;
+  virtual const Argument& getLayerOutput(const std::string& layerName) {
+    return *((Argument*)nullptr);
   }
 
   // see comment in Layer.h for the function with the same name
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 2d42e648302f1673ae067ded43db4502679a6625..6b11b0155e990d6eb740f744ffa3f3a086e3e1ed 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,33 +282,17 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }
 
-MatrixPtr MultiGradientMachine::getLayerOutput(
-    const std::string& layerName) const {
-  // each thread has the same neural network
-  auto nn = threads_[0]->getGradientMachine();
-  size_t height = 0;
-  size_t width = nn->getLayerOutput(layerName)->getWidth();
-  std::vector<MatrixPtr> mats;
-  mats.reserve(threads_.size());
-  for (auto& thread : threads_) {
-    MatrixPtr out = thread->getGradientMachine()->getLayerOutput(layerName);
-    mats.push_back(out);
-    height += out->getHeight();
-    CHECK_EQ(width, out->getWidth());
-  }
+const Argument& MultiGradientMachine::getLayerOutput(
+    const std::string& layerName) {
+  std::vector<Argument> args;
+  args.reserve(threads_.size());
 
-  MatrixPtr layerOutput;
-  Matrix::resizeOrCreate(layerOutput, height, width, false, false);
-
-  // copy one layer output from one trainer thread at each time
-  size_t startRow = 0;
-  for (auto& mat : mats) {
-    auto tmpMatrix = layerOutput->subMatrix(startRow, mat->getHeight());
-    tmpMatrix->copyFrom(*mat);
-    startRow += mat->getHeight();
+  for (auto& thread : threads_) {
+    args.push_back(thread->getGradientMachine()->getLayerOutput(layerName));
   }
+  outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_);
 
-  return layerOutput;
+  return outLayerArgs_;
 }
 
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index a1a2d417062de5a1adeb83506b2a30339a816a13..9083230afd69e278b0343be1785edd01df3ad016 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,7 +189,7 @@ public:
                               PassType passType,
                               const UpdateCallback& callback);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
+  virtual const Argument& getLayerOutput(const std::string& layerName);
 
   virtual void onPassEnd();
 
@@ -316,6 +316,8 @@ protected:
   std::vector<Argument> outArgs_;
   hl_stream_t outArgStream_;
 
+  Argument outLayerArgs_;
+
   /// ParameterType which needs to be merged from each GPU
   std::vector<ParameterType> mergeTypes_;
   int numDevices_; /* number of gpu devices */
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 00887c81d47687f3f3f976c2050289a3c73885e9..d1afde40e1f81eea6b34c1e2c33fe4851d0074d6 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -293,10 +293,8 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }
 
-MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) const {
-  auto it = layerMap_.find(layerName);
-  CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
-  return it->second->getOutputValue();
+const Argument& NeuralNetwork::getLayerOutput(const std::string& layerName) {
+  return getLayer(layerName)->getOutput();
 }
 
 void NeuralNetwork::onPassEnd() {
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index 6ecc251a409cb9daefce704dd89027b9bd03744c..b4dc38e31b6fae1dd721b4ec1f5cdd2e5fe50e61 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,7 @@ public:
 
   virtual void backward(const UpdateCallback& callback = nullptr);
 
-  virtual MatrixPtr getLayerOutput(const std::string& layerName) const;
+  virtual const Argument& getLayerOutput(const std::string& layerName);
 
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp
index 1501c7437011d526b5a2cf575fd23fa9215216a3..57ba124e40cbd098fa8b0012ff31d6935b16862a 100644
--- a/paddle/gserver/layers/CosSimLayer.cpp
+++ b/paddle/gserver/layers/CosSimLayer.cpp
@@ -68,7 +68,7 @@ void CosSimLayer::forward(PassType passType) {
 void CosSimLayer::backward(const UpdateCallback& callback) {
   /* activation */ {
     REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str());
-    CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+    CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";
 
     const auto outG = this->getOutputGrad();
     const auto outV = this->getOutputValue();
diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py
index ce105d249aaf3e838443d3e0cf5996fe8c783a22..a708def1d2d7f6da2998a5905f9473accc6db969 100644
--- a/paddle/py_paddle/util.py
+++ b/paddle/py_paddle/util.py
@@ -208,7 +208,7 @@ def __monkeypatch_gradient_machine__():
 
         output = dict()
         for name in layerNames:
-            output[name] = __matrix_to_numpy__(self.getLayerOutput(name))
+            output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name))
         return output
 
     swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs
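Taken together, the Python-visible effect of the diff is that `GradientMachine.getLayerOutputs` now routes each layer's output through `__arguments_to_numpy__`, so callers index into a per-layer dict instead of receiving a bare matrix. Below is a minimal caller-side sketch, assuming a constructed gradient machine `gm`, a layer named `"fc_layer"`, and that `getLayerOutputs` accepts a list of layer names (as its loop over `layerNames` suggests); all three are illustrative, not taken from the diff:

```python
# Hypothetical usage; `gm` and the layer name are placeholders.
outs = gm.getLayerOutputs(["fc_layer"])

# Before this change: outs["fc_layer"] was a bare numpy matrix.
# After it: outs["fc_layer"] is the dict built by __arguments_to_numpy__,
# with the output matrix under 'value', matching the demo updates above.
activations = outs["fc_layer"]["value"]  # shape: (batch_size, layer_size)
print(activations.mean(0))               # average over rows, as the demos do
```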