diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 9a86aafcb2fa4d4354d1dd9443c1b73ddcda980b..49c0ff600c40e0222093ff0a8a2f7e8e38ccba29 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -126,7 +126,7 @@ class ImageClassifier():
         # For oversampling, average predictions across crops.
         # If not, the shape of output[name]: (1, class_number),
         # the mean is also applicable.
-        return output[output_layer].mean(0)
+        return output[output_layer]['value'].mean(0)

     def predict(self, image=None, output_layer=None):
         assert isinstance(image, basestring)
diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 99cc344a5ed9c65ddcb67744d1c40d906ff09ec1..cb1191eddd005ac6205f18ddddab3942583cc3d7 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -1,4 +1,3 @@
-import numpy
 import paddle.v2 as paddle

 import mnist_util
@@ -27,19 +26,14 @@ def main():
     cost = paddle.layer.classification_cost(input=inference, label=label)

     parameters = paddle.parameters.create(cost)
-    for param_name in parameters.keys():
-        array = parameters.get(param_name)
-        array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
-        parameters.set(parameter_name=param_name, value=array)

     adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)

     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
-            para = parameters.get('___fc_2__.w0')
-            print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % (
-                event.pass_id, event.batch_id, event.cost, para.mean())
-
+            if event.batch_id % 100 == 0:
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
         else:
             pass
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 4631816c43ef48839df1863a0a86c3ab00924d3f..6074cc1d3a85e13e3e8d336d81e22104f9d8e7cf 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -156,7 +156,7 @@ class ImageClassifier():
             # For oversampling, average predictions across crops.
             # If not, the shape of output[name]: (1, class_number),
             # the mean is also applicable.
-            res[name] = output[name].mean(0)
+            res[name] = output[name]['value'].mean(0)

         return res
diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/trainer_config_helpers/layers.rst
index 2793d6afd9565eb461c8657b838b146fe1992b20..bbea823de4d870f8a4384b6a85ebb7e8182797fe 100644
--- a/doc/api/trainer_config_helpers/layers.rst
+++ b/doc/api/trainer_config_helpers/layers.rst
@@ -139,24 +139,12 @@ lstmemory
   :members: lstmemory
   :noindex:

-lstm_step_layer
----------------
-.. automodule:: paddle.trainer_config_helpers.layers
-    :members: lstm_step_layer
-    :noindex:
-
 grumemory
 ---------
 .. automodule:: paddle.trainer_config_helpers.layers
     :members: grumemory
     :noindex:

-gru_step_layer
----------------
-.. automodule:: paddle.trainer_config_helpers.layers
-    :members: gru_step_layer
-    :noindex:
-
 Recurrent Layer Group
 =====================

@@ -172,6 +160,18 @@ recurrent_group
   :members: recurrent_group
   :noindex:

+lstm_step_layer
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: lstm_step_layer
+    :noindex:
+
+gru_step_layer
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: gru_step_layer
+    :noindex:
+
 beam_search
 ------------
 .. automodule:: paddle.trainer_config_helpers.layers
@@ -308,6 +308,12 @@ repeat_layer
   :members: repeat_layer
   :noindex:

+rotate_layer
+------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: rotate_layer
+    :noindex:
+
 seq_reshape_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
     :members: seq_reshape_layer
     :noindex:
@@ -462,6 +468,12 @@ ctc_layer
   :members: ctc_layer
   :noindex:

+warp_ctc_layer
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: warp_ctc_layer
+    :noindex:
+
 nce_layer
 -----------
 .. automodule:: paddle.trainer_config_helpers.layers
diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp
index 41beed38a87601cb57072c8966cd0fd2ea156524..a3f4bfffc9f074900ebcc52876c04bbfc0e570b2 100644
--- a/paddle/api/Arguments.cpp
+++ b/paddle/api/Arguments.cpp
@@ -38,6 +38,13 @@ Arguments* Arguments::createByPaddleArgumentVector(void* ptr) {
   return args;
 }

+Arguments* Arguments::createByPaddleArgument(const void* ptr) {
+  auto p = (paddle::Argument*)(ptr);
+  auto args = new Arguments();
+  args->m->outputs.push_back(*p);
+  return args;
+}
+
 Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) {
   auto& a = m->getArg(idx);
   return Matrix::createByPaddleMatrixPtr(&a.value);
diff --git a/paddle/api/Evaluator.cpp b/paddle/api/Evaluator.cpp
index c30e09876397e37ef9ed4ec3200d1aa372ceb609..681e3a380912339c531c16c88f43255c2f34c32f 100644
--- a/paddle/api/Evaluator.cpp
+++ b/paddle/api/Evaluator.cpp
@@ -27,3 +27,18 @@ std::string Evaluator::toString() {
   m->rawPtr->printStats(sout);
   return sout.str();
 }
+
+std::vector<std::string> Evaluator::getNames() const {
+  std::vector<std::string> retv;
+  m->rawPtr->getNames(&retv);
+  return retv;
+}
+
+double Evaluator::getValue(const std::string name) const {
+  paddle::Error err;
+  double v = m->rawPtr->getValue(name, &err);
+  if (err) {
+    throw std::runtime_error(err.msg());
+  }
+  return v;
+}
diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp
index 66115f8293b905809639afff779abfdb2bb3a54e..538ca2999f8f05afc45ac2d2f526133c8024f066 100644
--- a/paddle/api/GradientMachine.cpp
+++ b/paddle/api/GradientMachine.cpp
@@ -144,12 +144,12 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {

 void GradientMachine::randParameters() { m->machine->randParameters(); }

-Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
+Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(m->machine);
+  auto nn = m->machine;
   if (nn) {
-    auto mat = nn->getLayerOutput(layerName);
-    return Matrix::createByPaddleMatrixPtr(&mat);
+    auto arg = nn->getLayerOutput(layerName);
+    return Arguments::createByPaddleArgument(&arg);
   } else {
     throw UnsupportError();
   }
diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h
index f5af8b0035b44d97832dd90ca2eeba079503715c..1831b8e170087c909f77948f2d9077c946c72507 100644
--- a/paddle/api/PaddleAPI.h
+++ b/paddle/api/PaddleAPI.h
@@ -454,6 +454,7 @@ public:

 private:
   static Arguments* createByPaddleArgumentVector(void* ptr);
+  static Arguments* createByPaddleArgument(const void* ptr);
   void* getInternalArgumentsPtr() const;

 private:
@@ -769,7 +770,7 @@ public:

   void randParameters();

-  Matrix* getLayerOutput(const std::string& layerName) const
+  Arguments* getLayerOutput(const std::string& layerName) const
       throw(UnsupportError);

   /**
@@ -900,6 +901,10 @@ public:
   */
   std::string toString();

+  std::vector<std::string> getNames() const;
+
+  double getValue(const std::string name) const;
+
 private:
   EvaluatorPrivate* m;

@@ -952,7 +957,7 @@ public:

   Arguments* getForwardOutput();

-  Matrix* getLayerOutput(const std::string& layerName);
+  Arguments* getLayerOutput(const std::string& layerName) const;
 };

 /// the N-Best results generated from one input sequence.
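A minimal Python 2 sketch of how the new Evaluator bindings are meant to be driven (mirroring the testTrain.py hunk below; `m` is a GradientMachine, and `in_args`/`out_args` are assumed to be prepared elsewhere):

    ev = m.makeEvaluator()
    ev.start()
    m.forwardBackward(in_args, out_args, swig_paddle.PASS_TRAIN)
    m.eval(ev)
    ev.finish()
    for name in ev.getNames():        # metric names depend on the network's evaluators
        print name, ev.getValue(name)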
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp
index d83dc380beeec3747451a483f4811eb833e8c226..84e4ca054abb0100a02c8a40e31c49c17684ef40 100644
--- a/paddle/api/Trainer.cpp
+++ b/paddle/api/Trainer.cpp
@@ -131,12 +131,11 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) {
 void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); }
 void Trainer::finishTestPeriod() { m->finishTestPeriod(); }

-Matrix* Trainer::getLayerOutput(const std::string& layerName) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(
-      this->m->getGradientMachine());
+Arguments* Trainer::getLayerOutput(const std::string& layerName) const {
+  auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
-  auto m = nn->getLayerOutput(layerName);
-  return Matrix::createByPaddleMatrixPtr(&m);
+  auto arg = nn->getLayerOutput(layerName);
+  return Arguments::createByPaddleArgument(&arg);
 }

 void Trainer::forwardOneBatch(size_t batchSize) {
diff --git a/paddle/api/test/testTrain.py b/paddle/api/test/testTrain.py
index a90d15c272a3a2b56e35c979e053deb2b54eebc1..7061a4c43bf01158b5f084d0c310dedd81773a04 100644
--- a/paddle/api/test/testTrain.py
+++ b/paddle/api/test/testTrain.py
@@ -89,9 +89,14 @@ def main():
         except Exception as e:
             print e

+        ev = m.makeEvaluator()
+        ev.start()
         m.forwardBackward(inArgs, outArgs, swig_paddle.PASS_TRAIN,
                           update_callback)
-
+        m.eval(ev)
+        ev.finish()
+        for name in ev.getNames():
+            print name, ev.getValue(name)
         for optimizer in optimizers:
             optimizer.finishBatch()
diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h
index 0829968d87c5dc7eeb2d1b70c758ff305d89496f..bc2f2f8563526aa045ea89f15152ee2d639b5774 100644
--- a/paddle/gserver/gradientmachines/GradientMachine.h
+++ b/paddle/gserver/gradientmachines/GradientMachine.h
@@ -134,6 +134,10 @@ public:
     backward(callback);
   }

+  virtual Argument getLayerOutput(const std::string& layerName) {
+    return *((Argument*)nullptr);
+  }
+
   // see comment in Layer.h for the function with the same name
   virtual void resetState() {}

diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
index 80f223824d8dccfb0e9386f4c076b28f9332a958..123273f916f5d33e2543d9f5f28573c3b5761e28 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp
@@ -282,6 +282,18 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }

+Argument MultiGradientMachine::getLayerOutput(const std::string& layerName) {
+  std::vector<Argument> args;
+  args.reserve(threads_.size());
+
+  for (auto& thread : threads_) {
+    args.push_back(thread->getGradientMachine()->getLayerOutput(layerName));
+  }
+  outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_);
+
+  return outLayerArgs_;
+}
+
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
   for (size_t i = 0; i < parameters_.size(); i++) {
     if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue;
diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h
index 9be15ef4bcf34f26b7eceb9047252e537f20a4a8..838a52b5153af63adbce5788824b9f541f22517c 100644
--- a/paddle/gserver/gradientmachines/MultiGradientMachine.h
+++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h
@@ -189,6 +189,8 @@ public:
                                PassType passType,
                                const UpdateCallback& callback);

+  virtual Argument getLayerOutput(const std::string& layerName);
+
   virtual void onPassEnd();

   virtual void finish();
@@ -314,6 +316,8 @@ protected:
   std::vector<Argument> outArgs_;
   hl_stream_t outArgStream_;

+  Argument outLayerArgs_;
+
   /// ParameterType which needs to be merged from each GPU
   std::vector<ParameterType> mergeTypes_;
   int numDevices_; /* number of gpu devices */
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
index 273a9111c35a21c01f0cd8d283ecc6eaa4ef0c61..4512aacc81f86bf87fc9ea30adcf081327663f16 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp
@@ -293,11 +293,10 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }

-MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) {
-  auto it = layerMap_.find(layerName);
-  CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
-  return it->second->getOutputValue();
+Argument NeuralNetwork::getLayerOutput(const std::string& layerName) {
+  return getLayer(layerName)->getOutput();
 }
+
 void NeuralNetwork::onPassEnd() {
   for (auto& layer : layers_) {
     layer->onPassEnd();
diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h
index 25af4abcf81700e200feea806fa3daed19df1275..e7b6c438407e7eab6eab1f6ed496f35caa9f2177 100644
--- a/paddle/gserver/gradientmachines/NeuralNetwork.h
+++ b/paddle/gserver/gradientmachines/NeuralNetwork.h
@@ -87,7 +87,8 @@ public:

   virtual void backward(const UpdateCallback& callback = nullptr);

-  MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual Argument getLayerOutput(const std::string& layerName);
+
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
     CHECK(it != layerMap_.end()) << "Unknown layer " << layerName;
diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp
index a6c0300acf6752a3536e7939577b561fd97d1eb8..57ba124e40cbd098fa8b0012ff31d6935b16862a 100644
--- a/paddle/gserver/layers/CosSimLayer.cpp
+++ b/paddle/gserver/layers/CosSimLayer.cpp
@@ -42,7 +42,7 @@ void CosSimLayer::forward(PassType passType) {
   /* malloc memory for the output_ if necessary */
   int batchSize = getInputValue(0)->getHeight();
   int size = getSize();
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

   {
     REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
@@ -68,7 +68,7 @@ void CosSimLayer::forward(PassType passType) {
 void CosSimLayer::backward(const UpdateCallback& callback) {
   /* activation */ {
     REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str());
-    CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+    CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";

     const auto outG = this->getOutputGrad();
     const auto outV = this->getOutputValue();
diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp
index aabafd473aa1e06a767d48d4c49b7b8662e992e7..0f887d8adfa053e8fe88ac4fa4e2a9ba08ac07b5 100644
--- a/paddle/gserver/layers/CosSimVecMatLayer.cpp
+++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp
@@ -112,7 +112,7 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap,

 void CosSimVecMatLayer::forward(PassType passType) {
   Layer::forward(passType);
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
@@ -145,7 +145,7 @@ void CosSimVecMatLayer::forward(PassType passType) {
 }

 void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
-  CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+  CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";

   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
diff --git a/paddle/math/tests/test_RowBuffer.cpp b/paddle/math/tests/test_RowBuffer.cpp
index 5f66f22ef73dcff1868c1a3e03139a680b1ce2b5..8cc4c69a1a4d8afec08bf7fb13408e135a06c09c 100644
--- a/paddle/math/tests/test_RowBuffer.cpp
+++ b/paddle/math/tests/test_RowBuffer.cpp
@@ -17,10 +17,10 @@ limitations under the License. */

 TEST(RowBuffer, testAutoGrow) {
   paddle::RowBuffer buf(128);
-  ASSERT_EQ(128, buf.getWidth());
+  ASSERT_EQ(128UL, buf.getWidth());
   ASSERT_TRUE(buf.isAutoGrowth());

   buf.resize(2);
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }
@@ -35,7 +35,7 @@ TEST(RowBuffer, testAutoGrow) {
     data[i] = i;
   }

-  ASSERT_EQ(3, buf.getRowCount());
+  ASSERT_EQ(3UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getRowCount() - 1; ++i) {
     for (size_t j = 0; j < buf.getWidth(); ++j) {
       ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5);
@@ -51,7 +51,7 @@ TEST(RowBuffer, testWithMemBuf) {
       std::make_shared<paddle::CpuMemoryHandle>(128 * 2 * sizeof(real));
   paddle::RowBuffer buf(mem, 128);
   ASSERT_TRUE(!buf.isAutoGrowth());
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }
diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py
index 21d1cb75f4d40e6ed011b33c6366c9d31c0fcc7c..2690cafe1d8d32bf52cd9e5fa4dc69fbacb2d66c 100644
--- a/paddle/py_paddle/dataprovider_converter.py
+++ b/paddle/py_paddle/dataprovider_converter.py
@@ -23,7 +23,8 @@ __all__ = ['DataProviderConverter']
 class IScanner(object):
     def __init__(self, input_type, pos):
         self.input_type = input_type
-        assert isinstance(self.input_type, dp2.InputType)
+        if not isinstance(self.input_type, dp2.InputType):
+            raise ValueError("input type should be dataprovider2.InputType")
         self.pos = pos

     def scan(self, dat):
@@ -50,7 +51,6 @@ class DenseScanner(IScanner):

     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        assert isinstance(self.input_type, dp2.InputType)
         if self.__mat__.dtype != numpy.float32:
             self.__mat__ = self.__mat__.astype(numpy.float32)
         m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
@@ -63,7 +63,6 @@ class SparseBinaryScanner(IScanner):
         self.__rows__ = [0]
         self.__cols__ = []
         self.__height__ = 0
-        self.__nnz__ = 0
         self.__value__ = []

     def scan(self, dat):
@@ -76,7 +75,6 @@ class SparseBinaryScanner(IScanner):

     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        assert isinstance(self.input_type, dp2.InputType)
         m = swig_paddle.Matrix.createSparse(self.__height__,
                                             self.input_type.dim,
                                             len(self.__cols__),
diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py
index ce105d249aaf3e838443d3e0cf5996fe8c783a22..a708def1d2d7f6da2998a5905f9473accc6db969 100644
--- a/paddle/py_paddle/util.py
+++ b/paddle/py_paddle/util.py
@@ -208,7 +208,7 @@ def __monkeypatch_gradient_machine__():

         output = dict()
         for name in layerNames:
-            output[name] = __matrix_to_numpy__(self.getLayerOutput(name))
+            output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name))
         return output

     swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs
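With the monkeypatch above, `getLayerOutputs` wraps each layer's Argument instead of a bare Matrix. A rough sketch of the caller-side effect (the dict keys come from `__arguments_to_numpy__`; 'value' is the one the demo scripts rely on):

    outputs = gradient_machine.getLayerOutputs("__fc_layer_0__")
    prob = outputs["__fc_layer_0__"]['value']   # numpy array of layer activations
    print prob.mean(0)                          # as in demo/image_classification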
diff --git a/paddle/scripts/docker/Dockerfile b/paddle/scripts/docker/Dockerfile
index d4845a72b61833b356779d44de5163f161e0cd4d..27d19dc15c8bd64832243d75bf22f0e8adeecbd1 100644
--- a/paddle/scripts/docker/Dockerfile
+++ b/paddle/scripts/docker/Dockerfile
@@ -10,28 +10,30 @@ RUN apt-get update && \
     apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
     apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
     apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
-    apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    apt-get install -y automake && \
     apt-get clean -y

 RUN pip install --upgrade pip && \
-    pip install -U protobuf && \
+    pip install -U "protobuf==3.1.0" && \
     pip install -U wheel pillow BeautifulSoup && \
     pip install -U docopt PyYAML sphinx && \
     pip install -U sphinx_rtd_theme recommonmark jupyter

 RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
-    cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
+    cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
    cd .. && rm -rf cmake-3.4.1

+ARG BUILD_WOBOQ
 ARG BUILD_AND_INSTALL
 ARG WITH_AVX
 ARG WITH_DOC
 ARG WITH_STYLE_CHECK

+ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
 ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
 ENV WITH_GPU=OFF
 ENV WITH_AVX=${WITH_AVX:-ON}
-ENV WITH_DOC=${WITH_DOC:-ON}
+ENV WITH_DOC=${WITH_DOC:-OFF}
 ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}

 RUN mkdir /paddle
diff --git a/paddle/scripts/docker/Dockerfile.gpu b/paddle/scripts/docker/Dockerfile.gpu
index da20b2635e10e702d7c121a4f95e6a52a68487b0..538233dd0c7bb5088ea9473eb4d1a1ab62e98cb5 100644
--- a/paddle/scripts/docker/Dockerfile.gpu
+++ b/paddle/scripts/docker/Dockerfile.gpu
@@ -10,28 +10,30 @@ RUN apt-get update && \
     apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
     apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
     apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
-    apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    apt-get install -y automake && \
     apt-get clean -y

 RUN pip install --upgrade pip && \
-    pip install -U protobuf && \
+    pip install -U "protobuf==3.1.0" && \
     pip install -U wheel pillow BeautifulSoup && \
     pip install -U docopt PyYAML sphinx && \
     pip install -U sphinx_rtd_theme recommonmark jupyter

 RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
-    cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
+    cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
     cd .. && rm -rf cmake-3.4.1

+ARG BUILD_WOBOQ
 ARG BUILD_AND_INSTALL
 ARG WITH_AVX
 ARG WITH_DOC
 ARG WITH_STYLE_CHECK

+ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
 ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
 ENV WITH_GPU=ON
 ENV WITH_AVX=${WITH_AVX:-ON}
-ENV WITH_DOC=${WITH_DOC:-ON}
+ENV WITH_DOC=${WITH_DOC:-OFF}
 ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}

 RUN mkdir /paddle
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index 6197b41d6b5f191a452cbf32b47e0ff490b61046..d9c44f42340323afb4930dab35114f2adc5fbb3a 100755
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -11,7 +11,7 @@ set -e
 # If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied
 # source tree to /paddle, and this scripts should build it into
 # /paddle/build.
-if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
+if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
   if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
     ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so
   fi
@@ -19,7 +19,7 @@ if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
   mkdir -p /paddle/build # -p means no error if exists
   cd /paddle/build
   cmake .. \
-        -DWITH_DOC=ON \
+        -DWITH_DOC=${WITH_DOC:-OFF} \
         -DWITH_GPU=${WITH_GPU:-OFF} \
         -DWITH_AVX=${WITH_AVX:-OFF} \
         -DWITH_SWIG_PY=ON \
@@ -29,28 +29,32 @@ if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
   make -j `nproc`
   make install

-  # Install woboq_codebrowser.
-  git clone https://github.com/woboq/woboq_codebrowser /woboq
-  cd /woboq
-  cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-        -DCMAKE_BUILD_TYPE=Release \
-        .
-  make
-
-  export WOBOQ_OUT=/usr/share/nginx/html/paddle
-  export BUILD_DIR=/paddle/build
-  mkdir -p $WOBOQ_OUT
-  cp -rv /woboq/data $WOBOQ_OUT/../data
-  /woboq/generator/codebrowser_generator \
-      -b /paddle/build \
-      -a \
-      -o $WOBOQ_OUT \
-      -p paddle:/paddle
-  /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
-  cd /woboq
-  make clean
-
-  pip install /usr/local/opt/paddle/share/wheels/*.whl
+  if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then
+    apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev
+    # Install woboq_codebrowser.
+    git clone https://github.com/woboq/woboq_codebrowser /woboq
+    cd /woboq
+    cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
+          -DCMAKE_BUILD_TYPE=Release \
+          .
+    make
+
+    export WOBOQ_OUT=/usr/share/nginx/html/paddle
+    export BUILD_DIR=/paddle/build
+    mkdir -p $WOBOQ_OUT
+    cp -rv /woboq/data $WOBOQ_OUT/../data
+    /woboq/generator/codebrowser_generator \
+        -b /paddle/build \
+        -a \
+        -o $WOBOQ_OUT \
+        -p paddle:/paddle
+    /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
+    cd /woboq
+    make clean
+  fi
+
+  pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
+  pip install /usr/local/opt/paddle/share/wheels/paddle*.whl
   paddle version
 fi
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 357637e20346f8e1179d3a28ff580722cdfcccff..71af50a9a4ed835635362c24d6c1ae5e92de050b 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -25,6 +25,7 @@ add_custom_target(paddle_python ALL DEPENDS

 add_subdirectory(paddle/trainer_config_helpers/tests)
 add_subdirectory(paddle/reader/tests)
+add_subdirectory(paddle/v2/tests)

 install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
     DESTINATION opt/paddle/share/wheels
diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py
index 493b410e8299ebe167be43ead1401a6ab245a631..7373dc461b1d3115c03b37c5102a469a52aa7441 100644
--- a/python/paddle/reader/__init__.py
+++ b/python/paddle/reader/__init__.py
@@ -21,3 +21,5 @@
 #
 #     r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))
 from decorator import *
+
+import creator
diff --git a/python/paddle/reader/creator.py b/python/paddle/reader/creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a91bb0b8ef6d1874737386897f6c555eaec18d4
--- /dev/null
+++ b/python/paddle/reader/creator.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['np_array', 'text_file']
+
+
+def np_array(x):
+    """
+    Creates a reader that yields elements of x, if it is a
+    numpy vector. Or rows of x, if it is a numpy matrix.
+    Or any sub-hyperplane indexed by the highest dimension.
+
+    :param x: the numpy array to create reader from.
+    :returns: data reader created from x.
+    """
+
+    def reader():
+        if x.ndim < 1:
+            yield x
+
+        for e in x:
+            yield e
+
+    return reader
+
+
+def text_file(path):
+    """
+    Creates a data reader that outputs text line by line from given text file.
+    Trailing new line ('\n') of each line will be removed.
+
+    :param path: path of the text file.
+    :returns: data reader of text file
+    """
+
+    def reader():
+        f = open(path, "r")
+        for l in f:
+            yield l.rstrip('\n')
+        f.close()
+
+    return reader
diff --git a/python/paddle/reader/tests/CMakeLists.txt b/python/paddle/reader/tests/CMakeLists.txt
index 502c897d8946a838847c1c23b1236358c58c088e..da072fb3dbeed5bee15fa1f64372ad8dec497070 100644
--- a/python/paddle/reader/tests/CMakeLists.txt
+++ b/python/paddle/reader/tests/CMakeLists.txt
@@ -2,3 +2,8 @@ add_test(NAME reader_decorator_test
   COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
     ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/decorator_test.py
   WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
+
+add_test(NAME reader_creator_test
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+    ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/creator_test.py
+  WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
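A small usage sketch for the two creators above (assuming the new paddle.reader.creator module is importable and 'hello.txt' exists):

    import numpy as np
    import paddle.reader.creator as creator

    mat = np.array([[1, 2], [3, 4]], dtype=np.float32)
    for row in creator.np_array(mat)():          # yields one row per iteration
        print row

    for line in creator.text_file('hello.txt')():  # yields lines without '\n'
        print line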
diff --git a/python/paddle/reader/tests/creator_test.py b/python/paddle/reader/tests/creator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..eda8ab6715b2be0c9cb6163adf60d8fbdf2d7e8c
--- /dev/null
+++ b/python/paddle/reader/tests/creator_test.py
@@ -0,0 +1,38 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.reader.creator
+import numpy as np
+import os
+
+
+class TestNumpyArray(unittest.TestCase):
+    def test_numpy_array(self):
+        l = [[1, 2, 3], [4, 5, 6]]
+        x = np.array(l, np.int32)
+        reader = paddle.reader.creator.np_array(x)
+        for idx, e in enumerate(reader()):
+            self.assertItemsEqual(e, l[idx])
+
+
+class TestTextFile(unittest.TestCase):
+    def test_text_file(self):
+        path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
+        reader = paddle.reader.creator.text_file(path)
+        for idx, e in enumerate(reader()):
+            self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/reader/tests/test_data_creator.txt b/python/paddle/reader/tests/test_data_creator.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a2a8d47d43868d369083808497697da79e620e31
--- /dev/null
+++ b/python/paddle/reader/tests/test_data_creator.txt
@@ -0,0 +1,3 @@
+0 1
+2 3
+4 5
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 00aef80691fba05be543beadf22acde7d28c5e8e..1bb1a01d509e6412c254fce856101137e66b1e12 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -112,6 +112,8 @@ __all__ = [
     'priorbox_layer',
     'spp_layer',
     'pad_layer',
+    'eos_layer',
+    'layer_support',
 ]


@@ -708,6 +710,7 @@ class MixedLayerType(LayerOutput):
         # update the size which might be computed inside MixedLayer
         # according to the operator's output size
         self.size = ml.config.size
+        self.finalized = True


 @wrap_name_default("mixed")
@@ -1287,6 +1290,12 @@ def last_seq(input,
     """
     Get Last Timestamp Activation of a sequence.

+    The simple usage is:
+
+    .. code-block:: python
+
+       seq = last_seq(input=layer)
+
     :param agg_level: Aggregated level
     :param name: Layer name.
     :type name: basestring
@@ -1325,6 +1334,12 @@ def first_seq(input,
     """
     Get First Timestamp Activation of a sequence.

+    The simple usage is:
+
+    .. code-block:: python
+
+       seq = first_seq(input=layer)
+
     :param agg_level: aggregation level
     :param name: Layer name.
     :type name: basestring
@@ -1425,7 +1440,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):

     .. code-block:: python

-       expand = repeat_layer(layer, 4)
+       expand = repeat_layer(input=layer, num_repeats=4)

     :param input: Input layer
     :type input: LayerOutput
@@ -1797,6 +1812,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
     Note that the above computation is for one sample. Multiple samples are
     processed in one batch.

+    The example usage is:
+
+    .. code-block:: python
+
+       cos = cos_sim(a=layer1, b=layer2, size=3)
+
     :param name: layer name
     :type name: basestring
     :param a: input layer a
@@ -1958,6 +1979,16 @@ def img_conv_layer(input,
     pieces. First 256/4 = 64 channels will process by first 32 filters. The
     rest channels will be processed by rest group of filters.

+    The example usage is:
+
+    .. code-block:: python
+
+       conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
+                             num_channels=8,
+                             num_filters=16, stride=1,
+                             bias_attr=False,
+                             act=ReluActivation())
+
     :param name: Layer name.
     :type name: basestring
     :param input: Layer Input.
@@ -2097,6 +2128,34 @@ def img_pool_layer(input,

     .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/

+    - ceil_mode=True:
+
+    .. math::
+
+       w = 1 + int(ceil(input\_width + 2 * padding - pool\_size) / float(stride))
+       h = 1 + int(ceil(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y))
+
+    - ceil_mode=False:
+
+    .. math::
+
+       w = 1 + int(floor(input\_width + 2 * padding - pool\_size) / float(stride))
+       h = 1 + int(floor(input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y))
+
+    The example usage is:
+
+    .. code-block:: python
+
+       maxpool = img_pool_layer(input=conv,
+                                pool_size=3,
+                                pool_size_y=5,
+                                num_channels=8,
+                                stride=1,
+                                stride_y=2,
+                                padding=1,
+                                padding_y=2,
+                                pool_type=MaxPooling())
+
     :param padding: pooling padding width.
     :type padding: int
     :param padding_y: pooling padding height. It's equal to padding by default.
@@ -2123,19 +2182,6 @@ def img_pool_layer(input,
     :param ceil_mode: Whether to use ceil mode to calculate output height and
                       width. Default is True. If set to False, floor mode is
                       used instead.
-    - ceil_mode=True:
-
-    .. math::
-
-        w = 1 + int(ceil(input_width + 2 * padding - pool_size) / float(stride))
-        h = 1 + int(ceil(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
-
-    - ceil_mode=False:
-
-    .. math::
-
-        w = 1 + int(floor(input_width + 2 * padding - pool_size) / float(stride))
-        h = 1 + int(floor(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
     :type ceil_mode: bool
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2197,6 +2243,15 @@ def spp_layer(input,
     The details please refer to
     `Kaiming He's paper`_.

+    The example usage is:
+
+    .. code-block:: python
+
+       spp = spp_layer(input=data,
+                       pyramid_height=2,
+                       num_channels=16,
+                       pool_type=MaxPooling())
+
     :param name: layer name.
     :type name: basestring
     :param input: layer's input.
@@ -2285,6 +2340,12 @@ def img_cmrnorm_layer(input,
     The details please refer to
     `Alex's paper`_.

+    The example usage is:
+
+    .. code-block:: python
+
+       norm = img_cmrnorm_layer(input=net, size=5)
+
     :param name: layer name.
     :type name: None|basestring
     :param input: layer's input.
@@ -2340,6 +2401,12 @@ def batch_norm_layer(input,
     The details of batch normalization please refer to this
     `paper`_.

+    The example usage is:
+
+    .. code-block:: python
+
+       norm = batch_norm_layer(input=net, act=ReluActivation())
+
     :param name: layer name.
     :type name: basestring
     :param input: batch normalization input. Better be linear activation.
@@ -3903,13 +3970,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):

     .. code-block:: python

-       conv_shift = conv_shift_layer(input=[layer1, layer2])
+       conv_shift = conv_shift_layer(a=layer1, b=layer2)

     :param name: layer name
     :type name: basestring
     :param a: Input layer a.
     :type a: LayerOutput
-    :param b: input layer b
+    :param b: input layer b.
     :type b: LayerOutput
     :param layer_attr: layer's extra attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -4001,8 +4068,8 @@ def tensor_layer(a,
 @wrap_act_default()
 @layer_support()
 def selective_fc_layer(input,
-                       select,
                        size,
+                       select=None,
                        act=None,
                        name=None,
                        pass_generation=False,
@@ -4029,6 +4096,7 @@ def selective_fc_layer(input,
     :type input: LayerOutput|list|tuple
     :param select: The select layer. The output of select layer should be a
                    sparse binary matrix, and treat as the mask of selective fc.
+                   If it is None, acts exactly like fc_layer.
     :type select: LayerOutput
     :param size: The layer dimension.
     :type size: int
@@ -4257,7 +4325,7 @@ def block_expand_layer(input,

     .. code-block:: python

-       block_expand = block_expand_layer(input,
+       block_expand = block_expand_layer(input=layer,
                                          num_channels=128,
                                          stride_x=1,
                                          stride_y=1,
@@ -4461,7 +4529,7 @@ def warp_ctc_layer(input,
     - You can set 'blank' to any value ranged in [0, num_classes], which
       should be consistent as that used in your labels.
     - As a native 'softmax' activation is integrated to the warp-ctc library,
-     'linear' activation is expected instead in the 'input' layer.
+      'linear' activation is expected instead in the 'input' layer.

     The simple usage:

@@ -4594,6 +4662,13 @@ def crf_decoding_layer(input,
     this layer will also calculate error. output.value[i] is 1 for incorrect
     decoding or 0 for correct decoding.

+    The simple usage:
+
+    .. code-block:: python
+
+       crf_decoding = crf_decoding_layer(input=input,
+                                         size=label_dim)
+
     :param input: The first input layer.
     :type input: LayerOutput
     :param size: size of this layer.
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index 402b3f658d586bdeb9eaf0673646332f8df801c4..90321defef7674dde96d57f0e0455031ee64ee86 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -19,12 +19,14 @@ import trainer
 import event
 import data_type
 import topology
+import data_feeder
 import attr
+import pooling
 import py_paddle.swig_paddle as api

 __all__ = [
     'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
-    'event', 'data_type', 'topology', 'attr'
+    'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'topology'
 ]
diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a16d46dda47f822dd2d6c168528dd6cec53ab4e
--- /dev/null
+++ b/python/paddle/v2/data_feeder.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from py_paddle import swig_paddle
+from py_paddle import DataProviderConverter
+
+import data_type
+
+__all__ = ['DataFeeder']
+
+
+class DataFeeder(DataProviderConverter):
+    """
+    DataFeeder converts the data returned by paddle.reader into a data structure
+    of Arguments which is defined in the API. The paddle.reader usually returns
+    a list of mini-batch data entries. Each data entry in the list is one sample.
+    Each sample is a list or a tuple with one feature or multiple features.
+    DataFeeder converts these mini-batch data entries into Arguments in order
+    to feed them to the C++ interface.
+
+    The example usage:
+
+        data_types = [('image', paddle.data_type.dense_vector(784)),
+                      ('label', paddle.data_type.integer_value(10))]
+        reader_dict = {'image': 0, 'label': 1}
+        feeder = DataFeeder(data_types=data_types, reader_dict=reader_dict)
+        minibatch_data = [
+            ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ),  # first sample
+            ( [1.0,2.0,3.0,4.0], 5, [6,7,8] )   # second sample
+        ]
+        # or minibatch_data = [
+        #     [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
+        #     [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
+        # ]
+        arg = feeder(minibatch_data)
+    """
+
+    def __init__(self, data_types, reader_dict):
+        """
+        :param data_types: A list to specify data name and type. Each item is
+                           a tuple of (data_name, data_type). For example:
+                           [('image', paddle.data_type.dense_vector(784)),
+                            ('label', paddle.data_type.integer_value(10))]
+
+        :type data_types: A list of tuple
+        :param reader_dict: A dictionary to specify the position of each data
+                            in the input data.
+        :type reader_dict: dict()
+        """
+        self.input_names = []
+        input_types = []
+        self.reader_dict = reader_dict
+        for each in data_types:
+            self.input_names.append(each[0])
+            assert isinstance(each[1], data_type.InputType)
+            input_types.append(each[1])
+        DataProviderConverter.__init__(self, input_types)
+
+    def convert(self, dat, argument=None):
+        """
+        :param dat: A list of mini-batch data. Each sample is a list or tuple
+                    with one feature or multiple features.
+                    For example:
+                    [
+                        ([0.2, 0.2], ),  # first sample
+                        ([0.8, 0.3], ),  # second sample
+                    ]
+                    or,
+                    [
+                        [[0.2, 0.2], ],  # first sample
+                        [[0.8, 0.3], ],  # second sample
+                    ]
+
+        :type dat: List
+        :param argument: An Arguments object contains this mini-batch data with
+                         one or multiple features. The Arguments definition is
+                         in the API.
+        :type argument: swig_paddle.Arguments
+        """
+
+        def reorder_data(data):
+            retv = []
+            for each in data:
+                reorder = []
+                for name in self.input_names:
+                    reorder.append(each[self.reader_dict[name]])
+                retv.append(reorder)
+            return retv
+
+        return DataProviderConverter.convert(self, reorder_data(dat), argument)
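A hypothetical sketch of the reordering behaviour documented above: the reader yields (label, image) pairs while the network wants image first, and reader_dict maps each input name to its position within a sample:

    feeder = DataFeeder(
        data_types=[('image', paddle.data_type.dense_vector(784)),
                    ('label', paddle.data_type.integer_value(10))],
        reader_dict={'image': 1, 'label': 0})
    arg = feeder([(5, [0.1] * 784), (3, [0.2] * 784)])  # two (label, image) samples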
diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py
index cd9ce6e51352ae4cc386997b1e3353bf11db5a7d..dd3ebfcb4267e1bb59011c81cb5a2716b8e45a6d 100644
--- a/python/paddle/v2/data_type.py
+++ b/python/paddle/v2/data_type.py
@@ -14,9 +14,9 @@

 from paddle.trainer.PyDataProvider2 import \
     InputType, dense_vector, sparse_binary_vector,\
-    sparse_vector, integer_value, DataType
+    sparse_vector, integer_value, integer_value_sequence

 __all__ = [
     'InputType', 'dense_vector', 'sparse_binary_vector', 'sparse_vector',
-    'integer_value', 'DataType'
+    'integer_value', 'integer_value_sequence'
 ]
diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/python/paddle/v2/dataset/config.py b/python/paddle/v2/dataset/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..69e96d65ef1ef868aff5d46ddf3af250ca11e641
--- /dev/null
+++ b/python/paddle/v2/dataset/config.py
@@ -0,0 +1,8 @@
+import os
+
+__all__ = ['DATA_HOME']
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')
+
+if not os.path.exists(DATA_HOME):
+    os.makedirs(DATA_HOME)
diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..db84f37aa4fc3477b17599a48a4de9b45cfb6c1f
--- /dev/null
+++ b/python/paddle/v2/dataset/mnist.py
@@ -0,0 +1,39 @@
+import sklearn.datasets.mldata
+import sklearn.model_selection
+import numpy
+from config import DATA_HOME
+
+__all__ = ['train_creator', 'test_creator']
+
+
+def __mnist_reader_creator__(data, target):
+    def reader():
+        n_samples = data.shape[0]
+        for i in xrange(n_samples):
+            yield (data[i] / 255.0).astype(numpy.float32), int(target[i])
+
+    return reader
+
+
+TEST_SIZE = 10000
+
+data = sklearn.datasets.mldata.fetch_mldata(
+    "MNIST original", data_home=DATA_HOME)
+X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+    data.data, data.target, test_size=TEST_SIZE, random_state=0)
+
+
+def train_creator():
+    return __mnist_reader_creator__(X_train, y_train)
+
+
+def test_creator():
+    return __mnist_reader_creator__(X_test, y_test)
+
+
+def unittest():
+    assert len(list(test_creator()())) == TEST_SIZE
+
+
+if __name__ == '__main__':
+    unittest()
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index a16cfa91f062a60a141ea8fa962b3ecf6f5f0a22..835e28e6218df22e1cad7f7bb31c3c9941657252 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -3,8 +3,6 @@ All training events.

 There are:

-* BeginTraining
-* EndTraining
 * BeginIteration
 * EndIteration
 * BeginPass
@@ -12,15 +10,62 @@ There are:

 TODO(yuyang18): Complete it!
 """
-__all__ = ['EndIteration']
+import py_paddle.swig_paddle as api

+__all__ = ['EndIteration', 'BeginIteration', 'BeginPass', 'EndPass']

-class EndIteration(object):
+
+class WithMetric(object):
+    def __init__(self, evaluator):
+        if not isinstance(evaluator, api.Evaluator):
+            raise TypeError("Evaluator should be api.Evaluator type")
+        self.__evaluator__ = evaluator
+
+    @property
+    def metrics(self):
+        names = self.__evaluator__.getNames()
+        retv = dict()
+        for each_name in names:
+            val = self.__evaluator__.getValue(each_name)
+            retv[each_name] = val
+        return retv
+
+
+class BeginPass(object):
+    """
+    Event On One Pass Training Start.
+    """
+
+    def __init__(self, pass_id):
+        self.pass_id = pass_id
+
+
+class EndPass(WithMetric):
+    """
+    Event On One Pass Training Complete.
+    """
+
+    def __init__(self, pass_id, evaluator):
+        self.pass_id = pass_id
+        WithMetric.__init__(self, evaluator)
+
+
+class BeginIteration(object):
+    """
+    Event On One Batch Training Start.
+    """
+
+    def __init__(self, pass_id, batch_id):
+        self.pass_id = pass_id
+        self.batch_id = batch_id
+
+
+class EndIteration(WithMetric):
     """
     Event On One Batch Training Complete.
     """

-    def __init__(self, pass_id, batch_id, cost):
+    def __init__(self, pass_id, batch_id, cost, evaluator):
         self.pass_id = pass_id
         self.batch_id = batch_id
         self.cost = cost
+        WithMetric.__init__(self, evaluator)
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 2c15c1ffb79897894d2414c0c4f05a2c617fe359..d15e6398f51f43c1eeab67bba654f91cc56135a4 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -71,19 +71,37 @@ import collections
 import paddle.trainer_config_helpers as conf_helps
 from paddle.trainer_config_helpers.config_parser_utils import \
     parse_network_config as __parse__
+
 from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+from paddle.trainer_config_helpers.default_decorators import wrap_act_default
+from paddle.trainer_config_helpers.default_decorators import wrap_bias_attr_default
+from paddle.trainer_config_helpers.layers import layer_support

 import data_type
 import activation
 import attr

 __all__ = [
-    'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
-    'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost',
+    'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp',
+    'maxout', 'img_cmrnorm', 'batch_norm', 'sum_to_one_norm', 'recurrent',
+    'lstmemory', 'grumemory', 'pool', 'last_seq', 'first_seq', 'concat',
+    'seq_concat', 'block_expand', 'expand', 'repeat', 'seq_reshape', 'addto',
+    'linear_comb', 'interpolation', 'bilinear_interp', 'power', 'scaling',
+    'slope_intercept', 'tensor', 'cos_sim', 'trans', 'max_id', 'sampling_id',
+    'pad', 'classification_cost', 'cross_entropy_cost',
+    'cross_entropy_with_selfnorm_cost', 'regression_cost',
     'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost',
-    'sum_cost', 'huber_cost'
+    'sum_cost', 'huber_cost', 'crf', 'crf_decoding', 'ctc', 'warp_ctc', 'nce',
+    'hsigmoid', 'eos'
 ]

+__projection_names__ = filter(lambda x: x.endswith('_projection'),
+                              dir(conf_helps))
+__all__ += __projection_names__
+
+__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
+__all__ += __operator_names__
+

 def parse_network(*outputs):
     """
@@ -101,9 +119,8 @@ def parse_network(*outputs):


 class Layer(object):
-    def __init__(self, name, parent_layers):
+    def __init__(self, name=None, parent_layers=None):
         assert isinstance(parent_layers, dict)
-        assert isinstance(name, basestring)
         self.name = name
         self.__parent_layers__ = parent_layers

@@ -122,22 +139,25 @@ class Layer(object):
                 self.__parent_layers__[layer_name])
             kwargs[layer_name] = v1_layer

-        if self.name not in context:
+        if self.name is None:
+            return self.to_proto_impl(**kwargs)
+        elif self.name not in context:
             context[self.name] = self.to_proto_impl(**kwargs)
+
         return context[self.name]

     def to_proto_impl(self, **kwargs):
         raise NotImplementedError()


-def __convert_to_v2__(method_name, name_prefix, parent_names):
-    if name_prefix is not None:
-        wrapper = wrap_name_default(name_prefix=name_prefix)
+def __convert_to_v2__(method_name, parent_names, is_default_name=True):
+    if is_default_name:
+        wrapper = wrap_name_default(name_prefix=method_name)
     else:
         wrapper = None

     class V2LayerImpl(Layer):
-        def __init__(self, name=None, **kwargs):
+        def __init__(self, **kwargs):
             parent_layers = dict()
             other_kwargs = dict()
             for pname in parent_names:
@@ -148,6 +168,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names):
                 if key not in parent_names:
                     other_kwargs[key] = kwargs[key]

+            name = kwargs.get('name', None)
             super(V2LayerImpl, self).__init__(name, parent_layers)
             self.__other_kwargs__ = other_kwargs

@@ -160,7 +181,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names):
                 args[each] = kwargs[each]
             for each in self.__other_kwargs__:
                 args[each] = self.__other_kwargs__[each]
-            return getattr(conf_helps, method_name)(name=self.name, **args)
+            return getattr(conf_helps, method_name)(**args)

     return V2LayerImpl

@@ -191,69 +212,171 @@ class DataLayerV2(Layer):
         return getattr(conf_helps, self.__method_name__)(name=self.name, **args)


-LayerV2 = Layer
+class MixedLayerV2(Layer):
+    """
+    This class is used to support the `with` grammar. If not for that, the
+    following code could convert mixed_layer simply:
+
+        mixed = __convert_to_v2__(
+            'mixed_layer', name_prefix='mixed', parent_names=['input'])
+    """
+
+    class AddToSealedMixedLayerExceptionV2(Exception):
+        pass
+
+    def __init__(self,
+                 size=0,
+                 input=None,
+                 name=None,
+                 act=None,
+                 bias_attr=None,
+                 layer_attr=None):
+        self.__method_name__ = 'mixed_layer'
+        self.finalized = False
+        self.__inputs__ = []
+        if input is not None:
+            self.__inputs__ = input
+
+        other_kwargs = dict()
+        other_kwargs['name'] = name
+        other_kwargs['size'] = size
+        other_kwargs['act'] = act
+        other_kwargs['bias_attr'] = bias_attr
+        other_kwargs['layer_attr'] = layer_attr
+
+        parent_layers = {"input": self.__inputs__}
+        super(MixedLayerV2, self).__init__(name, parent_layers)
+        self.__other_kwargs__ = other_kwargs
+
+    def __iadd__(self, other):
+        if not self.finalized:
+            self.__inputs__.append(other)
+            return self
+        else:
+            raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
+
+    def __enter__(self):
+        assert len(self.__inputs__) == 0
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.finalized = True
+
+    def to_proto_impl(self, **kwargs):
+        args = dict()
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__other_kwargs__:
+            args[each] = self.__other_kwargs__[each]
+        return getattr(conf_helps, self.__method_name__)(**args)
+
+
+@wrap_name_default("mixed")
+@wrap_act_default(act=activation.Linear())
+@wrap_bias_attr_default(has_bias=False)
+@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT)
+def mixed(size=0,
+          name=None,
+          input=None,
+          act=None,
+          bias_attr=False,
+          layer_attr=None):
+    return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
+

 data = DataLayerV2
-fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input'])
-max_id = __convert_to_v2__(
-    'maxid_layer', name_prefix='maxid', parent_names=['input'])
-classification_cost = __convert_to_v2__(
-    'classification_cost',
-    name_prefix='classification_cost',
-    parent_names=['input', 'label', 'weight'])
-regression_cost = __convert_to_v2__(
-    'regression_cost',
-    name_prefix='regression_cost',
-    parent_names=['input', 'label', 'weight'])
-cross_entropy_cost = __convert_to_v2__(
-    'cross_entropy',
-    name_prefix='cross_entropy',
-    parent_names=['input', 'label'])
-cross_entropy_with_selfnorm_cost = __convert_to_v2__(
-    'cross_entropy_with_selfnorm',
-    name_prefix='cross_entropy_with_selfnorm',
-    parent_names=['input', 'label'])
-multi_binary_label_cross_entropy_cost = __convert_to_v2__(
-    'multi_binary_label_cross_entropy',
-    name_prefix='multi_binary_label_cross_entropy',
-    parent_names=['input', 'label'])
-rank_cost = __convert_to_v2__(
-    'rank_cost',
-    name_prefix='rank_cost',
-    parent_names=['left', 'right', 'label', 'weight'])
-lambda_cost = __convert_to_v2__(
-    'lambda_cost', name_prefix='lambda_cost', parent_names=['input', 'score'])
-sum_cost = __convert_to_v2__(
-    'sum_cost', name_prefix='sum_cost', parent_names=['input'])
-huber_cost = __convert_to_v2__(
-    'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label'])
-
-if __name__ == '__main__':
-    pixel = data(name='pixel', type=data_type.dense_vector(784))
-    label = data(name='label', type=data_type.integer_value(10))
-    weight = data(name='weight', type=data_type.dense_vector(10))
-    score = data(name='score', type=data_type.dense_vector(1))
-
-    hidden = fc(input=pixel,
-                size=100,
-                act=activation.Sigmoid(),
-                param_attr=attr.Param(name='hidden'))
-    inference = fc(input=hidden, size=10, act=activation.Softmax())
-    maxid = max_id(input=inference)
-    cost1 = classification_cost(input=inference, label=label)
-    cost2 = classification_cost(input=inference, label=label, weight=weight)
-    cost3 = cross_entropy_cost(input=inference, label=label)
-    cost4 = cross_entropy_with_selfnorm_cost(input=inference, label=label)
-    cost5 = regression_cost(input=inference, label=label)
-    cost6 = regression_cost(input=inference, label=label, weight=weight)
-    cost7 = multi_binary_label_cross_entropy_cost(input=inference, label=label)
-    cost8 = rank_cost(left=score, right=score, label=score)
-    cost9 = lambda_cost(input=inference, score=score)
-    cost10 = sum_cost(input=inference)
-    cost11 = huber_cost(input=score, label=label)
-
-    print parse_network(cost1, cost2)
-    print parse_network(cost3, cost4)
-    print parse_network(cost5, cost6)
-    print parse_network(cost7, cost8, cost9, cost10, cost11)
-    print parse_network(inference, maxid)
+AggregateLevel = conf_helps.layers.AggregateLevel
+ExpandLevel = conf_helps.layers.ExpandLevel
+
+layer_list = [
+    # [V2LayerImpl, V1_method_name, parent_names]
+    # fully connected layers
+    ['fc', 'fc_layer', ['input']],
+    # conv layers
+    ['conv_shift', 'conv_shift_layer', ['a', 'b']],
+    ['img_conv', 'img_conv_layer', ['input']],
+    # image pooling layers
+    ['img_pool', 'img_pool_layer', ['input']],
+    ['spp', 'spp_layer', ['input']],
+    ['maxout', 'maxout_layer', ['input']],
+    # norm layers
+    ['img_cmrnorm', 'img_cmrnorm_layer', ['input']],
+    ['batch_norm', 'batch_norm_layer', ['input']],
+    ['sum_to_one_norm', 'sum_to_one_norm_layer', ['input']],
+    # recurrent layers
+    ['recurrent', 'recurrent_layer', ['input']],
+    ['lstmemory', 'lstmemory', ['input']],
+    ['grumemory', 'grumemory', ['input']],
+    # aggregate layers
+    ['pool', 'pooling_layer', ['input']],
+    ['last_seq', 'last_seq', ['input']],
+    ['first_seq', 'first_seq', ['input']],
+    ['concat', 'concat_layer', ['input']],
+    ['seq_concat', 'seq_concat_layer', ['a', 'b']],
+    # reshaping layers
+    ['block_expand', 'block_expand_layer', ['input']],
+    ['expand', 'expand_layer', ['input', 'expand_as']],
+    ['repeat', 'repeat_layer', ['input']],
+    ['rotate', 'rotate_layer', ['input']],
+    ['seq_reshape', 'seq_reshape_layer', ['input']],
+    # math layers
+    ['addto', 'addto_layer', ['input']],
+    ['linear_comb', 'linear_comb_layer', ['weights', 'vectors']],
+    ['interpolation', 'interpolation_layer', ['input', 'weight']],
+    ['bilinear_interp', 'bilinear_interp_layer', ['input']],
+    ['power', 'power_layer', ['input', 'weight']],
+    ['scaling', 'scaling_layer', ['input', 'weight']],
+    ['slope_intercept', 'slope_intercept_layer', ['input']],
+    ['tensor', 'tensor_layer', ['a', 'b']],
+    ['cos_sim', 'cos_sim', ['a', 'b']],
+    ['trans', 'trans_layer', ['input']],
+    # sampling layers
+    ['max_id', 'maxid_layer', ['input']],
+    ['sampling_id', 'sampling_id_layer', ['input']],
+    # slicing and joining layers
+    ['pad', 'pad_layer', ['input']],
+    # cost layers
+    [
+        'classification_cost', 'classification_cost',
+        ['input', 'label', 'weight']
+    ],
+    ['regression_cost', 'regression_cost', ['input', 'label', 'weight']],
+    ['cross_entropy_cost', 'cross_entropy', ['input', 'label']],
+    [
+        'cross_entropy_with_selfnorm_cost', 'cross_entropy_with_selfnorm',
+        ['input', 'label']
+    ],
+    [
+        'multi_binary_label_cross_entropy_cost',
+        'multi_binary_label_cross_entropy', ['input', 'label']
+    ],
+    ['rank_cost', 'rank_cost', ['left', 'right', 'label', 'weight']],
+    ['lambda_cost', 'lambda_cost', ['input', 'score']],
+    ['sum_cost', 'sum_cost', ['input']],
+    ['huber_cost', 'huber_cost', ['input', 'label']],
+    ['crf', 'crf_layer', ['input', 'label']],
+    ['crf_decoding', 'crf_decoding_layer', ['input']],
+    ['ctc', 'ctc_layer', ['input', 'label']],
+    ['warp_ctc', 'warp_ctc_layer', ['input', 'label']],
+    ['nce', 'nce_layer', ['input', 'label']],
+    ['hsigmoid', 'hsigmoid', ['input', 'label']],
+    # check layers
+    ['eos', 'eos_layer', ['input']]
+]
+for l in layer_list:
+    globals()[l[0]] = __convert_to_v2__(l[1], l[2])
+
+# convert projection
+for prj in __projection_names__:
+    globals()[prj] = __convert_to_v2__(
+        prj, parent_names=['input'], is_default_name=False)
+
+# convert operator
+operator_list = [
+    # [V1_method_name, parent_names],
+    ['dotmul_operator', ['a', 'b']],
+    ['conv_operator', ['img', 'filter']]
+]
+for op in operator_list:
+    globals()[op[0]] = __convert_to_v2__(
+        op[0], parent_names=op[1], is_default_name=False)
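A sketch of what the table-driven conversion above enables, assuming the paddle.v2 package is built; the exact set of generated layer and projection names depends on dir(conf_helps):

    import paddle.v2 as paddle

    pixel = paddle.layer.data(name='pixel', type=paddle.data_type.dense_vector(784))
    hidden = paddle.layer.fc(input=pixel, size=100,
                             act=paddle.activation.Sigmoid())

    # mixed() supports the `with` grammar; += appends projections until exit
    with paddle.layer.mixed(size=10) as output:
        output += paddle.layer.full_matrix_projection(input=hidden)

    print paddle.layer.parse_network(output)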
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
index aa2942bc9faeb2a353459cd619886f56ea32f450..10e255dc945efb8b20f09dc1806d2ba7ef856c55 100644
--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -3,7 +3,10 @@ import paddle.trainer_config_helpers.optimizers as v1_optimizers
 import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
 import paddle.v2

-__all__ = ['Adam', 'Adamax']
+__all__ = [
+    'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
+    'RMSProp', 'ModelAverage', 'L2Regularization'
+]


 class Optimizer(object):
@@ -38,6 +41,14 @@ class Optimizer(object):
             pass_num)


+class Momentum(Optimizer):
+    def __init__(self, momentum=None, sparse=False, **kwargs):
+        learning_method = v1_optimizers.MomentumOptimizer(
+            momentum=momentum, sparse=sparse)
+        super(Momentum, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
 class Adam(Optimizer):
     def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
         learning_method = v1_optimizers.AdamOptimizer(
@@ -52,7 +63,45 @@ class Adamax(Optimizer):
         super(Adamax, self).__init__(learning_method=learning_method, **kwargs)


+class AdaGrad(Optimizer):
+    def __init__(self, **kwargs):
+        learning_method = v1_optimizers.AdaGradOptimizer()
+        super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)
+
+
+class DecayedAdaGrad(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
+        learning_method = v1_optimizers.DecayedAdaGradOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(DecayedAdaGrad, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class AdaDelta(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
+        learning_method = v1_optimizers.AdaDeltaOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(AdaDelta, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class RMSProp(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
+        learning_method = v1_optimizers.RMSPropOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(RMSProp, self).__init__(learning_method=learning_method, **kwargs)
+
+
+ModelAverage = v1_optimizers.ModelAverage
+L2Regularization = v1_optimizers.L2Regularization
+
 if __name__ == '__main__':
     swig_api.initPaddle('--use_gpu=false')
-    opt = paddle.v2.optimizer.Adam()
-    print opt.enable_types()
+    for opt in [
+            Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(),
+            AdaDelta(), RMSProp(), Adam(
+                model_average=ModelAverage(average_window=0.5),
+                regularization=L2Regularization(rate=0.5),
+                gradient_clipping_threshold=25)
+    ]:
+        print opt, opt.enable_types()
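A brief construction sketch for the broadened optimizer surface; extra keyword arguments flow through **kwargs into the underlying v1 settings, as the __main__ block above demonstrates:

    import paddle.v2 as paddle

    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        learning_rate=1e-3,
        model_average=paddle.optimizer.ModelAverage(average_window=0.5),
        regularization=paddle.optimizer.L2Regularization(rate=0.5))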
self).__init__(learning_method=learning_method, **kwargs) + + +ModelAverage = v1_optimizers.ModelAverage +L2Regularization = v1_optimizers.L2Regularization + if __name__ == '__main__': swig_api.initPaddle('--use_gpu=false') - opt = paddle.v2.optimizer.Adam() - print opt.enable_types() + for opt in [ + Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), + AdaDelta(), RMSProp(), Adam( + model_average=ModelAverage(average_window=0.5), + regularization=L2Regularization(rate=0.5), + gradient_clipping_threshold=25) + ]: + print opt, opt.enable_types() diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py new file mode 100644 index 0000000000000000000000000000000000000000..9076a159bb4f2c58bf52cba1657c58510637f4f8 --- /dev/null +++ b/python/paddle/v2/pooling.py @@ -0,0 +1,24 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers.poolings import * + +__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"] + +Max = MaxPooling +CudnnMax = CudnnMaxPooling +Avg = AvgPooling +CudnnAvg = CudnnAvgPooling +Sum = SumPooling +SquareRootN = SquareRootNPooling diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index 3a257af2fc5693c1675e85304dda0566fad6dacd..c77df827d956ecb08efd2473f7acef526023b468 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -1,3 +1,11 @@ +add_test(NAME test_v2_layer + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py + WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + +add_test(NAME test_v2_api + COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) + add_test(NAME topology_test COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/topology_test.py diff --git a/python/paddle/v2/tests/run_tests.sh b/python/paddle/v2/tests/run_tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..b96f54fe9cc78a436bc67e6c542b6e842aba997b --- /dev/null +++ b/python/paddle/v2/tests/run_tests.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
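
The optimizer classes and pooling names added above are thin wrappers over their v1 counterparts, so extra keyword arguments flow straight through Optimizer.__init__ into the v1 settings. A minimal usage sketch under that assumption (the learning_rate keyword is not introduced by this patch and is assumed to pass through **kwargs):

import py_paddle.swig_paddle as swig_api
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.optimizer as optimizer
import paddle.v2.pooling as pooling

swig_api.initPaddle('--use_gpu=false')

# The v2 pooling names are direct aliases of the v1 pooling classes, so
# they drop into any layer that expects a pooling type.
seq = layer.data(name='seq', type=data_type.dense_vector(128))
avg = layer.pool(input=seq, pooling_type=pooling.Avg())

# Regularization and model averaging compose with any optimizer, as in
# the __main__ smoke test above.
adam = optimizer.Adam(
    learning_rate=0.01,
    regularization=optimizer.L2Regularization(rate=0.5),
    model_average=optimizer.ModelAverage(average_window=0.5))
print adam.enable_types()
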
+ + +pushd `dirname $0` > /dev/null +SCRIPTPATH=$PWD +popd > /dev/null + +cd $SCRIPTPATH + +$1 -m pip install ../../../../paddle/dist/*.whl + +test_list="test_data_feeder.py" + +export PYTHONPATH=$PWD/../../../../python/ + +for fn in $test_list +do + echo "test $fn" + $1 $fn + if [ $? -ne 0 ]; then + exit 1 + fi +done diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py new file mode 100644 index 0000000000000000000000000000000000000000..5f67da6a5b32d74228d727d94ec79b9f7a06dab7 --- /dev/null +++ b/python/paddle/v2/tests/test_data_feeder.py @@ -0,0 +1,238 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import py_paddle.swig_paddle as api +import numpy as np + +from paddle.v2 import data_type +from paddle.v2.data_feeder import DataFeeder + + +class DataFeederTest(unittest.TestCase): + def dense_reader(self, size): + data = np.random.random(size) + return data + + def sparse_binary_reader(self, high, size_limit, non_empty=False): + num = np.random.randint(size_limit) # num could be 0 + while non_empty and num == 0: + num = np.random.randint(size_limit) + return np.random.randint(high, size=num).tolist() + + def test_dense(self): + def compare(input): + feeder = DataFeeder([('image', data_type.dense_vector(784))], + {'image': 0}) + arg = feeder(input) + output = arg.getSlotValue(0).copyToNumpyMat() + input = np.array(input, dtype='float32') + self.assertAlmostEqual(input.all(), output.all()) + + # test numpy array + batch_size = 32 + dim = 784 + data = [] + for i in xrange(batch_size): + each_sample = [] + each_sample.append(self.dense_reader(dim)) + data.append(each_sample) + compare(data) + + # each feature is a list + data = [] + for i in xrange(batch_size): + each_sample = [] + each_sample.append(self.dense_reader(dim).tolist()) + data.append(each_sample) + compare(data) + + # test tuple + data = [] + for i in xrange(batch_size): + each_sample = (self.dense_reader(dim).tolist(), ) + data.append(each_sample) + compare(data) + + def test_sparse_binary(self): + dim = 10000 + batch_size = 32 + data = [] + for i in xrange(batch_size): + each_sample = [] + each_sample.append(self.sparse_binary_reader(dim, 50)) + data.append(each_sample) + feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))], + {'input': 0}) + arg = feeder(data) + output = arg.getSlotValue(0) + assert isinstance(output, api.Matrix) + for i in xrange(batch_size): + self.assertEqual(output.getSparseRowCols(i), data[i][0]) + + def test_sparse(self): + dim = 10000 + batch_size = 32 + v = [] + w = [] + data = [] + for dat in xrange(batch_size): + each_sample = [] + a = self.sparse_binary_reader(dim, 40, non_empty=True) + b = self.dense_reader(len(a)).tolist() + v.append(a) + w.append(np.array(b, dtype="float32")) + each_sample.append(zip(a, b)) + data.append(each_sample) + + feeder = DataFeeder([('input', data_type.sparse_vector(dim))], + {'input': 0}) + arg = feeder(data) + output = 
arg.getSlotValue(0)
+        assert isinstance(output, api.Matrix)
+        for i in xrange(batch_size):
+            self.assertEqual(output.getSparseRowCols(i), v[i])
+            cols_value = output.getSparseRowColsVal(i)
+            value = [val[1] for val in cols_value]
+            value = np.array(value, dtype="float32")
+            self.assertAlmostEqual(value.all(), w[i].all())
+
+    def test_integer(self):
+        dim = 100
+        batch_size = 32
+        index = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(np.random.randint(dim))
+            index.append(each_sample)
+        feeder = DataFeeder([('input', data_type.integer_value(dim))],
+                            {'input': 0})
+        arg = feeder(index)
+        output = arg.getSlotIds(0).copyToNumpyArray()
+        index = np.array(index, dtype='int')
+        self.assertEqual(output.all(), index.flatten().all())
+
+    def test_integer_sequence(self):
+        dim = 10000
+        batch_size = 32
+        start = [0]
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(
+                self.sparse_binary_reader(
+                    dim, 30, non_empty=True))
+            data.append(each_sample)
+            start.append(len(each_sample[0]) + start[-1])
+        feeder = DataFeeder([('input', data_type.integer_value_sequence(dim))],
+                            {'input': 0})
+        arg = feeder(data)
+        output_data = arg.getSlotIds(0).copyToNumpyArray()
+        output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
+
+        index = []
+        for dat in data:
+            index.extend(x for x in dat[0])  # only one feature, so dat[0]
+        index = np.array(index, dtype='int')
+        start = np.array(start, dtype='int')
+        self.assertEqual(output_data.all(), index.all())
+        self.assertEqual(output_start.all(), start.all())
+
+    def test_multiple_features(self):
+        batch_size = 2
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(np.random.randint(10))
+            each_sample.append(
+                self.sparse_binary_reader(
+                    20000, 40, non_empty=True))
+            each_sample.append(self.dense_reader(100))
+            data.append(each_sample)
+
+        # test multiple features
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
+        arg = feeder(data)
+        output_dense = arg.getSlotValue(0).copyToNumpyMat()
+        output_sparse = arg.getSlotValue(1)
+        output_index = arg.getSlotIds(2).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(output_dense[i].all(), data[i][2].all())
+            self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1])
+            self.assertEqual(output_index[i], data[i][0])
+
+        # reader returns 3 features, but only 2 of them are used
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0})
+        arg = feeder(data)
+        output_dense = arg.getSlotValue(0).copyToNumpyMat()
+        output_index = arg.getSlotIds(1).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(output_dense[i].all(), data[i][2].all())
+            self.assertEqual(output_index[i], data[i][0])
+
+        # reader returns 3 features; one of them is fed twice (duplicate data)
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10)),
+                      ('fea3', data_type.dense_vector(100))]
+        feeder = DataFeeder(data_types,
+                            {'fea0': 2,
+                             'fea1': 1,
+                             'fea2': 0,
+                             'fea3': 2})
+        arg = feeder(data)
+        fea0 = arg.getSlotValue(0).copyToNumpyMat()
+        fea1 = arg.getSlotValue(1)
+        fea2 = arg.getSlotIds(2).copyToNumpyArray()
+        fea3 = arg.getSlotValue(3).copyToNumpyMat()
+        for i in xrange(batch_size):
self.assertEqual(fea0[i].all(), data[i][2].all()) + self.assertEqual(fea1.getSparseRowCols(i), data[i][1]) + self.assertEqual(fea2[i], data[i][0]) + self.assertEqual(fea3[i].all(), data[i][2].all()) + + def test_multiple_features_tuple(self): + batch_size = 2 + data = [] + for i in xrange(batch_size): + a = np.random.randint(10) + b = self.sparse_binary_reader(20000, 40, non_empty=True) + c = self.dense_reader(100) + each_sample = (a, b, c) + data.append(each_sample) + + # test multiple features + data_types = [('fea0', data_type.dense_vector(100)), + ('fea1', data_type.sparse_binary_vector(20000)), + ('fea2', data_type.integer_value(10))] + feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0}) + arg = feeder(data) + out_dense = arg.getSlotValue(0).copyToNumpyMat() + out_sparse = arg.getSlotValue(1) + out_index = arg.getSlotIds(2).copyToNumpyArray() + for i in xrange(batch_size): + self.assertEqual(out_dense[i].all(), data[i][2].all()) + self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1]) + self.assertEqual(out_index[i], data[i][0]) + + +if __name__ == '__main__': + api.initPaddle("--use_gpu=0") + unittest.main() diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..bb0099ea2fbb78b0a05eedf23af95a02e8849015 --- /dev/null +++ b/python/paddle/v2/tests/test_layer.py @@ -0,0 +1,259 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
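
Taken together, the feeder tests above pin down the convention: each sample is a list or tuple of features, reader_dict maps every declared input name to its position inside the sample, and slot order in the resulting Arguments follows the data_types list. A condensed sketch of the same flow (the input names here are illustrative, not from this patch):

import numpy as np
import py_paddle.swig_paddle as api
from paddle.v2 import data_type
from paddle.v2.data_feeder import DataFeeder

api.initPaddle("--use_gpu=0")

# Each sample is (label, image); the dict says 'image' lives at position 1
# and 'label' at position 0 of every sample.
samples = [(3, np.random.random(784)), (7, np.random.random(784))]
feeder = DataFeeder(
    [('image', data_type.dense_vector(784)),
     ('label', data_type.integer_value(10))],
    {'image': 1, 'label': 0})
arg = feeder(samples)                          # an api.Arguments instance
image = arg.getSlotValue(0).copyToNumpyMat()   # slot 0 == 'image'
labels = arg.getSlotIds(1).copyToNumpyArray()  # slot 1 == 'label'
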
+import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.attr as attr +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +import paddle.v2.pooling as pooling +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + +pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) +label = layer.data(name='label', type=data_type.integer_value(10)) +weight = layer.data(name='weight', type=data_type.dense_vector(10)) +score = layer.data(name='score', type=data_type.dense_vector(1)) + +hidden = layer.fc(input=pixel, + size=100, + act=activation.Sigmoid(), + param_attr=attr.Param(name='hidden')) +inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) +conv = layer.img_conv( + input=pixel, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + act=activation.Linear()) + + +class ImageLayerTest(unittest.TestCase): + def test_conv_layer(self): + conv_shift = layer.conv_shift(a=pixel, b=score) + print layer.parse_network(conv, conv_shift) + + def test_pooling_layer(self): + maxpool = layer.img_pool( + input=conv, + pool_size=2, + num_channels=16, + padding=1, + pool_type=pooling.Max()) + spp = layer.spp(input=conv, + pyramid_height=2, + num_channels=16, + pool_type=pooling.Max()) + maxout = layer.maxout(input=conv, num_channels=16, groups=4) + print layer.parse_network(maxpool, spp, maxout) + + def test_norm_layer(self): + norm1 = layer.img_cmrnorm(input=conv, size=5) + norm2 = layer.batch_norm(input=conv) + norm3 = layer.sum_to_one_norm(input=conv) + print layer.parse_network(norm1, norm2, norm3) + + +class AggregateLayerTest(unittest.TestCase): + def test_aggregate_layer(self): + pool = layer.pool( + input=pixel, + pooling_type=pooling.Avg(), + agg_level=layer.AggregateLevel.EACH_SEQUENCE) + last_seq = layer.last_seq(input=pixel) + first_seq = layer.first_seq(input=pixel) + concat = layer.concat(input=[last_seq, first_seq]) + seq_concat = layer.seq_concat(a=last_seq, b=first_seq) + print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat) + + +class MathLayerTest(unittest.TestCase): + def test_math_layer(self): + addto = layer.addto(input=[pixel, pixel]) + linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10) + interpolation = layer.interpolation( + input=[hidden, hidden], weight=score) + bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4) + power = layer.power(input=pixel, weight=score) + scaling = layer.scaling(input=pixel, weight=score) + slope = layer.slope_intercept(input=pixel) + tensor = layer.tensor(a=pixel, b=pixel, size=1000) + cos_sim = layer.cos_sim(a=pixel, b=pixel) + trans = layer.trans(input=tensor) + print layer.parse_network(addto, linear_comb, interpolation, power, + scaling, slope, tensor, cos_sim, trans) + + +class ReshapeLayerTest(unittest.TestCase): + def test_reshape_layer(self): + block_expand = layer.block_expand( + input=conv, num_channels=4, stride_x=1, block_x=1) + expand = layer.expand( + input=weight, + expand_as=pixel, + expand_level=layer.ExpandLevel.FROM_TIMESTEP) + repeat = layer.repeat(input=pixel, num_repeats=4) + reshape = layer.seq_reshape(input=pixel, reshape_size=4) + rotate = layer.rotate(input=pixel, height=16, width=49) + print layer.parse_network(block_expand, expand, repeat, reshape, rotate) + + +class RecurrentLayerTest(unittest.TestCase): + def test_recurrent_layer(self): + word = 
layer.data(name='word', type=data_type.integer_value(12)) + recurrent = layer.recurrent(input=word) + lstm = layer.lstmemory(input=word) + gru = layer.grumemory(input=word) + print layer.parse_network(recurrent, lstm, gru) + + +class CostLayerTest(unittest.TestCase): + def test_cost_layer(self): + cost1 = layer.classification_cost(input=inference, label=label) + cost2 = layer.classification_cost( + input=inference, label=label, weight=weight) + cost3 = layer.cross_entropy_cost(input=inference, label=label) + cost4 = layer.cross_entropy_with_selfnorm_cost( + input=inference, label=label) + cost5 = layer.regression_cost(input=inference, label=label) + cost6 = layer.regression_cost( + input=inference, label=label, weight=weight) + cost7 = layer.multi_binary_label_cross_entropy_cost( + input=inference, label=label) + cost8 = layer.rank_cost(left=score, right=score, label=score) + cost9 = layer.lambda_cost(input=inference, score=score) + cost10 = layer.sum_cost(input=inference) + cost11 = layer.huber_cost(input=score, label=label) + + print layer.parse_network(cost1, cost2) + print layer.parse_network(cost3, cost4) + print layer.parse_network(cost5, cost6) + print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + + crf = layer.crf(input=inference, label=label) + crf_decoding = layer.crf_decoding(input=inference, size=3) + ctc = layer.ctc(input=inference, label=label) + warp_ctc = layer.warp_ctc(input=pixel, label=label) + nce = layer.nce(input=inference, label=label, num_classes=3) + hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3) + + print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce, + hsigmoid) + + +class OtherLayerTest(unittest.TestCase): + def test_sampling_layer(self): + maxid = layer.max_id(input=inference) + sampling_id = layer.sampling_id(input=inference) + eos = layer.eos(input=maxid, eos_id=5) + print layer.parse_network(maxid, sampling_id, eos) + + def test_slicing_joining_layer(self): + pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) + print layer.parse_network(pad) + + +class ProjOpTest(unittest.TestCase): + def test_projection(self): + input = layer.data(name='data', type=data_type.dense_vector(784)) + word = layer.data( + name='word', type=data_type.integer_value_sequence(10000)) + fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid()) + fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid()) + mixed0 = layer.mixed( + size=256, + input=[ + layer.full_matrix_projection(input=fc0), + layer.full_matrix_projection(input=fc1) + ]) + with layer.mixed(size=200) as mixed1: + mixed1 += layer.full_matrix_projection(input=fc0) + mixed1 += layer.identity_projection(input=fc1) + + table = layer.table_projection(input=word) + emb0 = layer.mixed(size=512, input=table) + with layer.mixed(size=512) as emb1: + emb1 += table + + scale = layer.scaling_projection(input=fc0) + scale0 = layer.mixed(size=100, input=scale) + with layer.mixed(size=100) as scale1: + scale1 += scale + + dotmul = layer.dotmul_projection(input=fc0) + dotmul0 = layer.mixed(size=100, input=dotmul) + with layer.mixed(size=100) as dotmul1: + dotmul1 += dotmul + + context = layer.context_projection(input=fc0, context_len=5) + context0 = layer.mixed(size=100, input=context) + with layer.mixed(size=100) as context1: + context1 += context + + conv = layer.conv_projection( + input=input, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = layer.mixed(input=conv, bias_attr=True) + with layer.mixed(bias_attr=True) as 
conv1: + conv1 += conv + + print layer.parse_network(mixed0) + print layer.parse_network(mixed1) + print layer.parse_network(emb0) + print layer.parse_network(emb1) + print layer.parse_network(scale0) + print layer.parse_network(scale1) + print layer.parse_network(dotmul0) + print layer.parse_network(dotmul1) + print layer.parse_network(conv0) + print layer.parse_network(conv1) + + def test_operator(self): + ipt0 = layer.data(name='data', type=data_type.dense_vector(784)) + ipt1 = layer.data(name='word', type=data_type.dense_vector(128)) + fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) + fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid()) + + dotmul_op = layer.dotmul_operator(a=fc0, b=fc1) + dotmul0 = layer.mixed(input=dotmul_op) + with layer.mixed() as dotmul1: + dotmul1 += dotmul_op + + conv = layer.conv_operator( + img=ipt0, + filter=ipt1, + filter_size=1, + num_channels=1, + num_filters=128, + stride=1, + padding=0) + conv0 = layer.mixed(input=conv) + with layer.mixed() as conv1: + conv1 += conv + + print layer.parse_network(dotmul0) + print layer.parse_network(dotmul1) + print layer.parse_network(conv0) + print layer.parse_network(conv1) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 969aa6e0e059350bed110ac7cd9a8af654d91dc5..96a3ee4fd446c11ad2f3506db3b13d8369590323 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -3,6 +3,7 @@ import collections import py_paddle.swig_paddle as api from py_paddle import DataProviderConverter +from data_feeder import DataFeeder from . import event as v2_event from . import optimizer as v2_optimizer from . import parameters as v2_parameters @@ -68,7 +69,8 @@ class SGD(ITrainer): test_data_reader=None, event_handler=None, batch_size=32, - data_types=None): + data_types=None, + reader_dict=None): """ Training method. Will train num_passes of input data. @@ -96,22 +98,37 @@ class SGD(ITrainer): self.__optimizer__.enable_types()) assert isinstance(gm, api.GradientMachine) parameters.append_gradient_machine(gm) - + gm.randParameters() updater = self.__optimizer__.create_local_updater() updater.init(gm) gm.start() + batch_evaluator = gm.makeEvaluator() + assert isinstance(batch_evaluator, api.Evaluator) + pass_evaluator = gm.makeEvaluator() + assert isinstance(pass_evaluator, api.Evaluator) out_args = api.Arguments.createArguments(0) + data_types_lists = [data_type[1] for data_type in topology.data_type()] converter = DataProviderConverter(input_types=data_types_lists) + feeder = DataFeeder(data_types, reader_dict) + for pass_id in xrange(num_passes): + event_handler(v2_event.BeginPass(pass_id)) + pass_evaluator.start() updater.startPass() for batch_id, data_batch in enumerate( __data_reader_to_batch__(train_data_reader, batch_size, topology)): + batch_evaluator.start() + event_handler( + v2_event.BeginIteration( + pass_id=pass_id, batch_id=batch_id)) pass_type = updater.startBatch(len(data_batch)) - gm.forwardBackward(converter(data_batch), out_args, pass_type) + gm.forwardBackward(feeder(data_batch), out_args, pass_type) + gm.eval(pass_evaluator) + gm.eval(batch_evaluator) for each_param in gm.getParameters(): updater.update(each_param) # Get cost. We use numpy to calculate total cost for this batch. 
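
The evaluator calls threaded through the training loop above follow one lifecycle: create once per scope, start() before accumulating, gm.eval() after each forward/backward, finish() to seal the window. A condensed sketch of that protocol, reusing gm, feeder, out_args and pass_type from the surrounding train() body (getNames/getValue come from the swig Evaluator API extended by this patch):

# per-batch metric window, as wired into the loop above
evaluator = gm.makeEvaluator()              # an api.Evaluator
evaluator.start()
gm.forwardBackward(feeder(data_batch), out_args, pass_type)
gm.eval(evaluator)                          # accumulate this batch's stats
evaluator.finish()
for name in evaluator.getNames():
    print name, evaluator.getValue(name)    # raises RuntimeError on failure
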
@@ -119,11 +136,17 @@ class SGD(ITrainer): cost_vec = cost_vec.copyToNumpyMat() cost = cost_vec.sum() / len(data_batch) updater.finishBatch(cost) + batch_evaluator.finish() event_handler( v2_event.EndIteration( - pass_id=pass_id, batch_id=batch_id, cost=cost)) + pass_id=pass_id, + batch_id=batch_id, + cost=cost, + evaluator=batch_evaluator)) updater.finishPass() + pass_evaluator.finish() + event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) gm.finish()
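
On the user side, the evaluator handed to EndIteration and EndPass makes metrics queryable from the event handler. A sketch, assuming the v2 event classes store the constructor arguments shown above as attributes:

import paddle.v2 as paddle

def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            # event.evaluator is the batch_evaluator filled by gm.eval()
            print "Pass %d, Batch %d, Cost %f" % (
                event.pass_id, event.batch_id, event.cost)
    if isinstance(event, paddle.event.EndPass):
        # the pass-level evaluator accumulated over the entire pass
        for name in event.evaluator.getNames():
            print "Pass %d, %s: %f" % (
                event.pass_id, name, event.evaluator.getValue(name))
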