Commit c49644a4 authored by Q qiaolongfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into topology

@@ -126,7 +126,7 @@ class ImageClassifier():
         # For oversampling, average predictions across crops.
         # If not, the shape of output[name]: (1, class_number),
         # the mean is also applicable.
-        return output[output_layer].mean(0)
+        return output[output_layer]['value'].mean(0)

     def predict(self, image=None, output_layer=None):
         assert isinstance(image, basestring)
...
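With this commit, `getLayerOutputs` maps each layer name to a dict of output fields instead of a bare matrix, hence the extra `['value']` lookup above. A minimal runnable sketch of the new access pattern; the layer name and values are purely illustrative:

```python
import numpy

# Assumed shape of the new return value: {layer_name: {field_name: ndarray}}.
output = {'__fc_layer_0__': {'value': numpy.array([[0.1, 0.9], [0.8, 0.2]])}}
# Average predictions across crops, exactly as the classifier above does.
print output['__fc_layer_0__']['value'].mean(0)  # -> [ 0.45  0.55]
```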
-import numpy
 import paddle.v2 as paddle
 import mnist_util
@@ -27,19 +26,14 @@ def main():
     cost = paddle.layer.classification_cost(input=inference, label=label)

     parameters = paddle.parameters.create(cost)
-    for param_name in parameters.keys():
-        array = parameters.get(param_name)
-        array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
-        parameters.set(parameter_name=param_name, value=array)
-
     adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)

     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
-            para = parameters.get('___fc_2__.w0')
-            print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % (
-                event.pass_id, event.batch_id, event.cost, para.mean())
+            if event.batch_id % 100 == 0:
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
         else:
             pass
...
@@ -156,7 +156,7 @@ class ImageClassifier():
             # For oversampling, average predictions across crops.
             # If not, the shape of output[name]: (1, class_number),
             # the mean is also applicable.
-            res[name] = output[name].mean(0)
+            res[name] = output[name]['value'].mean(0)

         return res
...
@@ -139,24 +139,12 @@ lstmemory
   :members: lstmemory
   :noindex:

-lstm_step_layer
----------------
-.. automodule:: paddle.trainer_config_helpers.layers
-   :members: lstm_step_layer
-   :noindex:
-
 grumemory
 ---------
 .. automodule:: paddle.trainer_config_helpers.layers
    :members: grumemory
    :noindex:

-gru_step_layer
----------------
-.. automodule:: paddle.trainer_config_helpers.layers
-   :members: gru_step_layer
-   :noindex:
-
 Recurrent Layer Group
 =====================
@@ -172,6 +160,18 @@ recurrent_group
    :members: recurrent_group
    :noindex:

+lstm_step_layer
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: lstm_step_layer
+   :noindex:
+
+gru_step_layer
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: gru_step_layer
+   :noindex:
+
 beam_search
 ------------
 .. automodule:: paddle.trainer_config_helpers.layers
@@ -308,6 +308,12 @@ repeat_layer
    :members: repeat_layer
    :noindex:

+rotate_layer
+------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: rotate_layer
+   :noindex:
+
 seq_reshape_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
@@ -462,6 +468,12 @@ ctc_layer
    :members: ctc_layer
    :noindex:

+warp_ctc_layer
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+   :members: warp_ctc_layer
+   :noindex:
+
 nce_layer
 -----------
 .. automodule:: paddle.trainer_config_helpers.layers
...
@@ -38,6 +38,13 @@ Arguments* Arguments::createByPaddleArgumentVector(void* ptr) {
   return args;
 }

+Arguments* Arguments::createByPaddleArgument(const void* ptr) {
+  auto p = (paddle::Argument*)(ptr);
+  auto args = new Arguments();
+  args->m->outputs.push_back(*p);
+  return args;
+}
+
 Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) {
   auto& a = m->getArg(idx);
   return Matrix::createByPaddleMatrixPtr(&a.value);
...
@@ -27,3 +27,18 @@ std::string Evaluator::toString() {
   m->rawPtr->printStats(sout);
   return sout.str();
 }
+
+std::vector<std::string> Evaluator::getNames() const {
+  std::vector<std::string> retv;
+  m->rawPtr->getNames(&retv);
+  return retv;
+}
+
+double Evaluator::getValue(const std::string name) const {
+  paddle::Error err;
+  double v = m->rawPtr->getValue(name, &err);
+  if (err) {
+    throw std::runtime_error(err.msg());
+  }
+  return v;
+}
@@ -144,12 +144,12 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {

 void GradientMachine::randParameters() { m->machine->randParameters(); }

-Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const
+Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
     throw(UnsupportError) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(m->machine);
+  auto nn = m->machine;
   if (nn) {
-    auto mat = nn->getLayerOutput(layerName);
-    return Matrix::createByPaddleMatrixPtr(&mat);
+    auto arg = nn->getLayerOutput(layerName);
+    return Arguments::createByPaddleArgument(&arg);
   } else {
     throw UnsupportError();
   }
...
@@ -454,6 +454,7 @@ public:
 private:
   static Arguments* createByPaddleArgumentVector(void* ptr);
+  static Arguments* createByPaddleArgument(const void* ptr);
   void* getInternalArgumentsPtr() const;

 private:
@@ -769,7 +770,7 @@ public:
   void randParameters();

-  Matrix* getLayerOutput(const std::string& layerName) const
+  Arguments* getLayerOutput(const std::string& layerName) const
       throw(UnsupportError);

   /**
@@ -900,6 +901,10 @@ public:
   */
   std::string toString();

+  std::vector<std::string> getNames() const;
+
+  double getValue(const std::string name) const;
+
 private:
   EvaluatorPrivate* m;
@@ -952,7 +957,7 @@ public:

   Arguments* getForwardOutput();

-  Matrix* getLayerOutput(const std::string& layerName);
+  Arguments* getLayerOutput(const std::string& layerName) const;
 };

 /// the N-Best results generated from one input sequence.
...
@@ -131,12 +131,11 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) {
 void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); }
 void Trainer::finishTestPeriod() { m->finishTestPeriod(); }

-Matrix* Trainer::getLayerOutput(const std::string& layerName) {
-  auto nn = std::dynamic_pointer_cast<paddle::NeuralNetwork>(
-      this->m->getGradientMachine());
+Arguments* Trainer::getLayerOutput(const std::string& layerName) const {
+  auto nn = this->m->getGradientMachine();
   CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork";
-  auto m = nn->getLayerOutput(layerName);
-  return Matrix::createByPaddleMatrixPtr(&m);
+  auto arg = nn->getLayerOutput(layerName);
+  return Arguments::createByPaddleArgument(&arg);
 }

 void Trainer::forwardOneBatch(size_t batchSize) {
...
@@ -89,9 +89,14 @@ def main():
         except Exception as e:
             print e

+        ev = m.makeEvaluator()
+        ev.start()
         m.forwardBackward(inArgs, outArgs, swig_paddle.PASS_TRAIN,
                           update_callback)
+        m.eval(ev)
+        ev.finish()
+        for name in ev.getNames():
+            print name, ev.getValue(name)

         for optimizer in optimizers:
             optimizer.finishBatch()
...
@@ -134,6 +134,10 @@ public:
     backward(callback);
   }

+  virtual Argument getLayerOutput(const std::string& layerName) {
+    return *((Argument*)nullptr);
+  }
+
   // see comment in Layer.h for the function with the same name
   virtual void resetState() {}
...
@@ -282,6 +282,18 @@ void MultiGradientMachine::forwardBackward(const std::vector<Argument>& inArgs,
   backwardImp(callback);
 }

+Argument MultiGradientMachine::getLayerOutput(const std::string& layerName) {
+  std::vector<Argument> args;
+  args.reserve(threads_.size());
+
+  for (auto& thread : threads_) {
+    args.push_back(thread->getGradientMachine()->getLayerOutput(layerName));
+  }
+  outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_);
+
+  return outLayerArgs_;
+}
+
 void MultiGradientMachine::backwardImp(const UpdateCallback& callback) {
   for (size_t i = 0; i < parameters_.size(); i++) {
     if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue;
...
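Why the concat: under data parallelism each thread computes the layer output only for its slice of the batch, so `outLayerArgs_.concat` stitches the per-thread arguments back into one full-batch argument. A small numpy sketch of the idea (shapes illustrative, not the Paddle API):

```python
import numpy

# Two workers, each holding the layer output for half of a 64-sample batch.
per_thread = [numpy.zeros((32, 10)), numpy.ones((32, 10))]
full_batch = numpy.concatenate(per_thread, axis=0)  # what concat() restores
assert full_batch.shape == (64, 10)
```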
@@ -189,6 +189,8 @@ public:
                                PassType passType,
                                const UpdateCallback& callback);

+  virtual Argument getLayerOutput(const std::string& layerName);
+
   virtual void onPassEnd();

   virtual void finish();
@@ -314,6 +316,8 @@ protected:
   std::vector<Argument> outArgs_;
   hl_stream_t outArgStream_;

+  Argument outLayerArgs_;
+
   /// ParameterType which needs to be merged from each GPU
   std::vector<ParameterType> mergeTypes_;
   int numDevices_; /* number of gpu devices */
...
@@ -293,11 +293,10 @@ void NeuralNetwork::backward(const UpdateCallback& callback) {
   }
 }

-MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) {
-  auto it = layerMap_.find(layerName);
-  CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName;
-  return it->second->getOutputValue();
+Argument NeuralNetwork::getLayerOutput(const std::string& layerName) {
+  return getLayer(layerName)->getOutput();
 }

 void NeuralNetwork::onPassEnd() {
   for (auto& layer : layers_) {
     layer->onPassEnd();
...
@@ -87,7 +87,8 @@ public:
   virtual void backward(const UpdateCallback& callback = nullptr);

-  MatrixPtr getLayerOutput(const std::string& layerName);
+  virtual Argument getLayerOutput(const std::string& layerName);
+
   const LayerPtr& getLayer(const std::string& layerName) const {
     auto it = layerMap_.find(layerName);
     CHECK(it != layerMap_.end()) << "Unknown layer " << layerName;
...
@@ -42,7 +42,7 @@ void CosSimLayer::forward(PassType passType) {
   /* malloc memory for the output_ if necessary */
   int batchSize = getInputValue(0)->getHeight();
   int size = getSize();
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

   {
     REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str());
@@ -68,7 +68,7 @@ void CosSimLayer::forward(PassType passType) {
 void CosSimLayer::backward(const UpdateCallback& callback) {
   /* activation */ {
     REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str());
-    CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+    CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";

     const auto outG = this->getOutputGrad();
     const auto outV = this->getOutputValue();
...
@@ -112,7 +112,7 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap,
 void CosSimVecMatLayer::forward(PassType passType) {
   Layer::forward(passType);
-  CHECK_EQ(forward_.size(), 1) << "Only one forward function needed";
+  CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed";

   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
@@ -145,7 +145,7 @@ void CosSimVecMatLayer::forward(PassType passType) {
 }

 void CosSimVecMatLayer::backward(const UpdateCallback& callback) {
-  CHECK_EQ(backward_.size(), 1) << "Only one backward function needed";
+  CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed";

   MatrixPtr inV0 = getInputValue(0);
   MatrixPtr inV1 = getInputValue(1);
...
@@ -17,10 +17,10 @@ limitations under the License. */

 TEST(RowBuffer, testAutoGrow) {
   paddle::RowBuffer buf(128);
-  ASSERT_EQ(128, buf.getWidth());
+  ASSERT_EQ(128UL, buf.getWidth());
   ASSERT_TRUE(buf.isAutoGrowth());

   buf.resize(2);
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }
@@ -35,7 +35,7 @@ TEST(RowBuffer, testAutoGrow) {
     data[i] = i;
   }

-  ASSERT_EQ(3, buf.getRowCount());
+  ASSERT_EQ(3UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getRowCount() - 1; ++i) {
     for (size_t j = 0; j < buf.getWidth(); ++j) {
       ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5);
@@ -51,7 +51,7 @@ TEST(RowBuffer, testWithMemBuf) {
       std::make_shared<paddle::CpuMemoryHandle>(128 * 2 * sizeof(real));
   paddle::RowBuffer buf(mem, 128);
   ASSERT_TRUE(!buf.isAutoGrowth());
-  ASSERT_EQ(2, buf.getRowCount());
+  ASSERT_EQ(2UL, buf.getRowCount());
   for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
     buf.data()[i] = i;
   }
...
@@ -23,7 +23,8 @@ __all__ = ['DataProviderConverter']
 class IScanner(object):
     def __init__(self, input_type, pos):
         self.input_type = input_type
-        assert isinstance(self.input_type, dp2.InputType)
+        if not isinstance(self.input_type, dp2.InputType):
+            raise ValueError("input type should be dataprovider2.InputType")
         self.pos = pos

     def scan(self, dat):
@@ -50,7 +51,6 @@ class DenseScanner(IScanner):

     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        assert isinstance(self.input_type, dp2.InputType)
         if self.__mat__.dtype != numpy.float32:
             self.__mat__ = self.__mat__.astype(numpy.float32)
         m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
@@ -63,7 +63,6 @@ class SparseBinaryScanner(IScanner):
         self.__rows__ = [0]
         self.__cols__ = []
         self.__height__ = 0
-        self.__nnz__ = 0
         self.__value__ = []

     def scan(self, dat):
@@ -76,7 +75,6 @@ class SparseBinaryScanner(IScanner):

     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        assert isinstance(self.input_type, dp2.InputType)
         m = swig_paddle.Matrix.createSparse(self.__height__,
                                             self.input_type.dim,
                                             len(self.__cols__),
...
@@ -208,7 +208,7 @@ def __monkeypatch_gradient_machine__():
         output = dict()

         for name in layerNames:
-            output[name] = __matrix_to_numpy__(self.getLayerOutput(name))
+            output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name))
         return output

     swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs
...
@@ -10,28 +10,30 @@ RUN apt-get update && \
     apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
     apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
     apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
-    apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    apt-get install -y automake && \
     apt-get clean -y

 RUN pip install --upgrade pip && \
-    pip install -U protobuf && \
+    pip install -U "protobuf==3.1.0" && \
     pip install -U wheel pillow BeautifulSoup && \
     pip install -U docopt PyYAML sphinx && \
     pip install -U sphinx_rtd_theme recommonmark jupyter

 RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
-    cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
+    cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
     cd .. && rm -rf cmake-3.4.1

+ARG BUILD_WOBOQ
 ARG BUILD_AND_INSTALL
 ARG WITH_AVX
 ARG WITH_DOC
 ARG WITH_STYLE_CHECK

+ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
 ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
 ENV WITH_GPU=OFF
 ENV WITH_AVX=${WITH_AVX:-ON}
-ENV WITH_DOC=${WITH_DOC:-ON}
+ENV WITH_DOC=${WITH_DOC:-OFF}
 ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}

 RUN mkdir /paddle
...
@@ -10,28 +10,30 @@ RUN apt-get update && \
     apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
     apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
     apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
-    apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    apt-get install -y automake && \
    apt-get clean -y

 RUN pip install --upgrade pip && \
-    pip install -U protobuf && \
+    pip install -U "protobuf==3.1.0" && \
     pip install -U wheel pillow BeautifulSoup && \
     pip install -U docopt PyYAML sphinx && \
     pip install -U sphinx_rtd_theme recommonmark jupyter

 RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
-    cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
+    cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
     cd .. && rm -rf cmake-3.4.1

+ARG BUILD_WOBOQ
 ARG BUILD_AND_INSTALL
 ARG WITH_AVX
 ARG WITH_DOC
 ARG WITH_STYLE_CHECK

+ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
 ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
 ENV WITH_GPU=ON
 ENV WITH_AVX=${WITH_AVX:-ON}
-ENV WITH_DOC=${WITH_DOC:-ON}
+ENV WITH_DOC=${WITH_DOC:-OFF}
 ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}

 RUN mkdir /paddle
...
@@ -11,7 +11,7 @@ set -e
 # If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied
 # source tree to /paddle, and this scripts should build it into
 # /paddle/build.
-if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
+if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
     if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
         ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so
     fi
@@ -19,7 +19,7 @@ if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
     mkdir -p /paddle/build # -p means no error if exists
     cd /paddle/build
     cmake .. \
-          -DWITH_DOC=ON \
+          -DWITH_DOC=${WITH_DOC:-OFF} \
           -DWITH_GPU=${WITH_GPU:-OFF} \
           -DWITH_AVX=${WITH_AVX:-OFF} \
           -DWITH_SWIG_PY=ON \
@@ -29,28 +29,32 @@ if [[ ${BUILD_AND_INSTALL:-ON} == 'ON' ]]; then
     make -j `nproc`
     make install

-    # Install woboq_codebrowser.
-    git clone https://github.com/woboq/woboq_codebrowser /woboq
-    cd /woboq
-    cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
-          -DCMAKE_BUILD_TYPE=Release \
-          .
-    make
-
-    export WOBOQ_OUT=/usr/share/nginx/html/paddle
-    export BUILD_DIR=/paddle/build
-    mkdir -p $WOBOQ_OUT
-    cp -rv /woboq/data $WOBOQ_OUT/../data
-    /woboq/generator/codebrowser_generator \
-        -b /paddle/build \
-        -a \
-        -o $WOBOQ_OUT \
-        -p paddle:/paddle
-    /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
-    cd /woboq
-    make clean
-
-    pip install /usr/local/opt/paddle/share/wheels/*.whl
+    if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then
+        apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev
+        # Install woboq_codebrowser.
+        git clone https://github.com/woboq/woboq_codebrowser /woboq
+        cd /woboq
+        cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
+              -DCMAKE_BUILD_TYPE=Release \
+              .
+        make
+
+        export WOBOQ_OUT=/usr/share/nginx/html/paddle
+        export BUILD_DIR=/paddle/build
+        mkdir -p $WOBOQ_OUT
+        cp -rv /woboq/data $WOBOQ_OUT/../data
+        /woboq/generator/codebrowser_generator \
+            -b /paddle/build \
+            -a \
+            -o $WOBOQ_OUT \
+            -p paddle:/paddle
+        /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
+        cd /woboq
+        make clean
+    fi
+
+    pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
+    pip install /usr/local/opt/paddle/share/wheels/paddle*.whl

     paddle version
 fi
...
@@ -25,6 +25,7 @@ add_custom_target(paddle_python ALL DEPENDS

 add_subdirectory(paddle/trainer_config_helpers/tests)
 add_subdirectory(paddle/reader/tests)
+add_subdirectory(paddle/v2/tests)

 install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
         DESTINATION opt/paddle/share/wheels
...
@@ -21,3 +21,5 @@
 #
 # r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))

 from decorator import *
+import creator
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['np_array', 'text_file']


def np_array(x):
    """
    Creates a reader that yields elements of x, if it is a numpy vector,
    or rows of x, if it is a numpy matrix, or any sub-hyperplane indexed
    by the highest dimension.

    :param x: the numpy array to create reader from.
    :returns: data reader created from x.
    """

    def reader():
        if x.ndim < 1:
            yield x

        for e in x:
            yield e

    return reader


def text_file(path):
    """
    Creates a data reader that outputs text line by line from given text file.
    Trailing new line ('\\n') of each line will be removed.

    :path: path of the text file.
    :returns: data reader of text file.
    """

    def reader():
        f = open(path, "r")
        for l in f:
            yield l.rstrip('\n')
        f.close()

    return reader
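A hedged usage sketch for the two new creators; the file path is illustrative, and `np_array` yields one row of a matrix per step of the generator:

```python
import numpy
import paddle.reader.creator

mat = numpy.array([[1, 2, 3], [4, 5, 6]], numpy.int32)
row_reader = paddle.reader.creator.np_array(mat)
for row in row_reader():
    print row  # [1 2 3], then [4 5 6]

line_reader = paddle.reader.creator.text_file("hello.txt")  # illustrative path
for line in line_reader():
    print line  # one line per iteration, trailing '\n' stripped
```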
@@ -2,3 +2,8 @@ add_test(NAME reader_decorator_test
   COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
     ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/decorator_test.py
   WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
+
+add_test(NAME reader_creator_test
+  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+    ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/creator_test.py
+  WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.reader.creator
import numpy as np
import os


class TestNumpyArray(unittest.TestCase):
    def test_numpy_array(self):
        l = [[1, 2, 3], [4, 5, 6]]
        x = np.array(l, np.int32)
        reader = paddle.reader.creator.np_array(x)
        for idx, e in enumerate(reader()):
            self.assertItemsEqual(e, l[idx])


class TestTextFile(unittest.TestCase):
    def test_text_file(self):
        path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
        reader = paddle.reader.creator.text_file(path)
        for idx, e in enumerate(reader()):
            self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))


if __name__ == '__main__':
    unittest.main()
@@ -112,6 +112,8 @@ __all__ = [
     'priorbox_layer',
     'spp_layer',
     'pad_layer',
+    'eos_layer',
+    'layer_support',
 ]
@@ -708,6 +710,7 @@ class MixedLayerType(LayerOutput):
         # update the size which might be computed inside MixedLayer
         # according to the operator's output size
         self.size = ml.config.size
+        self.finalized = True

 @wrap_name_default("mixed")
@@ -1287,6 +1290,12 @@ def last_seq(input,
     """
     Get Last Timestamp Activation of a sequence.

+    The simple usage is:
+
+    .. code-block:: python
+
+       seq = last_seq(input=layer)
+
     :param agg_level: Aggregated level
     :param name: Layer name.
     :type name: basestring
@@ -1325,6 +1334,12 @@ def first_seq(input,
     """
     Get First Timestamp Activation of a sequence.

+    The simple usage is:
+
+    .. code-block:: python
+
+       seq = first_seq(input=layer)
+
     :param agg_level: aggregation level
     :param name: Layer name.
     :type name: basestring
@@ -1425,7 +1440,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):

     .. code-block:: python

-       expand = repeat_layer(layer, 4)
+       expand = repeat_layer(input=layer, num_repeats=4)

     :param input: Input layer
     :type input: LayerOutput
@@ -1797,6 +1812,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
     Note that the above computation is for one sample. Multiple samples are
     processed in one batch.

+    The example usage is:
+
+    .. code-block:: python
+
+       cos = cos_sim(a=layer1, b=layer2, size=3)
+
     :param name: layer name
     :type name: basestring
     :param a: input layer a
@@ -1958,6 +1979,16 @@ def img_conv_layer(input,
     pieces. The first 256/4 = 64 channels will be processed by the first 32
     filters. The remaining channels will be processed by the remaining groups
     of filters.

+    The example usage is:
+
+    .. code-block:: python
+
+       conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
+                             num_channels=8,
+                             num_filters=16, stride=1,
+                             bias_attr=False,
+                             act=ReluActivation())
+
     :param name: Layer name.
     :type name: basestring
     :param input: Layer Input.
@@ -2097,6 +2128,34 @@ def img_pool_layer(input,

     .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/

+    - ceil_mode=True:
+
+    .. math::
+
+       w = 1 + int(ceil((input\_width + 2 * padding - pool\_size) / float(stride)))
+
+       h = 1 + int(ceil((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
+
+    - ceil_mode=False:
+
+    .. math::
+
+       w = 1 + int(floor((input\_width + 2 * padding - pool\_size) / float(stride)))
+
+       h = 1 + int(floor((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
+
+    The example usage is:
+
+    .. code-block:: python
+
+       maxpool = img_pool_layer(input=conv,
+                                pool_size=3,
+                                pool_size_y=5,
+                                num_channels=8,
+                                stride=1,
+                                stride_y=2,
+                                padding=1,
+                                padding_y=2,
+                                pool_type=MaxPooling())
+
     :param padding: pooling padding width.
     :type padding: int
     :param padding_y: pooling padding height. It's equal to padding by default.
@@ -2123,19 +2182,6 @@ def img_pool_layer(input,
     :param ceil_mode: Whether to use ceil mode to calculate output height and
                       width. Default is True; if set to False, floor is used.
-
-    - ceil_mode=True:
-
-    .. math::
-
-       w = 1 + int(ceil((input_width + 2 * padding - pool_size) / float(stride)))
-
-       h = 1 + int(ceil((input_height + 2 * padding_y - pool_size_y) / float(stride_y)))
-
-    - ceil_mode=False:
-
-    .. math::
-
-       w = 1 + int(floor((input_width + 2 * padding - pool_size) / float(stride)))
-
-       h = 1 + int(floor((input_height + 2 * padding_y - pool_size_y) / float(stride_y)))
-
     :type ceil_mode: bool
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2197,6 +2243,15 @@ def spp_layer(input,
     The details please refer to
     `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.

+    The example usage is:
+
+    .. code-block:: python
+
+       spp = spp_layer(input=data,
+                       pyramid_height=2,
+                       num_channels=16,
+                       pool_type=MaxPooling())
+
     :param name: layer name.
     :type name: basestring
     :param input: layer's input.
@@ -2285,6 +2340,12 @@ def img_cmrnorm_layer(input,
     The details please refer to
     `Alex's paper <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>`_.

+    The example usage is:
+
+    .. code-block:: python
+
+       norm = img_cmrnorm_layer(input=net, size=5)
+
     :param name: layer name.
     :type name: None|basestring
     :param input: layer's input.
@@ -2340,6 +2401,12 @@ def batch_norm_layer(input,
     The details of batch normalization please refer to this
     `paper <http://arxiv.org/abs/1502.03167>`_.

+    The example usage is:
+
+    .. code-block:: python
+
+       norm = batch_norm_layer(input=net, act=ReluActivation())
+
     :param name: layer name.
     :type name: basestring
     :param input: batch normalization input. Better be linear activation.
@@ -3903,13 +3970,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):

     .. code-block:: python

-       conv_shift = conv_shift_layer(input=[layer1, layer2])
+       conv_shift = conv_shift_layer(a=layer1, b=layer2)

     :param name: layer name
     :type name: basestring
     :param a: Input layer a.
     :type a: LayerOutput
-    :param b: input layer b
+    :param b: input layer b.
     :type b: LayerOutput
     :param layer_attr: layer's extra attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -4001,8 +4068,8 @@ def tensor_layer(a,
 @wrap_act_default()
 @layer_support()
 def selective_fc_layer(input,
-                       select,
                        size,
+                       select=None,
                        act=None,
                        name=None,
                        pass_generation=False,
@@ -4029,6 +4096,7 @@ def selective_fc_layer(input,
     :type input: LayerOutput|list|tuple
     :param select: The select layer. The output of select layer should be a
                    sparse binary matrix, and is treated as the mask of
                    selective fc. If it is None, acts exactly like fc_layer.
     :type select: LayerOutput
     :param size: The layer dimension.
     :type size: int
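With `select` now defaulting to None, the layer can be called without a mask, in which case it degenerates to a plain fc_layer. A hedged config sketch (`prev_layer` and `mask_layer` are illustrative names):

```python
# select omitted: acts exactly like fc_layer
fc_like = selective_fc_layer(input=prev_layer, size=128)
# select given: only the columns marked in the sparse binary mask are computed
masked = selective_fc_layer(input=prev_layer, select=mask_layer, size=128)
```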
@@ -4257,7 +4325,7 @@ def block_expand_layer(input,

     .. code-block:: python

-       block_expand = block_expand_layer(input,
+       block_expand = block_expand_layer(input=layer,
                                          num_channels=128,
                                          stride_x=1,
                                          stride_y=1,
@@ -4461,7 +4529,7 @@ def warp_ctc_layer(input,
     - You can set 'blank' to any value ranged in [0, num_classes], which
       should be consistent as that used in your labels.
     - As a native 'softmax' activation is integrated to the warp-ctc library,
       'linear' activation is expected instead in the 'input' layer.

     The simple usage:
@@ -4594,6 +4662,13 @@ def crf_decoding_layer(input,
     this layer will also calculate error. output.value[i] is 1 for incorrect
     decoding or 0 for correct decoding.

+    The simple usage:
+
+    .. code-block:: python
+
+      crf_decoding = crf_decoding_layer(input=input,
+                                        size=label_dim)
+
     :param input: The first input layer.
     :type input: LayerOutput
     :param size: size of this layer.
...
@@ -19,12 +19,14 @@ import trainer
 import event
 import data_type
 import topology
+import data_feeder
 import attr
+import pooling
 import py_paddle.swig_paddle as api

 __all__ = [
     'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
-    'event', 'data_type', 'topology', 'attr'
+    'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'topology'
 ]
...
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from py_paddle import swig_paddle
from py_paddle import DataProviderConverter
import data_type

__all__ = ['DataFeeder']


class DataFeeder(DataProviderConverter):
    """
    DataFeeder converts the data returned by paddle.reader into a data
    structure of Arguments which is defined in the API. The paddle.reader
    usually returns a list of mini-batch data entries. Each data entry in
    the list is one sample. Each sample is a list or a tuple with one
    feature or multiple features. DataFeeder converts these mini-batch data
    entries into Arguments in order to feed them to the C++ interface.

    The example usage:

        data_types = [('image', paddle.data_type.dense_vector(784)),
                      ('label', paddle.data_type.integer_value(10))]
        reader_dict = {'image': 0, 'label': 1}
        feeder = DataFeeder(data_types=data_types, reader_dict=reader_dict)
        minibatch_data = [
            ([1.0, 2.0, 3.0, 4.0], 5, [6, 7, 8]),  # first sample
            ([1.0, 2.0, 3.0, 4.0], 5, [6, 7, 8])   # second sample
        ]
        # or minibatch_data = [
        #     [[1.0, 2.0, 3.0, 4.0], 5, [6, 7, 8]],  # first sample
        #     [[1.0, 2.0, 3.0, 4.0], 5, [6, 7, 8]]   # second sample
        # ]
        arg = feeder(minibatch_data)
    """

    def __init__(self, data_types, reader_dict):
        """
        :param data_types: A list to specify data name and type. Each item is
                           a tuple of (data_name, data_type). For example:
                           [('image', paddle.data_type.dense_vector(784)),
                            ('label', paddle.data_type.integer_value(10))]
        :type data_types: A list of tuple
        :param reader_dict: A dictionary to specify the position of each data
                            in the input data.
        :type reader_dict: dict
        """
        self.input_names = []
        input_types = []
        self.reader_dict = reader_dict
        for each in data_types:
            self.input_names.append(each[0])
            assert isinstance(each[1], data_type.InputType)
            input_types.append(each[1])
        DataProviderConverter.__init__(self, input_types)

    def convert(self, dat, argument=None):
        """
        :param dat: A list of mini-batch data. Each sample is a list or a
                    tuple with one feature or multiple features, for example:
                    [
                        ([0.2, 0.2], ),  # first sample
                        ([0.8, 0.3], ),  # second sample
                    ]
                    or,
                    [
                        [[0.2, 0.2], ],  # first sample
                        [[0.8, 0.3], ],  # second sample
                    ]
        :type dat: list
        :param argument: An Arguments object that contains this mini-batch
                         data with one or multiple features. The Arguments
                         definition is in the API.
        :type argument: swig_paddle.Arguments
        """

        def reorder_data(data):
            retv = []
            for each in data:
                reorder = []
                for name in self.input_names:
                    reorder.append(each[self.reader_dict[name]])
                retv.append(reorder)
            return retv

        return DataProviderConverter.convert(self, reorder_data(dat), argument)
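The heart of DataFeeder is `reorder_data`, which permutes each sample from the reader's column order into the declared input order. A pure-Python sketch of that step, independent of the swig bindings:

```python
reader_dict = {'image': 0, 'label': 1}
input_names = ['image', 'label']
batch = [([0.2, 0.2], 5), ([0.8, 0.3], 7)]
reordered = [[sample[reader_dict[name]] for name in input_names]
             for sample in batch]
print reordered  # [[[0.2, 0.2], 5], [[0.8, 0.3], 7]]
```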
@@ -14,9 +14,9 @@
 from paddle.trainer.PyDataProvider2 import \
     InputType, dense_vector, sparse_binary_vector,\
-    sparse_vector, integer_value, DataType
+    sparse_vector, integer_value, integer_value_sequence

 __all__ = [
     'InputType', 'dense_vector', 'sparse_binary_vector', 'sparse_vector',
-    'integer_value', 'DataType'
+    'integer_value', 'integer_value_sequence'
 ]
import os

__all__ = ['DATA_HOME']

DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')

if not os.path.exists(DATA_HOME):
    os.makedirs(DATA_HOME)
import sklearn.datasets.mldata
import sklearn.model_selection
import numpy
from config import DATA_HOME

__all__ = ['train_creator', 'test_creator']


def __mnist_reader_creator__(data, target):
    def reader():
        n_samples = data.shape[0]
        for i in xrange(n_samples):
            yield (data[i] / 255.0).astype(numpy.float32), int(target[i])

    return reader


TEST_SIZE = 10000

data = sklearn.datasets.mldata.fetch_mldata(
    "MNIST original", data_home=DATA_HOME)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    data.data, data.target, test_size=TEST_SIZE, random_state=0)


def train_creator():
    return __mnist_reader_creator__(X_train, y_train)


def test_creator():
    return __mnist_reader_creator__(X_test, y_test)


def unittest():
    assert len(list(test_creator()())) == TEST_SIZE


if __name__ == '__main__':
    unittest()
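A hedged usage sketch of the new dataset module; it downloads MNIST via sklearn on first use, and the import path below is assumed, not confirmed by this commit:

```python
import paddle.v2.dataset.mnist as mnist  # module path assumed

reader = mnist.train_creator()
for i, (image, label) in enumerate(reader()):
    if i >= 3:
        break
    print label, image.shape  # e.g. 5 (784,)
```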
@@ -3,8 +3,6 @@ All training events.

 There are:

-* BeginTraining
-* EndTraining
 * BeginIteration
 * EndIteration
 * BeginPass
@@ -12,15 +10,62 @@ There are:

 TODO(yuyang18): Complete it!
 """
-__all__ = ['EndIteration']
+import py_paddle.swig_paddle as api
+
+__all__ = ['EndIteration', 'BeginIteration', 'BeginPass', 'EndPass']
+
+
+class WithMetric(object):
+    def __init__(self, evaluator):
+        if not isinstance(evaluator, api.Evaluator):
+            raise TypeError("Evaluator should be api.Evaluator type")
+        self.__evaluator__ = evaluator
+
+    @property
+    def metrics(self):
+        names = self.__evaluator__.getNames()
+        retv = dict()
+        for each_name in names:
+            val = self.__evaluator__.getValue(each_name)
+            retv[each_name] = val
+        return retv
+
+
+class BeginPass(object):
+    """
+    Event On One Pass Training Start.
+    """
+
+    def __init__(self, pass_id):
+        self.pass_id = pass_id
+
+
+class EndPass(WithMetric):
+    """
+    Event On One Pass Training Complete.
+    """
+
+    def __init__(self, pass_id, evaluator):
+        self.pass_id = pass_id
+        WithMetric.__init__(self, evaluator)
+
+
+class BeginIteration(object):
+    """
+    Event On One Batch Training Start.
+    """
+
+    def __init__(self, pass_id, batch_id):
+        self.pass_id = pass_id
+        self.batch_id = batch_id


-class EndIteration(object):
+class EndIteration(WithMetric):
     """
     Event On One Batch Training Complete.
     """

-    def __init__(self, pass_id, batch_id, cost):
+    def __init__(self, pass_id, batch_id, cost, evaluator):
         self.pass_id = pass_id
         self.batch_id = batch_id
         self.cost = cost
+        WithMetric.__init__(self, evaluator)
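A sketch of how a trainer callback can consume the new events. `metrics` is a plain dict built from the evaluator, so the keys depend on which evaluators the network config declares (the key below is illustrative):

```python
import paddle.v2 as paddle

def event_handler(event):
    if isinstance(event, paddle.event.EndPass):
        # e.g. {'classification_error_evaluator': 0.125} -- names vary
        for name, value in event.metrics.items():
            print "pass %d: %s = %f" % (event.pass_id, name, value)
```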
@@ -71,19 +71,37 @@ import collections

 import paddle.trainer_config_helpers as conf_helps
 from paddle.trainer_config_helpers.config_parser_utils import \
     parse_network_config as __parse__
 from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+from paddle.trainer_config_helpers.default_decorators import wrap_act_default
+from paddle.trainer_config_helpers.default_decorators import wrap_bias_attr_default
+from paddle.trainer_config_helpers.layers import layer_support

 import data_type
 import activation
 import attr

 __all__ = [
-    'parse_network', 'data', 'fc', 'max_id', 'classification_cost',
-    'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost',
+    'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp',
+    'maxout', 'img_cmrnorm', 'batch_norm', 'sum_to_one_norm', 'recurrent',
+    'lstmemory', 'grumemory', 'pool', 'last_seq', 'first_seq', 'concat',
+    'seq_concat', 'block_expand', 'expand', 'repeat', 'seq_reshape', 'addto',
+    'linear_comb', 'interpolation', 'bilinear_interp', 'power', 'scaling',
+    'slope_intercept', 'tensor', 'cos_sim', 'trans', 'max_id', 'sampling_id',
+    'pad', 'classification_cost', 'cross_entropy_cost',
+    'cross_entropy_with_selfnorm_cost', 'regression_cost',
     'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost',
-    'sum_cost', 'huber_cost'
+    'sum_cost', 'huber_cost', 'crf', 'crf_decoding', 'ctc', 'warp_ctc', 'nce',
+    'hsigmoid', 'eos'
 ]

+__projection_names__ = filter(lambda x: x.endswith('_projection'),
+                              dir(conf_helps))
+__all__ += __projection_names__
+
+__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
+__all__ += __operator_names__
+

 def parse_network(*outputs):
     """
@@ -101,9 +119,8 @@ def parse_network(*outputs):

 class Layer(object):
-    def __init__(self, name, parent_layers):
+    def __init__(self, name=None, parent_layers=None):
         assert isinstance(parent_layers, dict)
-        assert isinstance(name, basestring)
         self.name = name
         self.__parent_layers__ = parent_layers
@@ -122,22 +139,25 @@ class Layer(object):
                 self.__parent_layers__[layer_name])
             kwargs[layer_name] = v1_layer

-        if self.name not in context:
+        if self.name is None:
+            return self.to_proto_impl(**kwargs)
+        elif self.name not in context:
             context[self.name] = self.to_proto_impl(**kwargs)
         return context[self.name]

     def to_proto_impl(self, **kwargs):
         raise NotImplementedError()
-def __convert_to_v2__(method_name, name_prefix, parent_names):
-    if name_prefix is not None:
-        wrapper = wrap_name_default(name_prefix=name_prefix)
+def __convert_to_v2__(method_name, parent_names, is_default_name=True):
+    if is_default_name:
+        wrapper = wrap_name_default(name_prefix=method_name)
     else:
         wrapper = None

     class V2LayerImpl(Layer):
-        def __init__(self, name=None, **kwargs):
+        def __init__(self, **kwargs):
             parent_layers = dict()
             other_kwargs = dict()
             for pname in parent_names:
@@ -148,6 +168,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names):
                 if key not in parent_names:
                     other_kwargs[key] = kwargs[key]

+            name = kwargs.get('name', None)
             super(V2LayerImpl, self).__init__(name, parent_layers)
             self.__other_kwargs__ = other_kwargs
@@ -160,7 +181,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names):
                 args[each] = kwargs[each]
             for each in self.__other_kwargs__:
                 args[each] = self.__other_kwargs__[each]
-            return getattr(conf_helps, method_name)(name=self.name, **args)
+            return getattr(conf_helps, method_name)(**args)

     return V2LayerImpl
@@ -191,69 +212,171 @@ class DataLayerV2(Layer):
         return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
-LayerV2 = Layer
+class MixedLayerV2(Layer):
+    """
+    This class is used to support the `with` grammar. If not, the following
+    code could convert mixed_layer simply:
+
+        mixed = __convert_to_v2__(
+            'mixed_layer', name_prefix='mixed', parent_names=['input'])
+    """
+
+    class AddToSealedMixedLayerExceptionV2(Exception):
+        pass
+
+    def __init__(self,
+                 size=0,
+                 input=None,
+                 name=None,
+                 act=None,
+                 bias_attr=None,
+                 layer_attr=None):
+        self.__method_name__ = 'mixed_layer'
+        self.finalized = False
+        self.__inputs__ = []
+        if input is not None:
+            self.__inputs__ = input
+        other_kwargs = dict()
+        other_kwargs['name'] = name
+        other_kwargs['size'] = size
+        other_kwargs['act'] = act
+        other_kwargs['bias_attr'] = bias_attr
+        other_kwargs['layer_attr'] = layer_attr
+        parent_layers = {"input": self.__inputs__}
+        super(MixedLayerV2, self).__init__(name, parent_layers)
+        self.__other_kwargs__ = other_kwargs
+
+    def __iadd__(self, other):
+        if not self.finalized:
+            self.__inputs__.append(other)
+            return self
+        else:
+            raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
+
+    def __enter__(self):
+        assert len(self.__inputs__) == 0
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.finalized = True
+
+    def to_proto_impl(self, **kwargs):
+        args = dict()
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__other_kwargs__:
+            args[each] = self.__other_kwargs__[each]
+        return getattr(conf_helps, self.__method_name__)(**args)
+
+
+@wrap_name_default("mixed")
+@wrap_act_default(act=activation.Linear())
+@wrap_bias_attr_default(has_bias=False)
+@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT)
+def mixed(size=0,
+          name=None,
+          input=None,
+          act=None,
+          bias_attr=False,
+          layer_attr=None):
+    return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
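A hedged sketch of the `with` grammar that MixedLayerV2 enables; `full_matrix_projection` comes from trainer_config_helpers and is re-exported by the `__projection_names__` loop below, and `hidden` is an illustrative upstream layer:

```python
with mixed(size=256) as m:
    m += full_matrix_projection(input=hidden)  # seals on exiting the block
```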
data = DataLayerV2
AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel

layer_list = [
    # [V2LayerImpl, V1_method_name, parent_names]
    # fully connected layers
    ['fc', 'fc_layer', ['input']],
    # conv layers
    ['conv_shift', 'conv_shift_layer', ['a', 'b']],
    ['img_conv', 'img_conv_layer', ['input']],
    # image pooling layers
    ['img_pool', 'img_pool_layer', ['input']],
    ['spp', 'spp_layer', ['input']],
    ['maxout', 'maxout_layer', ['input']],
    # norm layers
    ['img_cmrnorm', 'img_cmrnorm_layer', ['input']],
    ['batch_norm', 'batch_norm_layer', ['input']],
    ['sum_to_one_norm', 'sum_to_one_norm_layer', ['input']],
    # recurrent layers
    ['recurrent', 'recurrent_layer', ['input']],
    ['lstmemory', 'lstmemory', ['input']],
    ['grumemory', 'grumemory', ['input']],
    # aggregate layers
    ['pool', 'pooling_layer', ['input']],
    ['last_seq', 'last_seq', ['input']],
    ['first_seq', 'first_seq', ['input']],
    ['concat', 'concat_layer', ['input']],
    ['seq_concat', 'seq_concat_layer', ['a', 'b']],
    # reshaping layers
    ['block_expand', 'block_expand_layer', ['input']],
    ['expand', 'expand_layer', ['input', 'expand_as']],
    ['repeat', 'repeat_layer', ['input']],
    ['rotate', 'rotate_layer', ['input']],
    ['seq_reshape', 'seq_reshape_layer', ['input']],
    # math layers
    ['addto', 'addto_layer', ['input']],
    ['linear_comb', 'linear_comb_layer', ['weights', 'vectors']],
    ['interpolation', 'interpolation_layer', ['input', 'weight']],
    ['bilinear_interp', 'bilinear_interp_layer', ['input']],
    ['power', 'power_layer', ['input', 'weight']],
    ['scaling', 'scaling_layer', ['input', 'weight']],
    ['slope_intercept', 'slope_intercept_layer', ['input']],
    ['tensor', 'tensor_layer', ['a', 'b']],
    ['cos_sim', 'cos_sim', ['a', 'b']],
    ['trans', 'trans_layer', ['input']],
    # sampling layers
    ['max_id', 'maxid_layer', ['input']],
    ['sampling_id', 'sampling_id_layer', ['input']],
    # slicing and joining layers
    ['pad', 'pad_layer', ['input']],
    # cost layers
    [
        'classification_cost', 'classification_cost',
        ['input', 'label', 'weight']
    ],
    ['regression_cost', 'regression_cost', ['input', 'label', 'weight']],
    ['cross_entropy_cost', 'cross_entropy', ['input', 'label']],
    [
        'cross_entropy_with_selfnorm_cost', 'cross_entropy_with_selfnorm',
        ['input', 'label']
    ],
    [
        'multi_binary_label_cross_entropy_cost',
        'multi_binary_label_cross_entropy', ['input', 'label']
    ],
    ['rank_cost', 'rank_cost', ['left', 'right', 'label', 'weight']],
    ['lambda_cost', 'lambda_cost', ['input', 'score']],
    ['sum_cost', 'sum_cost', ['input']],
    ['huber_cost', 'huber_cost', ['input', 'label']],
    ['crf', 'crf_layer', ['input', 'label']],
    ['crf_decoding', 'crf_decoding_layer', ['input']],
    ['ctc', 'ctc_layer', ['input', 'label']],
    ['warp_ctc', 'warp_ctc_layer', ['input', 'label']],
    ['nce', 'nce_layer', ['input', 'label']],
    ['hsigmoid', 'hsigmoid', ['input', 'label']],
    # check layers
    ['eos', 'eos_layer', ['input']]
]
for l in layer_list:
    globals()[l[0]] = __convert_to_v2__(l[1], l[2])
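
# After this loop, each converted layer is a module-level callable
# (`some_layer` below stands for any upstream layer), e.g.:
#   hidden = fc(input=some_layer, size=100, act=activation.Sigmoid())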
# convert projection
for prj in __projection_names__:
globals()[prj] = __convert_to_v2__(
prj, parent_names=['input'], is_default_name=False)
# convert operator
operator_list = [
# [V1_method_name, parent_names],
['dotmul_operator', ['a', 'b']],
['conv_operator', ['img', 'filter']]
]
for op in operator_list:
globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False)
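
# Note: the converted projections and operators are not standalone layers;
# they are meant to be fed into a mixed layer, either via `input=` or `+=`
# (see ProjOpTest in tests/test_layer.py).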
...@@ -3,7 +3,10 @@ import paddle.trainer_config_helpers.optimizers as v1_optimizers
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.v2

__all__ = [
    'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
    'RMSProp', 'ModelAverage', 'L2Regularization'
]


class Optimizer(object):
...@@ -38,6 +41,14 @@ class Optimizer(object):
            pass_num)


class Momentum(Optimizer):
    def __init__(self, momentum=None, sparse=False, **kwargs):
        # Forward the user-supplied settings rather than hard-coded defaults.
        learning_method = v1_optimizers.MomentumOptimizer(
            momentum=momentum, sparse=sparse)
        super(Momentum, self).__init__(
            learning_method=learning_method, **kwargs)


class Adam(Optimizer):
    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
        learning_method = v1_optimizers.AdamOptimizer(
...@@ -52,7 +63,45 @@ class Adamax(Optimizer):
        super(Adamax, self).__init__(learning_method=learning_method, **kwargs)


class AdaGrad(Optimizer):
    def __init__(self, **kwargs):
        learning_method = v1_optimizers.AdaGradOptimizer()
        super(AdaGrad, self).__init__(
            learning_method=learning_method, **kwargs)


class DecayedAdaGrad(Optimizer):
    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
        learning_method = v1_optimizers.DecayedAdaGradOptimizer(
            rho=rho, epsilon=epsilon)
        super(DecayedAdaGrad, self).__init__(
            learning_method=learning_method, **kwargs)


class AdaDelta(Optimizer):
    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
        learning_method = v1_optimizers.AdaDeltaOptimizer(
            rho=rho, epsilon=epsilon)
        super(AdaDelta, self).__init__(
            learning_method=learning_method, **kwargs)


class RMSProp(Optimizer):
    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
        learning_method = v1_optimizers.RMSPropOptimizer(
            rho=rho, epsilon=epsilon)
        super(RMSProp, self).__init__(
            learning_method=learning_method, **kwargs)


ModelAverage = v1_optimizers.ModelAverage
L2Regularization = v1_optimizers.L2Regularization

if __name__ == '__main__':
    swig_api.initPaddle('--use_gpu=false')
    for opt in [
            Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(),
            AdaDelta(), RMSProp(), Adam(
                model_average=ModelAverage(average_window=0.5),
                regularization=L2Regularization(rate=0.5),
                gradient_clipping_threshold=25)
    ]:
        print opt, opt.enable_types()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers.poolings import *
__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"]
Max = MaxPooling
CudnnMax = CudnnMaxPooling
Avg = AvgPooling
CudnnAvg = CudnnAvgPooling
Sum = SumPooling
SquareRootN = SquareRootNPooling
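
# These aliases let user code name pooling types concisely, e.g.
#   layer.img_pool(input=conv, pool_size=2, pool_type=pooling.Max())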
add_test(NAME test_v2_layer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
add_test(NAME test_v2_api
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
add_test(NAME topology_test
    COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
    ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/topology_test.py
...
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pushd `dirname $0` > /dev/null
SCRIPTPATH=$PWD
popd > /dev/null
cd $SCRIPTPATH
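
# $1 is the python executable handed in by CMake (see test_v2_api above);
# install the freshly built wheel before running the tests.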
$1 -m pip install ../../../../paddle/dist/*.whl
test_list="test_data_feeder.py"
export PYTHONPATH=$PWD/../../../../python/
for fn in $test_list
do
echo "test $fn"
$1 $fn
if [ $? -ne 0 ]; then
exit 1
fi
done
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import py_paddle.swig_paddle as api
import numpy as np
from paddle.v2 import data_type
from paddle.v2.data_feeder import DataFeeder
class DataFeederTest(unittest.TestCase):
def dense_reader(self, size):
data = np.random.random(size)
return data
def sparse_binary_reader(self, high, size_limit, non_empty=False):
num = np.random.randint(size_limit) # num could be 0
while non_empty and num == 0:
num = np.random.randint(size_limit)
return np.random.randint(high, size=num).tolist()
def test_dense(self):
def compare(input):
feeder = DataFeeder([('image', data_type.dense_vector(784))],
{'image': 0})
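            # The {'image': 0} dict maps the declared input name to the
            # index of that feature inside each input sample.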
arg = feeder(input)
output = arg.getSlotValue(0).copyToNumpyMat()
input = np.array(input, dtype='float32')
            self.assertTrue(np.allclose(input, output))
# test numpy array
batch_size = 32
dim = 784
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(self.dense_reader(dim))
data.append(each_sample)
compare(data)
# each feature is a list
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(self.dense_reader(dim).tolist())
data.append(each_sample)
compare(data)
# test tuple
data = []
for i in xrange(batch_size):
each_sample = (self.dense_reader(dim).tolist(), )
data.append(each_sample)
compare(data)
def test_sparse_binary(self):
dim = 10000
batch_size = 32
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(self.sparse_binary_reader(dim, 50))
data.append(each_sample)
feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))],
{'input': 0})
arg = feeder(data)
output = arg.getSlotValue(0)
assert isinstance(output, api.Matrix)
for i in xrange(batch_size):
self.assertEqual(output.getSparseRowCols(i), data[i][0])
def test_sparse(self):
dim = 10000
batch_size = 32
v = []
w = []
data = []
for dat in xrange(batch_size):
each_sample = []
a = self.sparse_binary_reader(dim, 40, non_empty=True)
b = self.dense_reader(len(a)).tolist()
v.append(a)
w.append(np.array(b, dtype="float32"))
each_sample.append(zip(a, b))
data.append(each_sample)
feeder = DataFeeder([('input', data_type.sparse_vector(dim))],
{'input': 0})
arg = feeder(data)
output = arg.getSlotValue(0)
assert isinstance(output, api.Matrix)
for i in xrange(batch_size):
self.assertEqual(output.getSparseRowCols(i), v[i])
cols_value = output.getSparseRowColsVal(i)
value = [val[1] for val in cols_value]
value = np.array(value, dtype="float32")
            self.assertTrue(np.allclose(value, w[i]))
def test_integer(self):
dim = 100
batch_size = 32
index = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(np.random.randint(dim))
index.append(each_sample)
feeder = DataFeeder([('input', data_type.integer_value(dim))],
{'input': 0})
arg = feeder(index)
output = arg.getSlotIds(0).copyToNumpyArray()
index = np.array(index, dtype='int')
        self.assertTrue(np.array_equal(output, index.flatten()))
def test_integer_sequence(self):
dim = 10000
batch_size = 32
start = [0]
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(
self.sparse_binary_reader(
dim, 30, non_empty=True))
data.append(each_sample)
start.append(len(each_sample[0]) + start[-1])
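            # `start` accumulates sequence offsets: [0, len0, len0+len1, ...],
            # which is what getSlotSequenceStartPositions is checked against.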
feeder = DataFeeder([('input', data_type.integer_value_sequence(dim))],
{'input': 0})
arg = feeder(data)
output_data = arg.getSlotIds(0).copyToNumpyArray()
output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
index = []
for dat in data:
index.extend(x for x in dat[0]) # only one feature, so dat[0]
index = np.array(index, dtype='int')
start = np.array(start, dtype='int')
        self.assertTrue(np.array_equal(output_data, index))
        self.assertTrue(np.array_equal(output_start, start))
def test_multiple_features(self):
batch_size = 2
data = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(np.random.randint(10))
each_sample.append(
self.sparse_binary_reader(
20000, 40, non_empty=True))
each_sample.append(self.dense_reader(100))
data.append(each_sample)
# test multiple features
data_types = [('fea0', data_type.dense_vector(100)),
('fea1', data_type.sparse_binary_vector(20000)),
('fea2', data_type.integer_value(10))]
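        # The dict maps each input name to its position in every sample:
        # dense 'fea0' is at index 2, sparse 'fea1' at 1, integer 'fea2' at 0.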
feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
arg = feeder(data)
output_dense = arg.getSlotValue(0).copyToNumpyMat()
output_sparse = arg.getSlotValue(1)
output_index = arg.getSlotIds(2).copyToNumpyArray()
for i in xrange(batch_size):
            self.assertTrue(np.allclose(output_dense[i], data[i][2]))
self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1])
self.assertEqual(output_index[i], data[i][0])
# reader returns 3 features, but only use 2 features
data_types = [('fea0', data_type.dense_vector(100)),
('fea2', data_type.integer_value(10))]
feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0})
arg = feeder(data)
output_dense = arg.getSlotValue(0).copyToNumpyMat()
output_index = arg.getSlotIds(1).copyToNumpyArray()
for i in xrange(batch_size):
            self.assertTrue(np.allclose(output_dense[i], data[i][2]))
self.assertEqual(output_index[i], data[i][0])
        # reader returns 3 features, one of which is duplicated
data_types = [('fea0', data_type.dense_vector(100)),
('fea1', data_type.sparse_binary_vector(20000)),
('fea2', data_type.integer_value(10)),
('fea3', data_type.dense_vector(100))]
feeder = DataFeeder(data_types,
{'fea0': 2,
'fea1': 1,
'fea2': 0,
'fea3': 2})
arg = feeder(data)
fea0 = arg.getSlotValue(0).copyToNumpyMat()
fea1 = arg.getSlotValue(1)
fea2 = arg.getSlotIds(2).copyToNumpyArray()
fea3 = arg.getSlotValue(3).copyToNumpyMat()
for i in xrange(batch_size):
            self.assertTrue(np.allclose(fea0[i], data[i][2]))
self.assertEqual(fea1.getSparseRowCols(i), data[i][1])
self.assertEqual(fea2[i], data[i][0])
            self.assertTrue(np.allclose(fea3[i], data[i][2]))
def test_multiple_features_tuple(self):
batch_size = 2
data = []
for i in xrange(batch_size):
a = np.random.randint(10)
b = self.sparse_binary_reader(20000, 40, non_empty=True)
c = self.dense_reader(100)
each_sample = (a, b, c)
data.append(each_sample)
# test multiple features
data_types = [('fea0', data_type.dense_vector(100)),
('fea1', data_type.sparse_binary_vector(20000)),
('fea2', data_type.integer_value(10))]
feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
arg = feeder(data)
out_dense = arg.getSlotValue(0).copyToNumpyMat()
out_sparse = arg.getSlotValue(1)
out_index = arg.getSlotIds(2).copyToNumpyArray()
for i in xrange(batch_size):
            self.assertTrue(np.allclose(out_dense[i], data[i][2]))
self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
self.assertEqual(out_index[i], data[i][0])
if __name__ == '__main__':
api.initPaddle("--use_gpu=0")
unittest.main()
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import difflib
import unittest
import paddle.trainer_config_helpers as conf_helps
import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.pooling as pooling
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10))
weight = layer.data(name='weight', type=data_type.dense_vector(10))
score = layer.data(name='score', type=data_type.dense_vector(1))
hidden = layer.fc(input=pixel,
size=100,
act=activation.Sigmoid(),
param_attr=attr.Param(name='hidden'))
inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
conv = layer.img_conv(
input=pixel,
filter_size=1,
filter_size_y=1,
num_channels=8,
num_filters=16,
act=activation.Linear())
class ImageLayerTest(unittest.TestCase):
def test_conv_layer(self):
conv_shift = layer.conv_shift(a=pixel, b=score)
print layer.parse_network(conv, conv_shift)
def test_pooling_layer(self):
maxpool = layer.img_pool(
input=conv,
pool_size=2,
num_channels=16,
padding=1,
pool_type=pooling.Max())
spp = layer.spp(input=conv,
pyramid_height=2,
num_channels=16,
pool_type=pooling.Max())
maxout = layer.maxout(input=conv, num_channels=16, groups=4)
print layer.parse_network(maxpool, spp, maxout)
def test_norm_layer(self):
norm1 = layer.img_cmrnorm(input=conv, size=5)
norm2 = layer.batch_norm(input=conv)
norm3 = layer.sum_to_one_norm(input=conv)
print layer.parse_network(norm1, norm2, norm3)
class AggregateLayerTest(unittest.TestCase):
def test_aggregate_layer(self):
pool = layer.pool(
input=pixel,
pooling_type=pooling.Avg(),
agg_level=layer.AggregateLevel.EACH_SEQUENCE)
last_seq = layer.last_seq(input=pixel)
first_seq = layer.first_seq(input=pixel)
concat = layer.concat(input=[last_seq, first_seq])
seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat)
class MathLayerTest(unittest.TestCase):
def test_math_layer(self):
addto = layer.addto(input=[pixel, pixel])
linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
interpolation = layer.interpolation(
input=[hidden, hidden], weight=score)
bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
power = layer.power(input=pixel, weight=score)
scaling = layer.scaling(input=pixel, weight=score)
slope = layer.slope_intercept(input=pixel)
tensor = layer.tensor(a=pixel, b=pixel, size=1000)
cos_sim = layer.cos_sim(a=pixel, b=pixel)
trans = layer.trans(input=tensor)
print layer.parse_network(addto, linear_comb, interpolation, power,
scaling, slope, tensor, cos_sim, trans)
class ReshapeLayerTest(unittest.TestCase):
def test_reshape_layer(self):
block_expand = layer.block_expand(
input=conv, num_channels=4, stride_x=1, block_x=1)
expand = layer.expand(
input=weight,
expand_as=pixel,
expand_level=layer.ExpandLevel.FROM_TIMESTEP)
repeat = layer.repeat(input=pixel, num_repeats=4)
reshape = layer.seq_reshape(input=pixel, reshape_size=4)
rotate = layer.rotate(input=pixel, height=16, width=49)
print layer.parse_network(block_expand, expand, repeat, reshape, rotate)
class RecurrentLayerTest(unittest.TestCase):
def test_recurrent_layer(self):
word = layer.data(name='word', type=data_type.integer_value(12))
recurrent = layer.recurrent(input=word)
lstm = layer.lstmemory(input=word)
gru = layer.grumemory(input=word)
print layer.parse_network(recurrent, lstm, gru)
class CostLayerTest(unittest.TestCase):
def test_cost_layer(self):
cost1 = layer.classification_cost(input=inference, label=label)
cost2 = layer.classification_cost(
input=inference, label=label, weight=weight)
cost3 = layer.cross_entropy_cost(input=inference, label=label)
cost4 = layer.cross_entropy_with_selfnorm_cost(
input=inference, label=label)
cost5 = layer.regression_cost(input=inference, label=label)
cost6 = layer.regression_cost(
input=inference, label=label, weight=weight)
cost7 = layer.multi_binary_label_cross_entropy_cost(
input=inference, label=label)
cost8 = layer.rank_cost(left=score, right=score, label=score)
cost9 = layer.lambda_cost(input=inference, score=score)
cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_cost(input=score, label=label)
print layer.parse_network(cost1, cost2)
print layer.parse_network(cost3, cost4)
print layer.parse_network(cost5, cost6)
print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
crf = layer.crf(input=inference, label=label)
crf_decoding = layer.crf_decoding(input=inference, size=3)
ctc = layer.ctc(input=inference, label=label)
warp_ctc = layer.warp_ctc(input=pixel, label=label)
nce = layer.nce(input=inference, label=label, num_classes=3)
hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce,
hsigmoid)
class OtherLayerTest(unittest.TestCase):
def test_sampling_layer(self):
maxid = layer.max_id(input=inference)
sampling_id = layer.sampling_id(input=inference)
eos = layer.eos(input=maxid, eos_id=5)
print layer.parse_network(maxid, sampling_id, eos)
def test_slicing_joining_layer(self):
pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
print layer.parse_network(pad)
class ProjOpTest(unittest.TestCase):
def test_projection(self):
input = layer.data(name='data', type=data_type.dense_vector(784))
word = layer.data(
name='word', type=data_type.integer_value_sequence(10000))
fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
mixed0 = layer.mixed(
size=256,
input=[
layer.full_matrix_projection(input=fc0),
layer.full_matrix_projection(input=fc1)
])
with layer.mixed(size=200) as mixed1:
mixed1 += layer.full_matrix_projection(input=fc0)
mixed1 += layer.identity_projection(input=fc1)
table = layer.table_projection(input=word)
emb0 = layer.mixed(size=512, input=table)
with layer.mixed(size=512) as emb1:
emb1 += table
scale = layer.scaling_projection(input=fc0)
scale0 = layer.mixed(size=100, input=scale)
with layer.mixed(size=100) as scale1:
scale1 += scale
dotmul = layer.dotmul_projection(input=fc0)
dotmul0 = layer.mixed(size=100, input=dotmul)
with layer.mixed(size=100) as dotmul1:
dotmul1 += dotmul
context = layer.context_projection(input=fc0, context_len=5)
context0 = layer.mixed(size=100, input=context)
with layer.mixed(size=100) as context1:
context1 += context
conv = layer.conv_projection(
input=input,
filter_size=1,
num_channels=1,
num_filters=128,
stride=1,
padding=0)
conv0 = layer.mixed(input=conv, bias_attr=True)
with layer.mixed(bias_attr=True) as conv1:
conv1 += conv
print layer.parse_network(mixed0)
print layer.parse_network(mixed1)
print layer.parse_network(emb0)
print layer.parse_network(emb1)
print layer.parse_network(scale0)
print layer.parse_network(scale1)
print layer.parse_network(dotmul0)
print layer.parse_network(dotmul1)
print layer.parse_network(conv0)
print layer.parse_network(conv1)
def test_operator(self):
ipt0 = layer.data(name='data', type=data_type.dense_vector(784))
ipt1 = layer.data(name='word', type=data_type.dense_vector(128))
fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
dotmul_op = layer.dotmul_operator(a=fc0, b=fc1)
dotmul0 = layer.mixed(input=dotmul_op)
with layer.mixed() as dotmul1:
dotmul1 += dotmul_op
conv = layer.conv_operator(
img=ipt0,
filter=ipt1,
filter_size=1,
num_channels=1,
num_filters=128,
stride=1,
padding=0)
conv0 = layer.mixed(input=conv)
with layer.mixed() as conv1:
conv1 += conv
print layer.parse_network(dotmul0)
print layer.parse_network(dotmul1)
print layer.parse_network(conv0)
print layer.parse_network(conv1)
if __name__ == '__main__':
unittest.main()
...@@ -3,6 +3,7 @@ import collections
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
from data_feeder import DataFeeder
from . import event as v2_event
from . import optimizer as v2_optimizer
from . import parameters as v2_parameters
...@@ -68,7 +69,8 @@ class SGD(ITrainer):
              test_data_reader=None,
              event_handler=None,
              batch_size=32,
              data_types=None,
              reader_dict=None):
        """
        Training method. Will train num_passes of input data.
...@@ -96,22 +98,37 @@ class SGD(ITrainer):
            self.__optimizer__.enable_types())
        assert isinstance(gm, api.GradientMachine)
        parameters.append_gradient_machine(gm)
        gm.randParameters()
        updater = self.__optimizer__.create_local_updater()
        updater.init(gm)
        gm.start()
        batch_evaluator = gm.makeEvaluator()
        assert isinstance(batch_evaluator, api.Evaluator)
        pass_evaluator = gm.makeEvaluator()
        assert isinstance(pass_evaluator, api.Evaluator)
        out_args = api.Arguments.createArguments(0)
        data_types_lists = [data_type[1] for data_type in topology.data_type()]
        converter = DataProviderConverter(input_types=data_types_lists)
        feeder = DataFeeder(data_types, reader_dict)
        for pass_id in xrange(num_passes):
            event_handler(v2_event.BeginPass(pass_id))
            pass_evaluator.start()
            updater.startPass()
            for batch_id, data_batch in enumerate(
                    __data_reader_to_batch__(train_data_reader, batch_size,
                                             topology)):
                batch_evaluator.start()
                event_handler(
                    v2_event.BeginIteration(
                        pass_id=pass_id, batch_id=batch_id))
                pass_type = updater.startBatch(len(data_batch))
                gm.forwardBackward(feeder(data_batch), out_args, pass_type)
                gm.eval(pass_evaluator)
                gm.eval(batch_evaluator)
                for each_param in gm.getParameters():
                    updater.update(each_param)
                # Get cost. We use numpy to calculate total cost for this batch.
...@@ -119,11 +136,17 @@ class SGD(ITrainer):
                cost_vec = cost_vec.copyToNumpyMat()
                cost = cost_vec.sum() / len(data_batch)
                updater.finishBatch(cost)
                batch_evaluator.finish()
                event_handler(
                    v2_event.EndIteration(
                        pass_id=pass_id,
                        batch_id=batch_id,
                        cost=cost,
                        evaluator=batch_evaluator))
            updater.finishPass()
            pass_evaluator.finish()
            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
        gm.finish()
...
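# Evaluator lifecycle in the hunk above: batch_evaluator is started and
# finished around every batch, pass_evaluator around every pass, and both are
# filled in via gm.eval() after each forward/backward step, so event handlers
# receive them through the EndIteration and EndPass events.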