提交 a5734f7c 编写于 作者: L Luo Tao

Merge branch 'develop' into stride

......@@ -55,6 +55,7 @@ extensions = [
'sphinx.ext.napoleon',
'sphinx.ext.graphviz'
]
mathjax_path="https://cdn.bootcss.com/mathjax/2.7.0/MathJax.js"
table_styling_embed_css = True
autodoc_member_order = 'bysource'
......
......@@ -42,7 +42,8 @@ void AgentLayer::forward(PassType passType) {
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realHeight) {
if (realOutput.ids) {
output_.ids->subVecFrom(*realOutput.ids, 0, numSamples_);
output_.ids =
IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
} else {
output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
......
......@@ -107,6 +107,10 @@ void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image,
int channel = isDeconv_ ? numFilters_ : channels_[inIdx];
resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
CHECK_EQ(image->getWidth(),
static_cast<size_t>(imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel));
real *imgData = image->getData() + startIdx * image->getWidth();
MatrixPtr imageTmp =
Matrix::create(imgData,
......
......@@ -36,7 +36,7 @@ namespace paddle {
* | |- 5
* |
* |-*- 0
* |- 1
* |- 1
* @endcode
*
* where * indicates an internal node, and each leaf node represents a class.
......
......@@ -94,7 +94,7 @@ docker build -t paddle:dev --build-arg UBUNTU_MIRROR=mirror://mirrors.ubuntu.com
Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host):
```bash
docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=OFF" -e "RUN_TEST=OFF" paddle:dev
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=OFF" -e "RUN_TEST=OFF" paddle:dev
```
This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, can build the source code including any local changes. When it writes to `/paddle/build` in the container, it is actually writing to `$PWD/build` on the host.
......@@ -110,7 +110,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
- `WITH_AVX`: ***Required***. Setting it to "OFF" prevents the build from generating AVX instructions. If you don't know what AVX is, you might want to set it to "ON".
- `WITH_TEST`: ***Optional, default OFF***. Build unit test binaries. Once you've built the unit tests, you can run these tests manually with the following command:
```bash
docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coverall"
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coverall"
```
- `RUN_TEST`: ***Optional, default OFF***. Run unit tests after building. You can't run unit tests without building them.
......@@ -129,7 +129,7 @@ This production image is minimal -- it includes binary `paddle`, the shared libr
Again the development happens on the host. Suppose that we have a simple application program in `a.py`, we can test and run it using the production image:
```bash
docker run -it -v $PWD:/work paddle /work/a.py
docker run --rm -it -v $PWD:/work paddle /work/a.py
```
But this works only if all dependencies of `a.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image with the additional dependencies installed.
......
......@@ -24,8 +24,9 @@ add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp)
add_subdirectory(paddle/trainer_config_helpers/tests)
add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests)
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
DESTINATION opt/paddle/share/wheels
......
......@@ -21,19 +21,22 @@ import data_type
import topology
import data_feeder
import networks
import evaluator
from . import dataset
from . import reader
from . import plot
import attr
import pooling
import inference
import networks
import py_paddle.swig_paddle as api
import minibatch
import plot
__all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
'topology', 'networks', 'infer'
'topology', 'networks', 'infer', 'plot', 'evaluator'
]
......
......@@ -65,13 +65,27 @@ class Layer(object):
def __init__(self, name=None, parent_layers=None):
assert isinstance(parent_layers, dict)
self.name = name
self.__contex__ = {}
self.__context__ = {}
self.__parent_layers__ = parent_layers
self.__children_layers__ = [] # used for evaluator.
def append_child(self, layer, parent_names):
self.__children_layers__.append((layer, parent_names))
def to_proto(self, context):
"""
function to set proto attribute
"""
self.__context__ = context
# short cut if myself is parsed before.
if self.context_name() in context:
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
# parse parent before myself
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
......@@ -83,12 +97,29 @@ class Layer(object):
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
# parse myself.
ret_val = self.to_proto_impl(**kwargs)
if self.context_name() is not None and \
self.context_name() not in context:
context[self.context_name()] = ret_val
# parse children.
for layer, pnames in self.__children_layers__:
drop = False
# child will only be parsed if all parents are in context.
for pname in pnames:
if pname not in context:
drop = True
break
if drop:
continue
layer.to_proto(context=context)
if self.context_name() is None:
return self.to_proto_impl(**kwargs)
elif self.context_name() not in context:
context[self.context_name()] = self.to_proto_impl(**kwargs)
self.__contex__ = context
if self.use_context_name():
return ret_val
elif self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
......@@ -113,10 +144,13 @@ class Layer(object):
this layer is called.
:return:
"""
return self.__contex__[self.context_name()].size
return self.__context__[self.context_name()].size
def __convert_to_v2__(method_name, parent_names, is_default_name=True):
def __convert_to_v2__(method_name,
parent_names,
is_default_name=True,
attach_parent=False):
if is_default_name:
wrapper = wrap_name_default(name_prefix=method_name)
else:
......@@ -129,9 +163,20 @@ def __convert_to_v2__(method_name, parent_names, is_default_name=True):
parent_layers = dict()
other_kwargs = dict()
for pname in parent_names:
if kwargs.has_key(pname):
if pname in kwargs:
parent_layers[pname] = kwargs[pname]
if attach_parent:
pnames = [x.context_name() for x in parent_layers.values()]
for pname in parent_layers:
layers = kwargs[pname]
if not isinstance(layers, collections.Sequence):
layers = [layers]
for layer in layers:
layer.append_child(self, pnames)
for key in kwargs.keys():
if key not in parent_names:
other_kwargs[key] = kwargs[key]
......
......@@ -15,8 +15,10 @@
wmt14 dataset
"""
import tarfile
import gzip
from paddle.v2.dataset.common import download
from paddle.v2.parameters import Parameters
__all__ = ['train', 'test', 'build_dict']
......@@ -25,6 +27,9 @@ MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
# this is a small set of data for test. The original data is too large and will be added later.
URL_TRAIN = 'http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
MD5_TRAIN = 'a755315dd01c2c35bde29a744ede23a6'
# this is the pretrained model, whose bleu = 26.92
URL_MODEL = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.gz'
MD5_MODEL = '6b097d23e15654608c6f74923e975535'
START = "<s>"
END = "<e>"
......@@ -103,5 +108,13 @@ def test(dict_size):
download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size)
def model():
    """
    Download the pretrained wmt14 model archive and load its parameters.

    :return: whatever Parameters.from_tar produces for the gzip stream of the
             downloaded archive.
    """
    archive_path = download(URL_MODEL, 'wmt14', MD5_MODEL)
    # the archive is gzip-compressed; from_tar reads the decompressed stream
    with gzip.open(archive_path, 'r') as stream:
        return Parameters.from_tar(stream)
def fetch():
    """
    Download the wmt14 training archive and the pretrained model archive,
    in that order, without loading either of them.
    """
    for url, md5 in ((URL_TRAIN, MD5_TRAIN), (URL_MODEL, MD5_MODEL)):
        download(url, 'wmt14', md5)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.trainer_config_helpers.evaluators as evs
import inspect
from config_base import __convert_to_v2__
__all__ = []
def initialize():
    """
    Publish a v2 wrapper for every v1 evaluator.

    Every name in ``evs.__all__`` that ends with ``_evaluator`` is wrapped
    with ``__convert_to_v2__`` (using ``attach_parent=True``) and stored in
    this module's globals, and in ``__all__``, under the same name with the
    ``_evaluator`` suffix removed.
    """
    suffix = "_evaluator"
    # argument names that are treated as parent layers of an evaluator
    parent_arg_names = ['input', 'label', 'weight']

    for __ev_name__ in evs.__all__:
        if not __ev_name__.endswith(suffix):
            continue

        __ev__ = getattr(evs, __ev_name__)
        # use the argspec attached to the evaluator when there is one,
        # otherwise inspect the callable itself
        argspec = (__ev__.argspec
                   if hasattr(__ev__, 'argspec') else
                   inspect.getargspec(__ev__))
        parent_names = [
            arg for arg in argspec.args if arg in parent_arg_names
        ]

        wrapped = __convert_to_v2__(
            __ev_name__,
            parent_names=parent_names,
            is_default_name='name' in argspec.args,
            attach_parent=True)

        __new_name__ = __ev_name__[:-len(suffix)]
        wrapped.__name__ = __new_name__
        globals()[__new_name__] = wrapped
        __all__.append(__new_name__)
initialize()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from plot import Ploter
__all__ = ['Ploter']
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
class PlotData(object):
    """
    One curve, kept as two parallel lists: ``step`` and ``value``.
    """

    def __init__(self):
        self.step, self.value = [], []

    def append(self, step, value):
        """Add one point to the end of the curve."""
        self.step += [step]
        self.value += [value]

    def reset(self):
        """Drop all points by rebinding both lists to new empty lists."""
        self.step, self.value = [], []
class Ploter(object):
    """
    Collects (step, value) points under a fixed set of titles and draws one
    curve per title with matplotlib, showing the figure through IPython.

    Plotting is switched off when the environment variable ``DISABLE_PLOT``
    is exactly ``"True"`` at construction time; matplotlib and IPython are
    then never imported and ``plot`` does nothing.
    """

    def __init__(self, *args):
        """
        :param args: titles of the curves; one PlotData is created per title.
        """
        self.__args__ = args
        self.__plot_data__ = {}
        for title in args:
            self.__plot_data__[title] = PlotData()
        # Demos in notebooks use Ploter to draw figures, but when a notebook
        # (ipynb) is converted to a py file for testing, importing matplotlib
        # makes the script crash. `export DISABLE_PLOT=True` skips importing
        # these libraries.
        self.__disable_plot__ = os.environ.get("DISABLE_PLOT")
        if not self.__plot_is_disabled__():
            import matplotlib.pyplot as plt
            from IPython import display
            self.plt = plt
            self.display = display

    def __plot_is_disabled__(self):
        """Return True when DISABLE_PLOT was exactly the string "True"."""
        return self.__disable_plot__ == "True"

    def append(self, title, step, value):
        """
        Add one point to the curve named ``title``.

        :param title: a title that was passed to the constructor.
        :param step: position of the point on the x axis.
        :param value: position of the point on the y axis.
        """
        assert isinstance(title, basestring)
        # membership test instead of dict.has_key(): has_key() does not exist
        # in Python 3, and `in` behaves the same in Python 2.
        assert title in self.__plot_data__
        data = self.__plot_data__[title]
        assert isinstance(data, PlotData)
        data.append(step, value)

    def plot(self):
        """
        Redraw every non-empty curve, replacing the previously displayed
        output. Does nothing when plotting is disabled.
        """
        if self.__plot_is_disabled__():
            return

        titles = []
        for title in self.__args__:
            data = self.__plot_data__[title]
            assert isinstance(data, PlotData)
            # empty curves are neither drawn nor listed in the legend
            if len(data.step) > 0:
                titles.append(title)
                self.plt.plot(data.step, data.value)
        self.plt.legend(titles, loc='upper left')
        self.display.clear_output(wait=True)
        self.display.display(self.plt.gcf())
        # clear the figure so the next call starts from an empty canvas
        self.plt.gcf().clear()

    def reset(self):
        """Remove all collected points from every curve."""
        for key in self.__plot_data__:
            data = self.__plot_data__[key]
            assert isinstance(data, PlotData)
            data.reset()
from IPython import display
import os
class PlotCost(object):
    """
    Accumulates train and test costs (typically from an event handler) and
    draws both curves when ``plot`` is called.
    """

    def __init__(self):
        # each series is a pair of parallel lists: (steps, costs)
        self.train_costs = ([], [])
        self.test_costs = ([], [])

        self.__disable_plot__ = os.environ.get("DISABLE_PLOT")
        if self.__plot_is_disabled__():
            return
        import matplotlib.pyplot as plt
        self.plt = plt

    def __plot_is_disabled__(self):
        # plotting is off only for the exact string "True"
        return "True" == self.__disable_plot__

    def plot(self):
        """Draw both series; does nothing when plotting is disabled."""
        if self.__plot_is_disabled__():
            return

        train_steps, train_values = self.train_costs
        test_steps, test_values = self.test_costs
        self.plt.plot(train_steps, train_values)
        self.plt.plot(test_steps, test_values)

        # only non-empty series get a legend entry
        labelled = (('Train Cost', train_steps), ('Test Cost', test_steps))
        title = [label for label, steps in labelled if len(steps) > 0]

        self.plt.legend(title, loc='upper left')
        display.clear_output(wait=True)
        display.display(self.plt.gcf())
        self.plt.gcf().clear()

    def append_train_cost(self, step, cost):
        """Record one training cost at the given step."""
        steps, costs = self.train_costs
        steps.append(step)
        costs.append(cost)

    def append_test_cost(self, step, cost):
        """Record one test cost at the given step."""
        steps, costs = self.test_costs
        steps.append(step)
        costs.append(cost)

    def reset(self):
        """Discard every recorded cost."""
        self.train_costs = ([], [])
        self.test_costs = ([], [])
add_test(NAME test_ploter
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/plot/tests/run_tests.sh
${PYTHON_EXECUTABLE})
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import test_ploter
# __all__ lists module names, not file names: the '.py' suffix would make
# `from ... import *` look for a name that does not exist.
__all__ = ['test_ploter']
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Resolve the directory that holds this script and work from there, so the
# relative paths below do not depend on the caller's working directory.
# All expansions are quoted so that paths containing spaces keep working.
pushd "$(dirname "$0")" > /dev/null
SCRIPTPATH=$PWD
popd > /dev/null
cd "$SCRIPTPATH" || exit 1

# $1 is the python interpreter to use; install the built wheel(s) with it.
"$1" -m pip install ../../../../../paddle/dist/*.whl

# Ploter skips importing matplotlib/IPython when DISABLE_PLOT is "True".
export DISABLE_PLOT="True"
test_list="test_ploter.py"
export PYTHONPATH="$PWD/../../../../../python/"

# Run each test file and stop at the first failure.
for fn in $test_list
do
  echo "test $fn"
  "$1" "$fn"
  if [ $? -ne 0 ]; then
    exit 1
  fi
done
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from paddle.v2.plot import Ploter
class TestCommon(unittest.TestCase):
    """Checks that Ploter stores points per title and clears them on reset."""

    def test_append(self):
        title1 = "title1"
        title2 = "title2"
        plot_test = Ploter(title1, title2)

        points = [(title1, 1, 2), (title1, 2, 5), (title2, 3, 4)]
        for title, step, value in points:
            plot_test.append(title, step, value)

        # every title keeps its own steps and values, in insertion order
        filled = [(title1, [1, 2], [2, 5]), (title2, [3], [4])]
        for title, steps, values in filled:
            self.assertEqual(plot_test.__plot_data__[title].step, steps)
            self.assertEqual(plot_test.__plot_data__[title].value, values)

        plot_test.reset()

        # after reset every series is empty again
        for title in (title1, title2):
            self.assertEqual(plot_test.__plot_data__[title].step, [])
            self.assertEqual(plot_test.__plot_data__[title].value, [])
if __name__ == '__main__':
unittest.main()
......@@ -19,6 +19,7 @@ import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.pooling as pooling
import paddle.v2.networks as networks
import paddle.v2.evaluator as evaluator
pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10))
......@@ -262,5 +263,20 @@ class NetworkTests(unittest.TestCase):
print layer.parse_network(vgg_out)
class EvaluatorTest(unittest.TestCase):
def test_evaluator(self):
img = layer.data(name='pixel', type=data_type.dense_vector(784))
output = layer.fc(input=img,
size=10,
act=activation.Softmax(),
name='fc_here')
lbl = layer.data(name='label', type=data_type.integer_value(10))
cost = layer.cross_entropy_cost(input=output, label=lbl)
evaluator.classification_error(input=output, label=lbl)
print layer.parse_network(cost)
print layer.parse_network(output)
if __name__ == '__main__':
unittest.main()
......@@ -52,6 +52,12 @@ class SGD(object):
self.__topology__ = topology
self.__parameters__ = parameters
self.__topology_in_proto__ = topology.proto()
# In local mode, disable sparse_remote_update.
for param in self.__topology_in_proto__.parameters:
if param.sparse_remote_update:
param.sparse_remote_update = False
self.__data_types__ = topology.data_type()
gm = api.GradientMachine.createFromConfigProto(
self.__topology_in_proto__, api.CREATE_MODE_NORMAL,
......
......@@ -7,7 +7,8 @@ packages=['paddle',
'paddle.utils',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader']
'paddle.v2.reader',
'paddle.v2.plot']
setup(name='paddle',
version='${PADDLE_VERSION}',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册