Commit ba39e688 authored by: D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into srl_api_v2

@@ -52,6 +52,10 @@ def wrap_param_default(param_names=None,
                    kwargs[name] = default_factory(func)
            return func(*args, **kwargs)

        if hasattr(func, 'argspec'):
            __wrapper__.argspec = func.argspec
        else:
            __wrapper__.argspec = inspect.getargspec(func)
        return __wrapper__

    return __impl__
...
@@ -14,6 +14,7 @@
import functools
import collections
import inspect

from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
@@ -316,6 +317,11 @@ def layer_support(*attrs):
                        val.check(method.__name__)
            return method(*args, **kwargs)

        if hasattr(method, 'argspec'):
            wrapper.argspec = method.argspec
        else:
            wrapper.argspec = inspect.getargspec(method)
        return wrapper

    return decorator
...
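Both hunks above add the same pattern: each decorator now records the wrapped function's argument specification on its wrapper, so code that introspects a decorated layer function (for example the v2 layer conversion further down, which reads conf_helps.<name>.argspec.args) still sees the original parameter names after several layers of wrapping. A minimal, self-contained sketch of the pattern (my_decorator and greet are illustrative names, not from the commit):

import functools
import inspect


def my_decorator(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Reuse an argspec recorded by an earlier decorator, or capture it now,
    # so that stacked decorators all expose the original signature.
    if hasattr(func, 'argspec'):
        wrapper.argspec = func.argspec
    else:
        wrapper.argspec = inspect.getargspec(func)
    return wrapper


@my_decorator
def greet(name, punctuation='!'):
    return 'Hello, ' + name + punctuation


print(greet.argspec.args)  # ['name', 'punctuation']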
""" """
CIFAR Dataset. CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
URL: https://www.cs.toronto.edu/~kriz/cifar.html
the default train_creator, test_creator used for CIFAR-10 dataset.
""" """
import cPickle import cPickle
import itertools import itertools
import tarfile
import numpy import numpy
import paddle.v2.dataset.common
import tarfile
from config import download __all__ = ['train100', 'test100', 'train10', 'test10']
__all__ = [
'cifar_100_train_creator', 'cifar_100_test_creator', 'train_creator',
'test_creator'
]
CIFAR10_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
CIFAR100_URL = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
def __read_batch__(filename, sub_name): def reader_creator(filename, sub_name):
def reader(): def read_batch(batch):
def __read_one_batch_impl__(batch): data = batch['data']
data = batch['data'] labels = batch.get('labels', batch.get('fine_labels', None))
labels = batch.get('labels', batch.get('fine_labels', None)) assert labels is not None
assert labels is not None for sample, label in itertools.izip(data, labels):
for sample, label in itertools.izip(data, labels): yield (sample / 255.0).astype(numpy.float32), int(label)
yield (sample / 255.0).astype(numpy.float32), int(label)
def reader():
with tarfile.open(filename, mode='r') as f: with tarfile.open(filename, mode='r') as f:
names = (each_item.name for each_item in f names = (each_item.name for each_item in f
if sub_name in each_item.name) if sub_name in each_item.name)
for name in names: for name in names:
batch = cPickle.load(f.extractfile(name)) batch = cPickle.load(f.extractfile(name))
for item in __read_one_batch_impl__(batch): for item in read_batch(batch):
yield item yield item
return reader return reader
def cifar_100_train_creator(): def train100():
fn = download(url=CIFAR100_URL, md5=CIFAR100_MD5) return reader_creator(
return __read_batch__(fn, 'train') paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
'train')
def cifar_100_test_creator():
fn = download(url=CIFAR100_URL, md5=CIFAR100_MD5)
return __read_batch__(fn, 'test')
def train_creator():
"""
Default train reader creator. Use CIFAR-10 dataset.
"""
fn = download(url=CIFAR10_URL, md5=CIFAR10_MD5)
return __read_batch__(fn, 'data_batch')
def test_creator(): def test100():
""" return reader_creator(
Default test reader creator. Use CIFAR-10 dataset. paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
""" 'test')
fn = download(url=CIFAR10_URL, md5=CIFAR10_MD5)
return __read_batch__(fn, 'test_batch')
def unittest(): def train10():
for _ in train_creator()(): return reader_creator(
pass paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
for _ in test_creator()(): 'data_batch')
pass
if __name__ == '__main__': def test10():
unittest() return reader_creator(
paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
'test_batch')
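A short usage sketch (not part of the commit) of how these creators are consumed: each call returns a reader function, and calling the reader yields (image, label) pairs, exactly as the unit test further down does.

import paddle.v2.dataset.cifar as cifar

reader = cifar.train10()       # downloads the tarball if needed, returns a reader
for image, label in reader():  # the reader is a generator of (image, label) pairs
    assert image.size == 32 * 32 * 3  # 3072 float32 values scaled to [0, 1]
    assert 0 <= label <= 9
    break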
The new shared helper module for downloading and checksumming dataset files:

import requests
import hashlib
import os
import shutil

__all__ = ['DATA_HOME', 'download', 'md5file']

DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')

if not os.path.exists(DATA_HOME):
    os.makedirs(DATA_HOME)


def md5file(fname):
    hash_md5 = hashlib.md5()
    f = open(fname, "rb")
    for chunk in iter(lambda: f.read(4096), b""):
        hash_md5.update(chunk)
    f.close()
    return hash_md5.hexdigest()


def download(url, module_name, md5sum):
    dirname = os.path.join(DATA_HOME, module_name)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    filename = os.path.join(dirname, url.split('/')[-1])
    if not (os.path.exists(filename) and md5file(filename) == md5sum):
        r = requests.get(url, stream=True)
        with open(filename, 'w') as f:
            shutil.copyfileobj(r.raw, f)

    return filename
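A hedged usage sketch of the two helpers (not part of the commit; the URL and checksum below are the same ones the unit test uses):

import paddle.v2.dataset.common as common

# Files are cached under ~/.cache/paddle/dataset/<module_name>/ and re-downloaded
# only when the MD5 checksum no longer matches.
path = common.download(
    'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460', 'test',
    'f75287202d6622414c706c36c16f8e0d')
print(path)                  # e.g. ~/.cache/paddle/dataset/test/1548775?v=3&s=460
print(common.md5file(path))  # 'f75287202d6622414c706c36c16f8e0d'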
The old download helper that the common module above replaces is removed:

import hashlib
import os
import shutil
import urllib2

__all__ = ['DATA_HOME', 'download']

DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')

if not os.path.exists(DATA_HOME):
    os.makedirs(DATA_HOME)


def download(url, md5):
    filename = os.path.split(url)[-1]
    assert DATA_HOME is not None
    filepath = os.path.join(DATA_HOME, md5)
    if not os.path.exists(filepath):
        os.makedirs(filepath)
    __full_file__ = os.path.join(filepath, filename)

    def __file_ok__():
        if not os.path.exists(__full_file__):
            return False
        md5_hash = hashlib.md5()
        with open(__full_file__, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        return md5_hash.hexdigest() == md5

    while not __file_ok__():
        response = urllib2.urlopen(url)
        with open(__full_file__, mode='wb') as of:
            shutil.copyfileobj(fsrc=response, fdst=of)

    return __full_file__
The MNIST module likewise drops the sklearn-based train_creator/test_creator (which fetched "MNIST original" through sklearn.datasets.mldata and split it with sklearn.model_selection.train_test_split, TEST_SIZE = 10000) in favor of readers that stream the original IDX files:

"""
MNIST dataset.
"""
import numpy
import paddle.v2.dataset.common
import subprocess

__all__ = ['train', 'test']

URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/'

TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
TEST_IMAGE_MD5 = '25e3cc63507ef6e98d5dc541e8672bb6'
TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz'
TEST_LABEL_MD5 = '4e9511fe019b2189026bd0421ba7b688'
TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz'
TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz'
TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'


def reader_creator(image_filename, label_filename, buffer_size):
    def reader():
        # According to http://stackoverflow.com/a/38061619/724872, we
        # cannot use standard package gzip here.
        m = subprocess.Popen(["zcat", image_filename], stdout=subprocess.PIPE)
        m.stdout.read(16)  # skip some magic bytes

        l = subprocess.Popen(["zcat", label_filename], stdout=subprocess.PIPE)
        l.stdout.read(8)  # skip some magic bytes

        while True:
            labels = numpy.fromfile(
                l.stdout, 'ubyte', count=buffer_size).astype("int")

            if labels.size != buffer_size:
                break  # numpy.fromfile returns empty slice after EOF.

            images = numpy.fromfile(
                m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
                    (buffer_size, 28 * 28)).astype('float32')

            images = images / 255.0 * 2.0 - 1.0

            for i in xrange(buffer_size):
                yield images[i, :], int(labels[i])

        m.terminate()
        l.terminate()

    return reader


def train():
    return reader_creator(
        paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
                                          TRAIN_IMAGE_MD5),
        paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
                                          TRAIN_LABEL_MD5), 100)


def test():
    return reader_creator(
        paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
                                          TEST_IMAGE_MD5),
        paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
                                          TEST_LABEL_MD5), 100)
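A hedged usage sketch of the MNIST readers (not part of the commit). Because the readers shell out to zcat, the snippet assumes that binary is on PATH:

import paddle.v2.dataset.mnist as mnist

for image, label in mnist.train()():   # train() returns a reader; calling it yields samples
    assert image.size == 28 * 28       # 784 float32 values scaled to [-1, 1]
    assert 0 <= label <= 9
    break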
One more dataset module only switches its download import from the removed config helper to common:

import zipfile
from common import download
import re
import random
import functools
...
The commit also adds unit tests for the new dataset packages:

import paddle.v2.dataset.cifar
import unittest


class TestCIFAR(unittest.TestCase):
    def check_reader(self, reader):
        sum = 0
        label = 0
        for l in reader():
            self.assertEqual(l[0].size, 3072)
            if l[1] > label:
                label = l[1]
            sum += 1
        return sum, label

    def test_test10(self):
        instances, max_label_value = self.check_reader(
            paddle.v2.dataset.cifar.test10())
        self.assertEqual(instances, 10000)
        self.assertEqual(max_label_value, 9)

    def test_train10(self):
        instances, max_label_value = self.check_reader(
            paddle.v2.dataset.cifar.train10())
        self.assertEqual(instances, 50000)
        self.assertEqual(max_label_value, 9)

    def test_test100(self):
        instances, max_label_value = self.check_reader(
            paddle.v2.dataset.cifar.test100())
        self.assertEqual(instances, 10000)
        self.assertEqual(max_label_value, 99)

    def test_train100(self):
        instances, max_label_value = self.check_reader(
            paddle.v2.dataset.cifar.train100())
        self.assertEqual(instances, 50000)
        self.assertEqual(max_label_value, 99)


if __name__ == '__main__':
    unittest.main()
import paddle.v2.dataset.common
import unittest
import tempfile


class TestCommon(unittest.TestCase):
    def test_md5file(self):
        _, temp_path = tempfile.mkstemp()
        with open(temp_path, 'w') as f:
            f.write("Hello\n")
        self.assertEqual('09f7e02f1290be211da707a266f153b3',
                         paddle.v2.dataset.common.md5file(temp_path))

    def test_download(self):
        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
        self.assertEqual(
            paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
            paddle.v2.dataset.common.download(
                yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))


if __name__ == '__main__':
    unittest.main()
import paddle.v2.dataset.mnist
import unittest


class TestMNIST(unittest.TestCase):
    def check_reader(self, reader):
        sum = 0
        label = 0
        for l in reader():
            self.assertEqual(l[0].size, 784)
            if l[1] > label:
                label = l[1]
            sum += 1
        return sum, label

    def test_train(self):
        instances, max_label_value = self.check_reader(
            paddle.v2.dataset.mnist.train())
        self.assertEqual(instances, 60000)
        self.assertEqual(max_label_value, 9)

    def test_test(self):
        instances, max_label_value = self.check_reader(
            paddle.v2.dataset.mnist.test())
        self.assertEqual(instances, 10000)
        self.assertEqual(max_label_value, 9)


if __name__ == '__main__':
    unittest.main()
@@ -67,6 +67,7 @@ paddle.v2.parameters.create, no longer exposed to users.
"""

import collections
import inspect

import paddle.trainer_config_helpers as conf_helps
from paddle.trainer_config_helpers.config_parser_utils import \
@@ -74,26 +75,14 @@ from paddle.trainer_config_helpers.config_parser_utils import \
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.default_decorators import wrap_act_default
from paddle.trainer_config_helpers.default_decorators import \
    wrap_bias_attr_default
from paddle.trainer_config_helpers.layers import layer_support

import data_type
import activation
import attr

The long hand-maintained __all__ list of layer names ('fc', 'conv_shift', 'img_conv', ..., 'hsigmoid', 'eos') shrinks to the two entries that are still defined explicitly; every other layer name is appended to it by the conversion loop below:

__all__ = ['parse_network', 'data']

__projection_names__ = filter(lambda x: x.endswith('_projection'),
                              dir(conf_helps))
@@ -289,83 +278,51 @@ data = DataLayerV2

AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel
The explicit layer_list table of [V2 name, v1 method name, parent names] triples (e.g. ['fc', 'fc_layer', ['input']], ..., ['eos', 'eos_layer', ['input']]) and the loop that registered each entry through __convert_to_v2__ are replaced by name-mapping helpers that walk dir(conf_helps) and derive the same information from each v1 layer's preserved argspec:

def __layer_name_mapping__(inname):
    if inname in ['data_layer', 'memory', 'mixed_layer']:
        # Do Not handle these layers
        return
    elif inname == 'maxid_layer':
        return 'max_id'
    elif inname.endswith('memory') or inname.endswith(
            '_seq') or inname.endswith('_sim') or inname == 'hsigmoid':
        return inname
    elif inname in [
            'cross_entropy', 'multi_binary_label_cross_entropy',
            'cross_entropy_with_selfnorm'
    ]:
        return inname + "_cost"
    elif inname.endswith('_cost'):
        return inname
    elif inname.endswith("_layer"):
        return inname[:-len("_layer")]


def __layer_name_mapping_parent_names__(inname):
    all_args = getattr(conf_helps, inname).argspec.args
    return filter(
        lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b',
                        'expand_as', 'weights', 'vectors', 'weight', 'score',
                        'left', 'right'],
        all_args)


def __convert_layer__(_new_name_, _old_name_, _parent_names_):
    global __all__
    __all__.append(_new_name_)
    globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_)


for each_layer_name in dir(conf_helps):
    new_name = __layer_name_mapping__(each_layer_name)
    if new_name is not None:
        parent_names = __layer_name_mapping_parent_names__(each_layer_name)
        assert len(parent_names) != 0, each_layer_name
        __convert_layer__(new_name, each_layer_name, parent_names)

del parent_names
del new_name
del each_layer_name

# convert projection
for prj in __projection_names__:
...
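For orientation, a hedged sketch of what the automatic conversion yields once paddle.v2.layer is imported; the specific names checked here follow from the mapping rules above, and nothing in this snippet is part of the commit:

import paddle.v2.layer as layer

# After the module-level loop runs at import time, the derived v2 names are
# available on paddle.v2.layer:
assert hasattr(layer, 'fc')                  # 'fc_layer'      -> 'fc'
assert hasattr(layer, 'max_id')              # 'maxid_layer'   -> 'max_id'
assert hasattr(layer, 'cross_entropy_cost')  # 'cross_entropy' -> 'cross_entropy_cost'
assert 'pooling' in layer.__all__            # 'pooling_layer' -> 'pooling'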
The layer test drops its now-unused imports (difflib, paddle.trainer_config_helpers, and parse_network_config) and updates the aggregate-layer test to call layer.pooling, the name the automatic conversion derives from pooling_layer, instead of layer.pool:

@@ -11,17 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.pooling as pooling

pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10))
@@ -70,7 +66,7 @@ class ImageLayerTest(unittest.TestCase):

class AggregateLayerTest(unittest.TestCase):
    def test_aggregate_layer(self):
        pool = layer.pooling(
            input=pixel,
            pooling_type=pooling.Avg(),
            agg_level=layer.AggregateLevel.EACH_SEQUENCE)
...
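Finally, a hedged sketch, not taken from the commit, of how the converted v2 layers are meant to compose, reusing the pixel/label fixtures from the test above; it assumes keyword arguments such as size and act pass through to the underlying v1 layers:

import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer

pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10))

# 'fc' and 'classification_cost' are among the names re-exported by the
# conversion loop; parse_network renders the resulting network as a protobuf
# configuration.
hidden = layer.fc(input=pixel, size=100, act=activation.Sigmoid())
cost = layer.classification_cost(input=hidden, label=label)
print(layer.parse_network(cost))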