提交 52f6c9a6 编写于 作者: L Luo Tao

Merge branch 'develop' into link

......@@ -8,10 +8,13 @@ os:
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
......@@ -39,18 +42,23 @@ addons:
- lcov
- graphviz
- swig
- clang-format-3.8
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE`
if [ $? -eq 0 ]; then # when git diff fails (non-zero exit), skip this check and run the unit tests.
if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
script:
- paddle/scripts/travis/main.sh
notifications:
......
# External dependency to Google protobuf.
http_archive(
name = "protobuf",
url = "http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256 = "0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix = "protobuf-3.1.0",
)
name="protobuf",
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix="protobuf-3.1.0", )
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
new_http_archive(
name = "gtest",
url = "https://github.com/google/googletest/archive/release-1.7.0.zip",
sha256 = "b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
build_file = "third_party/gtest.BUILD",
strip_prefix = "googletest-release-1.7.0",
)
name="gtest",
url="https://github.com/google/googletest/archive/release-1.7.0.zip",
sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
build_file="third_party/gtest.BUILD",
strip_prefix="googletest-release-1.7.0", )
......@@ -25,4 +25,3 @@ test 4 2 256 512
test 4 2 512 128
test 4 2 512 256
test 4 2 512 512
......@@ -10,4 +10,4 @@ Then you can run the command below. The flag -d specifies the training data (cif
$python gan_trainer.py -d cifar --use_gpu 1
The generated images will be stored in ./cifar_samples/
The corresponding models will be stored in ./cifar_params/
\ No newline at end of file
The corresponding models will be stored in ./cifar_params/
......@@ -15,4 +15,3 @@ set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
......@@ -15,5 +15,3 @@ do
gunzip ${fname}.gz
fi
done
......@@ -14,10 +14,9 @@
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
assert mode in set([
"generator", "discriminator", "generator_training", "discriminator_training"
])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
......@@ -38,8 +37,8 @@ sample_dim = 2
settings(
batch_size=128,
learning_rate=1e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
learning_method=AdamOptimizer(beta1=0.5))
def discriminator(sample):
"""
......@@ -50,70 +49,87 @@ def discriminator(sample):
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0)
hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn, name="dis_prob", size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
bias_attr = ParamAttr(
is_static=is_generator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(
input=sample,
name="dis_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(
input=hidden,
name="dis_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(
is_static=is_generator_training, initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(
input=hidden_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0)
hidden = fc_layer(input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
bias_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(
input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(
input=hidden,
name="gen_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(
is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(
input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
......@@ -126,7 +142,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:
......
......@@ -15,10 +15,9 @@ from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
dataSource = get_config_arg("data", str, "mnist")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
assert mode in set([
"generator", "discriminator", "generator_training", "discriminator_training"
])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
......@@ -36,24 +35,33 @@ noise_dim = 100
gf_dim = 64
df_dim = 64
if dataSource == "mnist":
sample_dim = 28 # image dim
c_dim = 1 # image color
sample_dim = 28 # image dim
c_dim = 1 # image color
else:
sample_dim = 32
c_dim = 3
s2, s4 = int(sample_dim/2), int(sample_dim/4),
s8, s16 = int(sample_dim/8), int(sample_dim/16)
s2, s4 = int(sample_dim / 2), int(sample_dim / 4),
s8, s16 = int(sample_dim / 8), int(sample_dim / 16)
settings(
batch_size=128,
learning_rate=2e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
learning_method=AdamOptimizer(beta1=0.5))
def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
param_attr, bias_attr, param_attr_bn, bn, trans=False,
act=ReluActivation()):
def conv_bn(input,
channels,
imgSize,
num_filters,
output_x,
stride,
name,
param_attr,
bias_attr,
param_attr_bn,
bn,
trans=False,
act=ReluActivation()):
"""
conv_bn is a utility function that constructs a convolution/deconv layer
with an optional batch_norm layer
......@@ -63,10 +71,10 @@ def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
:param trans: whether to use conv (False) or deconv (True)
:type trans: bool
"""
# calculate the filter_size and padding size based on the given
# imgSize and output size
tmp = imgSize - (output_x - 1) * stride
tmp = imgSize - (output_x - 1) * stride
if tmp <= 1 or tmp > 5:
raise ValueError("conv input-output dimension does not fit")
elif tmp <= 3:
......@@ -76,111 +84,134 @@ def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
filter_size = tmp
padding = 0
print (imgSize, output_x, stride, filter_size, padding)
print(imgSize, output_x, stride, filter_size, padding)
if trans:
nameApx = "_conv"
else:
nameApx = "_convt"
if bn:
conv = img_conv_layer(input, filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx, num_channels=channels,
act=LinearActivation(), groups=1, stride=stride,
padding=padding, bias_attr=bias_attr,
param_attr=param_attr, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None,
trans=trans)
conv_bn = batch_norm_layer(conv,
act=act,
name=name + nameApx + "_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
conv = img_conv_layer(
input,
filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx,
num_channels=channels,
act=LinearActivation(),
groups=1,
stride=stride,
padding=padding,
bias_attr=bias_attr,
param_attr=param_attr,
shared_biases=True,
layer_attr=None,
filter_size_y=None,
stride_y=None,
padding_y=None,
trans=trans)
conv_bn = batch_norm_layer(
conv,
act=act,
name=name + nameApx + "_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
return conv_bn
else:
conv = img_conv_layer(input, filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx, num_channels=channels,
act=act, groups=1, stride=stride,
padding=padding, bias_attr=bias_attr,
param_attr=param_attr, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None,
trans=trans)
conv = img_conv_layer(
input,
filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx,
num_channels=channels,
act=act,
groups=1,
stride=stride,
padding=padding,
bias_attr=bias_attr,
param_attr=param_attr,
shared_biases=True,
layer_attr=None,
filter_size_y=None,
stride_y=None,
padding_y=None,
trans=trans)
return conv
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=0.0,
initial_std=0.02)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=0.0,
initial_std=0.0)
param_attr_bn=ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02)
h1 = fc_layer(input=noise,
name="gen_layer_h1",
size=s8 * s8 * gf_dim * 4,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
h1_bn = batch_norm_layer(h1,
act=ReluActivation(),
name="gen_layer_h1_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
h2_bn = conv_bn(h1_bn,
channels=gf_dim*4,
output_x=s8,
num_filters=gf_dim*2,
imgSize=s4,
stride=2,
name="gen_layer_h2",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True,
trans=True)
h3_bn = conv_bn(h2_bn,
channels=gf_dim*2,
output_x=s4,
num_filters=gf_dim,
imgSize=s2,
stride=2,
name="gen_layer_h3",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True,
trans=True)
return conv_bn(h3_bn,
channels=gf_dim,
output_x=s2,
num_filters=c_dim,
imgSize=sample_dim,
stride=2,
name="gen_layer_h4",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=False,
trans=True,
act=TanhActivation())
param_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.02)
bias_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.0)
param_attr_bn = ParamAttr(
is_static=is_discriminator_training, initial_mean=1.0, initial_std=0.02)
h1 = fc_layer(
input=noise,
name="gen_layer_h1",
size=s8 * s8 * gf_dim * 4,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
h1_bn = batch_norm_layer(
h1,
act=ReluActivation(),
name="gen_layer_h1_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
h2_bn = conv_bn(
h1_bn,
channels=gf_dim * 4,
output_x=s8,
num_filters=gf_dim * 2,
imgSize=s4,
stride=2,
name="gen_layer_h2",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True,
trans=True)
h3_bn = conv_bn(
h2_bn,
channels=gf_dim * 2,
output_x=s4,
num_filters=gf_dim,
imgSize=s2,
stride=2,
name="gen_layer_h3",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True,
trans=True)
return conv_bn(
h3_bn,
channels=gf_dim,
output_x=s2,
num_filters=c_dim,
imgSize=sample_dim,
stride=2,
name="gen_layer_h4",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=False,
trans=True,
act=TanhActivation())
def discriminator(sample):
......@@ -191,58 +222,60 @@ def discriminator(sample):
of the sample is from generator and dimension 1 is the probability
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training,
initial_mean=0.0,
initial_std=0.02)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=0.0,
initial_std=0.0)
param_attr_bn=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0.02)
h0 = conv_bn(sample,
channels=c_dim,
imgSize=sample_dim,
num_filters=df_dim,
output_x=s2,
stride=2,
name="dis_h0",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=False)
h1_bn = conv_bn(h0,
channels=df_dim,
imgSize=s2,
num_filters=df_dim*2,
output_x=s4,
stride=2,
name="dis_h1",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True)
h2_bn = conv_bn(h1_bn,
channels=df_dim*2,
imgSize=s4,
num_filters=df_dim*4,
output_x=s8,
stride=2,
name="dis_h2",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True)
return fc_layer(input=h2_bn, name="dis_prob", size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
param_attr = ParamAttr(
is_static=is_generator_training, initial_mean=0.0, initial_std=0.02)
bias_attr = ParamAttr(
is_static=is_generator_training, initial_mean=0.0, initial_std=0.0)
param_attr_bn = ParamAttr(
is_static=is_generator_training, initial_mean=1.0, initial_std=0.02)
h0 = conv_bn(
sample,
channels=c_dim,
imgSize=sample_dim,
num_filters=df_dim,
output_x=s2,
stride=2,
name="dis_h0",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=False)
h1_bn = conv_bn(
h0,
channels=df_dim,
imgSize=s2,
num_filters=df_dim * 2,
output_x=s4,
stride=2,
name="dis_h1",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True)
h2_bn = conv_bn(
h1_bn,
channels=df_dim * 2,
imgSize=s4,
num_filters=df_dim * 4,
output_x=s8,
stride=2,
name="dis_h2",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True)
return fc_layer(
input=h2_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
if is_generator_training:
......@@ -250,13 +283,14 @@ if is_generator_training:
sample = generator(noise)
if is_discriminator_training:
sample = data_layer(name="sample", size=sample_dim * sample_dim*c_dim)
sample = data_layer(name="sample", size=sample_dim * sample_dim * c_dim)
if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:
......
......@@ -16,7 +16,7 @@ import argparse
import random
import numpy
import cPickle
import sys,os
import sys, os
from PIL import Image
from paddle.trainer.config_parser import parse_config
......@@ -24,6 +24,7 @@ from paddle.trainer.config_parser import logger
import py_paddle.swig_paddle as api
import matplotlib.pyplot as plt
def plot2DScatter(data, outputfile):
'''
Plot the data as a 2D scatter plot and save to outputfile
......@@ -41,9 +42,11 @@ def plot2DScatter(data, outputfile):
plt.scatter(x, y)
plt.savefig(outputfile, bbox_inches='tight')
def CHECK_EQ(a, b):
    '''
    Check that two values compare equal.

    :param a: first value to compare
    :param b: second value to compare
    :raises AssertionError: if ``a == b`` is false; the message shows both
                            values
    '''
    # Raise explicitly instead of using an `assert` statement: asserts are
    # compiled out under `python -O`, which would silently disable the check.
    if not a == b:
        raise AssertionError("a=%s, b=%s" % (a, b))
def copy_shared_parameters(src, dst):
'''
copy the parameters from src to dst
......@@ -52,11 +55,9 @@ def copy_shared_parameters(src, dst):
:param dst: the destination of the parameters
:type dst: GradientMachine
'''
src_params = [src.getParameter(i)
for i in xrange(src.getParameterSize())]
src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
src_params = dict([(p.getName(), p) for p in src_params])
for i in xrange(dst.getParameterSize()):
dst_param = dst.getParameter(i)
src_param = src_params.get(dst_param.getName(), None)
......@@ -67,15 +68,17 @@ def copy_shared_parameters(src, dst):
CHECK_EQ(len(src_value), len(dst_value))
dst_value.copyFrom(src_value)
dst_param.setValueUpdated()
def print_parameters(src):
src_params = [src.getParameter(i)
for i in xrange(src.getParameterSize())]
src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
print "***************"
for p in src_params:
print "Name is %s" % p.getName()
print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray(
)
def load_mnist_data(imageFile):
f = open(imageFile, "rb")
......@@ -86,33 +89,36 @@ def load_mnist_data(imageFile):
n = 60000
else:
n = 10000
data = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28))
data = numpy.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
data = data / 255.0 * 2.0 - 1.0
f.close()
return data.astype('float32')
def load_cifar_data(cifar_path):
batch_size = 10000
data = numpy.zeros((5*batch_size, 32*32*3), dtype = "float32")
data = numpy.zeros((5 * batch_size, 32 * 32 * 3), dtype="float32")
for i in range(1, 6):
file = cifar_path + "/data_batch_" + str(i)
fo = open(file, 'rb')
dict = cPickle.load(fo)
fo.close()
data[(i - 1)*batch_size:(i*batch_size), :] = dict["data"]
data[(i - 1) * batch_size:(i * batch_size), :] = dict["data"]
data = data / 255.0 * 2.0 - 1.0
return data
# synthesize 2-D uniform data
def load_uniform_data(num_samples=1000000):
    '''
    Synthesize 2-D data drawn with numpy.random.rand.

    :param num_samples: number of 2-D points to generate; the default keeps
                        the original size of one million points
    :type num_samples: int
    :return: array of shape (num_samples, 2) with dtype float32
    '''
    return numpy.random.rand(num_samples, 2).astype('float32')
def merge(images, size):
if images.shape[1] == 28*28:
if images.shape[1] == 28 * 28:
h, w, c = 28, 28, 1
else:
h, w, c = 32, 32, 3
......@@ -124,6 +130,7 @@ def merge(images, size):
((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
return img.astype('uint8')
def save_images(images, path):
merged_img = merge(images, [8, 8])
if merged_img.shape[2] == 1:
......@@ -131,14 +138,17 @@ def save_images(images, path):
else:
im = Image.fromarray(merged_img, mode="RGB")
im.save(path)
def get_real_samples(batch_size, data_np):
return data_np[numpy.random.choice(data_np.shape[0], batch_size,
replace=False),:]
return data_np[numpy.random.choice(
data_np.shape[0], batch_size, replace=False), :]
def get_noise(batch_size, noise_dim):
    '''
    Sample a batch of noise vectors with numpy.random.normal.

    :param batch_size: number of noise vectors (rows)
    :param noise_dim: length of each noise vector (columns)
    :return: float32 array of shape (batch_size, noise_dim)
    '''
    noise_shape = (batch_size, noise_dim)
    gaussian = numpy.random.normal(size=noise_shape)
    return gaussian.astype('float32')
def get_fake_samples(generator_machine, batch_size, noise):
gen_inputs = api.Arguments.createArguments(1)
gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
......@@ -147,12 +157,14 @@ def get_fake_samples(generator_machine, batch_size, noise):
fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
return fake_samples
def get_training_loss(training_machine, inputs):
    '''
    Run one forward pass with api.PASS_TEST and return the mean of the first
    output slot.

    :param training_machine: object whose forward() is invoked
    :param inputs: arguments fed to the forward pass
    :return: numpy.mean over the first output slot, copied to a numpy matrix
    '''
    forward_outputs = api.Arguments.createArguments(0)
    training_machine.forward(inputs, forward_outputs, api.PASS_TEST)
    return numpy.mean(forward_outputs.getSlotValue(0).copyToNumpyMat())
def prepare_discriminator_data_batch_pos(batch_size, data_np):
real_samples = get_real_samples(batch_size, data_np)
labels = numpy.ones(batch_size, dtype='int32')
......@@ -161,6 +173,7 @@ def prepare_discriminator_data_batch_pos(batch_size, data_np):
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
labels = numpy.zeros(batch_size, dtype='int32')
......@@ -169,6 +182,7 @@ def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
def prepare_generator_data_batch(batch_size, noise):
label = numpy.ones(batch_size, dtype='int32')
inputs = api.Arguments.createArguments(2)
......@@ -193,10 +207,9 @@ def get_layer_size(model_conf, layer_name):
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
parser.add_argument("--use_gpu", default="1",
help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0",
help="the gpu_id parameter")
parser.add_argument(
"--use_gpu", default="1", help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
args = parser.parse_args()
data_source = args.data_source
use_gpu = args.use_gpu
......@@ -208,30 +221,32 @@ def main():
if not os.path.exists("./%s_params/" % data_source):
os.makedirs("./%s_params/" % data_source)
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100',
'--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source)
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
'--log_period=100', '--gpu_id=' + args.gpu_id,
'--save_dir=' + "./%s_params/" % data_source)
if data_source == "uniform":
conf = "gan_conf.py"
num_iter = 10000
else:
conf = "gan_conf_image.py"
num_iter = 1000
gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
dis_conf = parse_config(conf,
"mode=discriminator_training,data=" + data_source)
generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
batch_size = dis_conf.opt_config.batch_size
noise_dim = get_layer_size(gen_conf.model_config, "noise")
if data_source == "mnist":
data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
elif data_source == "cifar":
data_np = load_cifar_data("./data/cifar-10-batches-py/")
else:
data_np = load_uniform_data()
# this creates a gradient machine for discriminator
dis_training_machine = api.GradientMachine.createFromConfigProto(
dis_conf.model_config)
......@@ -244,26 +259,24 @@ def main():
logger.info(str(generator_conf.model_config))
generator_machine = api.GradientMachine.createFromConfigProto(
generator_conf.model_config)
dis_trainer = api.Trainer.create(
dis_conf, dis_training_machine)
gen_trainer = api.Trainer.create(
gen_conf, gen_training_machine)
dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
dis_trainer.startTrain()
gen_trainer.startTrain()
# Sync parameters between networks (GradientMachine) at the beginning
copy_shared_parameters(gen_training_machine, dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
# Constraint: neither the discriminator nor the generator may be trained
# more than MAX_strike times consecutively
curr_train = "dis"
curr_strike = 0
MAX_strike = 5
for train_pass in xrange(100):
dis_trainer.startTrainPass()
gen_trainer.startTrainPass()
......@@ -272,23 +285,25 @@ def main():
noise = get_noise(batch_size, noise_dim)
data_batch_dis_pos = prepare_discriminator_data_batch_pos(
batch_size, data_np)
dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos)
dis_loss_pos = get_training_loss(dis_training_machine,
data_batch_dis_pos)
data_batch_dis_neg = prepare_discriminator_data_batch_neg(
generator_machine, batch_size, noise)
dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg)
dis_loss_neg = get_training_loss(dis_training_machine,
data_batch_dis_neg)
dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
# Do forward pass in generator to get the gen_loss
data_batch_gen = prepare_generator_data_batch(
batch_size, noise)
data_batch_gen = prepare_generator_data_batch(batch_size, noise)
gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
if i % 100 == 0:
print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg)
print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos,
dis_loss_neg)
print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)
# Decide which network to train based on the training history
# And the relative size of the loss
if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
......@@ -297,11 +312,12 @@ def main():
curr_strike += 1
else:
curr_train = "dis"
curr_strike = 1
curr_strike = 1
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
copy_shared_parameters(dis_training_machine, gen_training_machine)
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
copy_shared_parameters(dis_training_machine,
gen_training_machine)
else:
if curr_train == "gen":
curr_strike += 1
......@@ -311,19 +327,23 @@ def main():
gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
# TODO: add API for paddle to allow true parameter sharing between different GradientMachines
# so that we do not need to copy shared parameters.
copy_shared_parameters(gen_training_machine, dis_training_machine)
copy_shared_parameters(gen_training_machine,
dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
dis_trainer.finishTrainPass()
gen_trainer.finishTrainPass()
# At the end of each pass, save the generated samples/images
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
if data_source == "uniform":
plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" %
(data_source, train_pass))
else:
save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
save_images(fake_samples, "./%s_samples/train_pass%s.png" %
(data_source, train_pass))
dis_trainer.finishTrain()
gen_trainer.finishTrain()
if __name__ == '__main__':
main()
......@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states
......@@ -46,11 +45,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
define_py_data_sources2(
train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
......@@ -58,10 +58,9 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
gradient_clipping_threshold=25)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
......@@ -73,17 +72,15 @@ for i in range(3):
# The input to the current layer is the sum of the hidden state
# and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state])
hidden_state = simple_lstm(input=current_input, size=128,
lstm_cell_attr=ExtraAttr(drop_rate=0.1))
hidden_state = simple_lstm(
input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
output = fc_layer(
input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
......
......@@ -33,7 +33,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
......@@ -48,7 +48,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels_list) - 3:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
......@@ -69,7 +69,6 @@ def extract_dict_features(pair_file, feature_file):
feature_out.write(feature_str + '\n')
if __name__ == '__main__':
usage = '-p pair_file -f feature_file'
......
......@@ -66,8 +66,8 @@ def transform_labels(sentences, labels):
else:
verb_list = []
for x in labels[i][0]:
if x !='-':
verb_list.append(x)
if x != '-':
verb_list.append(x)
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
......@@ -93,7 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair
......@@ -103,7 +103,7 @@ def write_file(sen_lab_pair, output_file):
sentence = x[0]
label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__':
......
......@@ -21,7 +21,7 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
# all inputs are integer-valued sequences
settings.slots = [
integer_value_sequence(len(word_dict)),
......@@ -29,25 +29,28 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(2),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
def get_batch_size(yeild_data):
    '''
    Return the length of the first element of a yielded sample.

    :param yeild_data: indexable sample whose first element supports len()
    :return: len(yeild_data[0])
    '''
    first_slot = yeild_data[0]
    return len(first_slot)
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
@provider(
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=False,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
......
......@@ -20,7 +20,7 @@ from paddle.trainer_config_helpers import *
#file paths
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file= './data/verbDict.txt'
predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
......@@ -47,7 +47,6 @@ if not is_predict:
w = line.strip()
predicate_dict[w] = i
if is_test:
train_list_file = None
......@@ -57,9 +56,11 @@ if not is_predict:
test_list=test_list_file,
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict })
args={
'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict
})
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
......@@ -77,24 +78,16 @@ mark_dim = 5
hidden_dim = 512
depth = 8
########################### Optimizer #######################################
settings(
batch_size=150,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
is_async=False,
model_average=ModelAverage(average_window=0.5,
max_average_window=10000),
)
model_average=ModelAverage(
average_window=0.5, max_average_window=10000), )
####################################### network ##############################
#8 features and 1 target
......@@ -108,22 +101,28 @@ ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
default_std=1/math.sqrt(hidden_dim)/3.0
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(
size=word_dim,
input=predicate,
param_attr=ParameterAttribute(
name='vemb', initial_std=default_std))
mark_embedding = embedding_layer(
name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
embedding_layer(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
......@@ -131,84 +130,89 @@ hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
bias_attr=std_default,
input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
input=[
full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
hidden_para_attr = ParameterAttribute(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(
name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = mixed_layer(name='hidden'+str(i),
size=hidden_dim,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
]
)
lstm = lstmemory(name='lstm'+str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2)==1),
bias_attr=std_0,
param_attr=lstm_para_attr)
mix_hidden = mixed_layer(
name='hidden' + str(i),
size=hidden_dim,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = lstmemory(
name='lstm' + str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(name='output',
size=label_dict_len,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
],
)
feature_out = mixed_layer(
name='output',
size=label_dict_len,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
if not is_predict:
crf_l = crf_layer( name = 'crf',
size = label_dict_len,
input = feature_out,
label = target,
param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
)
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
size = label_dict_len,
input = feature_out,
label = target,
param_attr=ParameterAttribute(name='crfw')
)
crf_l = crf_layer(
name='crf',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(
name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(name='crfw'))
eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l)
else:
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
size = label_dict_len,
input = feature_out,
param_attr=ParameterAttribute(name='crfw')
)
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l)
......@@ -26,7 +26,8 @@ UNK_IDX = 0
class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
def __init__(self, train_conf, dict_file, model_dir, label_file,
predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
......@@ -35,7 +36,7 @@ class Prediction():
self.dict = {}
self.labels = {}
self.predicate_dict={}
self.predicate_dict = {}
self.labels_reverse = {}
self.load_dict_label(dict_file, label_file, predicate_dict_file)
......@@ -44,25 +45,18 @@ class Prediction():
len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
',pred_len=' + str(len_pred) +
',is_predict=True')
train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_pred),
integer_value_sequence(2)
]
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_pred), integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file, predicate_dict_file):
......@@ -78,6 +72,7 @@ class Prediction():
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
......@@ -88,9 +83,10 @@ class Prediction():
).split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
......@@ -99,7 +95,7 @@ class Prediction():
marks = mark.split()
mark_slot = [int(w) for w in marks]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
......@@ -123,8 +119,9 @@ class Prediction():
def option_parser():
usage = ("python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
usage = (
"python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
......@@ -187,8 +184,9 @@ def main():
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
predict.predict(data_file,output_file)
predict = Prediction(train_conf, dict_file, model_path, label_file,
predict_dict_file)
predict.predict(data_file, output_file)
if __name__ == '__main__':
......
......@@ -71,9 +71,7 @@ class SentimentPrediction():
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [
self.word_dict[w] for w in words if w in self.word_dict
]
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
......@@ -85,8 +83,8 @@ class SentimentPrediction():
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
print("predicting label is %s" %
(self.label[lab[0]]))
print("predicting label is %s" % (self.label[lab[0]]))
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
......@@ -143,9 +141,10 @@ def main():
batch.append([predict.get_index(line)])
if len(batch) == batch_size:
predict.batch_predict(batch)
batch=[]
batch = []
if len(batch) > 0:
predict.batch_predict(batch)
if __name__ == '__main__':
main()
......@@ -14,6 +14,13 @@ cd paddle
git submodule update --init --recursive
```
If you already have a local PaddlePaddle repo but have not initialized the submodule, your local submodule folder will be empty. In that case, simply run the last command above (`git submodule update --init --recursive`) in your PaddlePaddle home directory to initialize the submodule folder.
If you have already initialized your submodule and would like to sync it with the upstream submodule repo, you can run the following command:
```
git submodule update --remote
```
## <span id="requirements">Requirements</span>
To compile the source code, your computer must be equipped with the following dependencies.
......
......@@ -122,9 +122,9 @@ The general development workflow with Docker and Bazel is as follows:
git clone --recursive https://github.com/paddlepaddle/paddle
2. Build a development Docker image `paddle:dev` from the source code.
This image contains all the development tools and dependencies of
PaddlePaddle.
2. Build a development Docker image :code:`paddle:dev` from the source
code. This image contains all the development tools and
dependencies of PaddlePaddle.
.. code-block:: bash
......@@ -139,14 +139,22 @@ The general development workflow with Docker and Bazel is as follows:
.. code-block:: bash
docker run \
-d # run the container in background mode \
--name paddle # we can run a nginx container to serve documents \
-p 2022:22 # so we can SSH into this container \
-v $PWD:/paddle # mount the source code \
-v $HOME/.cache/bazel:/root/.cache/bazel # mount Bazel cache \
docker run \
-d \
--name paddle \
-p 2022:22 \
-v $PWD:/paddle \
-v $HOME/.cache/bazel:/root/.cache/bazel \
paddle:dev
where :code:`-d` makes the container run in the background,
:code:`--name paddle` allows us to run an nginx container to serve
documents in this container, :code:`-p 2022:22` allows us to SSH
into this container, :code:`-v $PWD:/paddle` shares the source code
on the host with the container, and :code:`-v
$HOME/.cache/bazel:/root/.cache/bazel` shares the Bazel cache on the
host with the container.
4. SSH into the container:
.. code-block:: bash
......
......@@ -306,4 +306,4 @@ I1116 09:10:18.019069 50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:
I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165
I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
```
\ No newline at end of file
```
......@@ -40,4 +40,4 @@ spec:
- name: jobpath
mountPath: /home/jobpath
restartPolicy: Never
\ No newline at end of file
......@@ -19,7 +19,6 @@ import socket
import os
import argparse
# configuration for cluster
API = "/api/v1/namespaces/"
JOBSELECTOR = "labelSelector=job-name="
......@@ -145,8 +144,8 @@ def startPaddle(idMap={}, train_args_dict=None):
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog="start_paddle.py",
description='simple tool for k8s')
parser = argparse.ArgumentParser(
prog="start_paddle.py", description='simple tool for k8s')
args, train_args_list = parser.parse_known_args()
train_args = refine_unknown_args(train_args_list)
train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
......
情感分析教程
===========================
.. toctree::
:maxdepth: 3
:glob:
情感分析教程
===========================
.. toctree::
:maxdepth: 3
:glob:
Training Locally <sentiment_analysis.md>
\ No newline at end of file
......@@ -28,4 +28,4 @@ $(document).ready(function(){
$('.doc-menu-vertical').find('li.current').last().addClass('active');
$('.doc-menu-vertical').perfectScrollbar();
});
\ No newline at end of file
});
......@@ -15,8 +15,8 @@ limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "Internal.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
std::vector<int> GradientMachine::defaultParamTypes = {
PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM};
......
......@@ -16,14 +16,13 @@ limitations under the License. */
#include "PaddleAPI.h"
#include <vector>
#include <algorithm>
#include <vector>
template <typename T1, typename T2>
void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) {
dest->resize(src.size());
std::transform(src.begin(),
src.end(),
dest->begin(),
[](T1 t) { return static_cast<T2>(t); });
std::transform(src.begin(), src.end(), dest->begin(), [](T1 t) {
return static_cast<T2>(t);
});
}
......@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/CpuSparseMatrix.h"
#include <iostream>
#include <cstring>
#include <iostream>
#include "PaddleAPI.h"
#include "paddle/math/CpuSparseMatrix.h"
#include "paddle/math/SparseMatrix.h"
struct MatrixPrivate {
std::shared_ptr<paddle::Matrix> mat;
......
......@@ -16,8 +16,8 @@ limitations under the License. */
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <stdexcept>
#include <string>
#include <vector>
#include "paddle/utils/GlobalConstants.h"
#include "paddle/utils/TypeDefs.h"
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/parameter/Parameter.h"
#include "PaddleAPI.h"
struct ParameterPrivate {
std::shared_ptr<paddle::Parameter> sharedPtr;
......
......@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#include "paddle/parameter/ParameterOptimizer.h"
#include "Internal.h"
#include <algorithm>
#include "Internal.h"
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
struct ParameterOptimizerPrivate {
std::unique_ptr<paddle::ParameterOptimizer> optimizer;
......@@ -36,16 +36,13 @@ struct ParameterTraverseCallbackPrivate {
size_t sparseId) {
std::vector<paddle::VectorPtr> real_vecs;
real_vecs.resize(vecs.size());
std::transform(vecs.begin(),
vecs.end(),
real_vecs.begin(),
[](Vector* v) {
if (v) {
return *(paddle::VectorPtr*)(v->getSharedPtr());
} else {
return paddle::VectorPtr();
}
});
std::transform(vecs.begin(), vecs.end(), real_vecs.begin(), [](Vector* v) {
if (v) {
return *(paddle::VectorPtr*)(v->getSharedPtr());
} else {
return paddle::VectorPtr();
}
});
paddle::ParameterConfig& real_conf =
*(paddle::ParameterConfig*)(const_cast<ParameterConfig&>(conf)
......
......@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <iterator>
#include <sstream>
#include <vector>
#include "PaddleAPI.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/parameter/Argument.h"
#include "paddle/utils/Flags.h"
#include <vector>
#include <sstream>
#include <algorithm>
#include <iterator>
// used to represent partial sequence
struct Path {
......
......@@ -16,12 +16,12 @@ limitations under the License. */
#include "PaddleAPIPrivate.h"
#include <stdlib.h>
#include <memory>
#include <atomic>
#include <memory>
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/trainer/ParamUtil.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/trainer/TrainerInternal.h"
#include "paddle/utils/Flags.h"
......
......@@ -14,16 +14,16 @@ limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Flags.h"
#include "paddle/utils/Excepts.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/utils/Excepts.h"
#include "paddle/utils/Flags.h"
#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Util.h"
#include <fenv.h>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <algorithm>
void initPaddle(int argc, char** argv) {
paddle::initMain(argc, argv);
......
......@@ -282,7 +282,7 @@ FloatArray Vector::getData() const {
}
void Vector::copyFrom(Vector* src) throw(RangeError) {
if (src->m->vec->getSize() != m->vec->getSize()) {
if (src->m->vec->getSize() != m->vec->getSize()) {
throw RangeError();
}
m->vec->copyFrom(*src->m->vec);
......
......@@ -100,11 +100,12 @@ class TestMatrix(unittest.TestCase):
for a, e in zip(gpu_m.getData(), [1.0, 3.23, 3.0, 4.0, 5.0, 6.0]):
self.assertAlmostEqual(a, e)
def test_numpy(self):
numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32")
m = swig_paddle.Matrix.createDenseFromNumpy(numpy_mat)
self.assertEqual((int(m.getHeight()), int(m.getWidth())), numpy_mat.shape)
self.assertEqual((int(m.getHeight()), int(m.getWidth())),
numpy_mat.shape)
self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
for a, e in zip(m.getData(), [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]):
self.assertAlmostEqual(a, e)
......
......@@ -26,17 +26,17 @@ class TestIVector(unittest.TestCase):
self.assertEqual(m[i], 0)
m[i] = i
self.assertEqual(m[i], i)
m = swig_paddle.IVector.createZero(10)
self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
self.assertEqual(m.getData(), [0]*10)
self.assertEqual(m.getData(), [0] * 10)
def test_create(self):
m = swig_paddle.IVector.create(range(10), False)
self.assertIsNotNone(m)
for i in xrange(10):
self.assertEqual(m[i], i)
m = swig_paddle.IVector.create(range(10))
self.assertEqual(m.isGpu(), swig_paddle.isUsingGpu())
self.assertEqual(m.getData(), range(10))
......@@ -69,7 +69,7 @@ class TestIVector(unittest.TestCase):
expect_vec = range(0, 10)
expect_vec[4] = 7
self.assertEqual(vec.getData(), expect_vec)
def test_numpy(self):
vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32")
iv = swig_paddle.IVector.createVectorFromNumpy(vec)
......@@ -85,10 +85,10 @@ class TestVector(unittest.TestCase):
self.assertTrue(util.doubleEqual(v[i], 0))
v[i] = i
self.assertTrue(util.doubleEqual(v[i], i))
v = swig_paddle.Vector.createZero(10)
self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu())
self.assertEqual(v.getData(), [0]*10)
self.assertEqual(v.getData(), [0] * 10)
def testCreate(self):
v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)], False)
......@@ -96,14 +96,13 @@ class TestVector(unittest.TestCase):
for i in xrange(len(v)):
self.assertTrue(util.doubleEqual(v[i], i / 100.0))
self.assertEqual(100, len(v))
v = swig_paddle.Vector.create([x / 100.0 for x in xrange(100)])
self.assertEqual(v.isGpu(), swig_paddle.isUsingGpu())
self.assertEqual(100, len(v))
vdata = v.getData()
for i in xrange(len(v)):
self.assertTrue(util.doubleEqual(vdata[i], i / 100.0))
def testCpuNumpy(self):
numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32")
......@@ -128,7 +127,7 @@ class TestVector(unittest.TestCase):
for i in xrange(1, len(numpy_3)):
util.doubleEqual(numpy_3[i], vec[i])
def testNumpy(self):
numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32")
vec = swig_paddle.Vector.createVectorFromNumpy(numpy_arr)
......@@ -136,7 +135,6 @@ class TestVector(unittest.TestCase):
vecData = vec.getData()
for n, v in zip(numpy_arr, vecData):
self.assertTrue(util.doubleEqual(n, v))
def testCopyFromNumpy(self):
vec = swig_paddle.Vector.createZero(1, False)
......
......@@ -223,9 +223,9 @@ typedef struct {
#ifdef __NVCC__
#include "paddle/utils/Logging.h"
#include "hl_cuda.h"
#include "cuda_runtime.h"
#include "hl_cuda.h"
#include "paddle/utils/Logging.h"
extern __thread bool g_sync_flag;
extern __thread cudaStream_t default_stream;
......
......@@ -16,8 +16,8 @@ limitations under the License. */
#define HL_DSO_LOADER_H_
#include <dlfcn.h>
#include <string>
#include <memory>
#include <string>
#include "hl_base.h"
/**
......
......@@ -15,28 +15,28 @@ limitations under the License. */
#ifndef HL_GPU_H_
#define HL_GPU_H_
#include "hl_aggregate.h"
#include "hl_base.h"
#include "hl_cnn.h"
#include "hl_cuda.h"
#include "hl_cuda_cublas.h"
#include "hl_cuda_cudnn.h"
#include "hl_matrix.h"
#include "hl_aggregate.h"
#include "hl_cnn.h"
#include "hl_sparse.h"
#include "hl_lstm.h"
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#include "hl_warpctc_wrap.h"
#ifdef HPPL_STUB_FUNC
#include "stub/hl_cuda_stub.h"
#include "stub/hl_cuda_cublas_stub.h"
#include "stub/hl_cuda_cudnn_stub.h"
#include "stub/hl_matrix_stub.h"
#include "stub/hl_aggregate_stub.h"
#include "stub/hl_cnn_stub.h"
#include "stub/hl_sparse_stub.h"
#include "stub/hl_cuda_cublas_stub.h"
#include "stub/hl_cuda_cudnn_stub.h"
#include "stub/hl_cuda_stub.h"
#include "stub/hl_lstm_stub.h"
#include "stub/hl_matrix_stub.h"
#include "stub/hl_sequence_stub.h"
#include "stub/hl_sparse_stub.h"
#endif
#endif /* HL_GPU_H_ */
......@@ -14,7 +14,7 @@ limitations under the License. */
#ifndef HL_TIME_H_
#define HL_TIME_H_
#include <cstdint>
/**
* @brief High resolution timer.
*
......
......@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_cuda_cublas.h"
#include <sys/time.h>
#include <mutex>
#include "hl_cuda.h"
#include "hl_cuda_cublas.h"
#include "hl_thread.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
#include "paddle/utils/Logging.h"
namespace dynload {
......
......@@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_cuda_cudnn.h"
#include <cudnn.h>
#include <mutex>
#include "hl_cuda_cudnn.h"
#include "hl_cuda_cudnn.ph"
#include "hl_thread.ph"
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h"
#include "hl_thread.ph"
#include "paddle/utils/CommandLineParser.h"
#include "paddle/utils/Logging.h"
P_DEFINE_int32(cudnn_conv_workspace_limit_in_mb,
4096,
......
......@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_cuda.h"
#include <cuda_profiler_api.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <unistd.h>
#include <mutex>
#include "hl_cuda.h"
#include "hl_cuda.ph"
#include "hl_dso_loader.h"
#include "hl_thread.ph"
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#ifdef PADDLE_USE_DSO
#include <mutex>
#include <cuda_runtime.h>
#include <mutex>
#include "hl_dso_loader.h"
/**
......
......@@ -12,10 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <chrono>
#include "hl_time.h"
#include <stdlib.h>
#include <chrono>
#include <cstdint>
#include <iostream>
#include "hl_time.h"
using std::chrono::high_resolution_clock;
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <mutex>
#include "hl_warpctc_wrap.h"
#include <mutex>
#include "hl_dso_loader.h"
#include "paddle/utils/Logging.h"
......
......@@ -15,13 +15,13 @@ limitations under the License. */
#include "ActivationFunction.h"
#include <algorithm>
#include <memory>
#include <iostream>
#include <type_traits>
#include <memory>
#include <string>
#include <thread>
#include "paddle/utils/ClassRegistrar.h"
#include <type_traits>
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Logging.h"
......
......@@ -14,12 +14,12 @@ limitations under the License. */
#include "DataProvider.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Logging.h"
#include <algorithm>
#include <unistd.h>
#include <algorithm>
#include "ProtoDataProvider.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
namespace paddle {
......
......@@ -14,28 +14,28 @@ limitations under the License. */
#pragma once
#include <vector>
#include <memory>
#include <mutex>
#include <iostream>
#include <fstream>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <vector>
#include "DataConfig.pb.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/Vector.h"
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Locks.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Queue.h"
#include "paddle/utils/Locks.h"
#include "paddle/utils/ThreadLocal.h"
#include "paddle/utils/TypeDefs.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Util.h"
#include "paddle/math/Vector.h"
#include "DataConfig.pb.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/parameter/Argument.h"
namespace paddle {
/**
......
......@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Util.h"
#include "MultiDataProvider.h"
#include "paddle/utils/Logging.h"
#include <algorithm>
#include "paddle/utils/Logging.h"
#include "paddle/utils/Util.h"
namespace paddle {
......
......@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "ProtoDataProvider.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/StringUtil.h"
#include <algorithm>
#include <fstream>
#include <istream>
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Logging.h"
#include "DataProviderGroup.h"
#include "paddle/utils/Logging.h"
P_DEFINE_double(memory_threshold_on_load_data,
1.0,
......@@ -562,16 +562,16 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
auto mat = cpuArguments[slot].value;
mat->resize(size, dim);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
->copyFrom(dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseNonValueData.data(),
HPPL_STREAM_1);
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseNonValueData.data(),
HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
->copyFrom(dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseNonValueData.data());
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseNonValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
......@@ -598,16 +598,16 @@ int64_t ProtoDataProvider::getNextBatchInternal(int64_t size,
auto mat = cpuArguments[slot].value;
mat->resize(size, dim);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
->copyFrom(dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseFloatValueData.data(),
HPPL_STREAM_1);
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseFloatValueData.data(),
HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
->copyFrom(dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseFloatValueData.data());
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
dataPos.data(),
slots_[slot].indices.data(),
slots_[slot].sparseFloatValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
......
......@@ -16,8 +16,8 @@ limitations under the License. */
#include <vector>
#include "paddle/utils/Stat.h"
#include "DataFormat.pb.h"
#include "paddle/utils/Stat.h"
#include "DataProvider.h"
#include "ProtoReader.h"
......
......@@ -16,10 +16,10 @@ limitations under the License. */
#include <memory>
#include <google/protobuf/message_lite.h>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/io/gzip_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/message_lite.h>
namespace paddle {
......
......@@ -13,10 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "PyDataProvider.h"
#include "paddle/utils/PythonUtil.h"
#include <fenv.h>
#include "paddle/utils/Util.h"
#include "paddle/utils/Excepts.h"
#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Util.h"
namespace paddle {
......@@ -316,16 +316,16 @@ void PyDataProvider::handleSparseNonValueSlot(
auto mat = cpuArguments[slotIndex].value;
mat->resize(slot.sampleNum, dim, slot.sampleNum, NO_VALUE, SPARSE_CSR);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
->copyFrom(slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseNonValueData.data(),
HPPL_STREAM_1);
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseNonValueData.data(),
HPPL_STREAM_1);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
->copyFrom(slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseNonValueData.data());
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseNonValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
......@@ -347,16 +347,16 @@ void PyDataProvider::handleSparseValueSlot(
auto mat = cpuArguments[slotIndex].value;
mat->resize(slot.sampleNum, dim, slot.sampleNum, FLOAT_VALUE, SPARSE_CSR);
if (std::dynamic_pointer_cast<GpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)
->copyFrom(slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseFloatValueData.data(),
HPPL_STREAM_DEFAULT);
std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseFloatValueData.data(),
HPPL_STREAM_DEFAULT);
} else if (std::dynamic_pointer_cast<CpuSparseMatrix>(mat)) {
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
->copyFrom(slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseFloatValueData.data());
std::dynamic_pointer_cast<CpuSparseMatrix>(mat)->copyFrom(
slot.sampleSequenceIdVec.data(),
slot.indices.data(),
slot.sparseFloatValueData.data());
} else {
LOG(FATAL) << "Not Supported";
}
......
......@@ -15,18 +15,18 @@ limitations under the License. */
#ifndef PADDLE_NO_PYTHON
#include <Python.h>
#include <numpy/numpyconfig.h>
#include <stdio.h>
#include <stdlib.h>
#include <unordered_set>
#include <list>
#include <numpy/numpyconfig.h>
#include <unordered_set>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>
#include "DataProvider.h"
#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Locks.h"
#include "paddle/utils/PythonUtil.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......@@ -400,10 +400,9 @@ private:
if (this->loadThread_) { // wait poolActualSize < poolSize;
std::unique_lock<std::mutex> l(mtx_);
pushCV_.wait(l,
[this, additionalBatchSize] {
return this->poolActualSize_ < poolSize_;
});
pushCV_.wait(l, [this, additionalBatchSize] {
return this->poolActualSize_ < poolSize_;
});
}
{
......@@ -529,12 +528,10 @@ public:
// but, loading from cache, cache object should ensure
// data pool ready.
std::unique_lock<std::mutex> l(mtx_);
pullCV_.wait(l,
[this, &size] {
return this->poolActualSize_ >=
std::max(size, this->minPoolSize_) ||
callingContexts_.empty();
});
pullCV_.wait(l, [this, &size] {
return this->poolActualSize_ >= std::max(size, this->minPoolSize_) ||
callingContexts_.empty();
});
if (unittest::OnPoolFilled) {
(*unittest::OnPoolFilled)(this->poolActualSize_);
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/utils/Stat.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
......@@ -842,9 +842,9 @@ void PnpairEvaluator::calc(std::vector<PredictionResult>& predictArray) {
auto start = predictArray.begin();
while (start != predictArray.end()) {
auto end = std::find_if(
start + 1,
predictArray.end(),
[=](const PredictionResult& x) { return x.queryid != start->queryid; });
start + 1, predictArray.end(), [=](const PredictionResult& x) {
return x.queryid != start->queryid;
});
CHECK(end != start);
stat(start - predictArray.begin(),
end - predictArray.begin(),
......
......@@ -14,11 +14,11 @@ limitations under the License. */
#pragma once
#include "paddle/pserver/ParameterClient2.h"
#include "paddle/utils/ClassRegistrar.h"
#include <fstream>
#include "ModelConfig.pb.h"
#include "paddle/parameter/Argument.h"
#include <fstream>
#include "paddle/pserver/ParameterClient2.h"
#include "paddle/utils/ClassRegistrar.h"
namespace paddle {
......
......@@ -14,16 +14,16 @@ limitations under the License. */
#include "GradientMachine.h"
#include "paddle/utils/Logging.h"
#include <fstream>
#include "paddle/utils/Logging.h"
#include "hl_gpu.h"
#include "NeuralNetwork.h"
#include "ParallelNeuralNetwork.h"
#include "GradientMachineMode.h"
#include "MultiGradientMachine.h"
#include "NeuralNetwork.h"
#include "MultiNetwork.h"
#include "GradientMachineMode.h"
#include "NeuralNetwork.h"
#include "NeuralNetwork.h"
#include "ParallelNeuralNetwork.h"
#include "hl_gpu.h"
namespace paddle {
......
......@@ -17,15 +17,15 @@ limitations under the License. */
#include <iostream>
#include <vector>
#include "paddle/math/Matrix.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/parameter/ParameterUpdaterBase.h"
#include "paddle/utils/Thread.h"
#include "TrainerConfig.pb.h"
#include "ModelConfig.pb.h"
#include "TrainerConfig.pb.h"
#include "paddle/gserver/dataproviders/DataProvider.h"
#include "paddle/gserver/evaluators/Evaluator.h"
#include "paddle/gserver/layers/Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/parameter/ParameterUpdaterBase.h"
#include "paddle/utils/Thread.h"
namespace paddle {
/**
......
......@@ -18,9 +18,9 @@ limitations under the License. */
#include "GradientMachine.h"
#include "paddle/utils/Queue.h"
#include "paddle/utils/Locks.h"
#include "hl_gpu.h"
#include "paddle/utils/Locks.h"
#include "paddle/utils/Queue.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
#include <algorithm>
#include "MultiNetwork.h"
......
......@@ -14,15 +14,15 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/CustomStackTrace.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "hl_gpu.h"
#include "MultiNetwork.h"
#include "NeuralNetwork.h"
#include "RecurrentGradientMachine.h"
#include "MultiNetwork.h"
#include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
void parameterInitNN(int paramId,
......
......@@ -14,18 +14,18 @@ limitations under the License. */
#pragma once
#include <memory>
#include <map>
#include <functional>
#include <map>
#include <memory>
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/parameter/Parameter.h"
#include "ModelConfig.pb.h"
#include "paddle/gserver/dataproviders/DataProvider.h"
#include "paddle/gserver/gradientmachines/GradientMachine.h"
#include "paddle/gserver/layers/CostLayer.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/dataproviders/DataProvider.h"
#include "paddle/gserver/layers/Layer.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/utils/ClassRegistrar.h"
namespace paddle {
/*
......@@ -57,14 +57,13 @@ void parameterInitNN(int paramId,
class NeuralNetwork : public GradientMachine {
public:
virtual void init(
const ModelConfig& config,
ParamInitCallback callback = nullptr,
const std::vector<ParameterType>&
parameterTypes = std::vector<ParameterType>{PARAMETER_VALUE,
PARAMETER_GRADIENT,
PARAMETER_MOMENTUM},
bool useGpu = FLAGS_use_gpu);
virtual void init(const ModelConfig& config,
ParamInitCallback callback = nullptr,
const std::vector<ParameterType>& parameterTypes =
std::vector<ParameterType>{PARAMETER_VALUE,
PARAMETER_GRADIENT,
PARAMETER_MOMENTUM},
bool useGpu = FLAGS_use_gpu);
/**
* Connect two submodels and
......
......@@ -37,14 +37,13 @@ public:
NeuralNetwork *rootNetwork = nullptr)
: NeuralNetwork(subModelName, rootNetwork) {}
virtual void init(
const ModelConfig &config,
ParamInitCallback callback = nullptr,
const std::vector<ParameterType>
&parameterTypes = std::vector<ParameterType>{PARAMETER_VALUE,
PARAMETER_GRADIENT,
PARAMETER_MOMENTUM},
bool useGpu = FLAGS_use_gpu);
virtual void init(const ModelConfig &config,
ParamInitCallback callback = nullptr,
const std::vector<ParameterType> &parameterTypes =
std::vector<ParameterType>{PARAMETER_VALUE,
PARAMETER_GRADIENT,
PARAMETER_MOMENTUM},
bool useGpu = FLAGS_use_gpu);
virtual void forward(const std::vector<Argument> &inArgs,
std::vector<Argument> *outArgs,
......
......@@ -12,17 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
#include "paddle/utils/Flags.h"
#include "RecurrentGradientMachine.h"
#include <dlfcn.h>
#include <algorithm>
#include <cmath>
#include <functional>
#include <dlfcn.h>
#include <limits>
#include <cmath>
#include "RecurrentGradientMachine.h"
#include "NeuralNetwork.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/Flags.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
P_DEFINE_string(diy_beam_search_prob_so, "", "the diy beam search cost so");
......@@ -78,20 +78,22 @@ static inline SymbolType loadDiySymbol(const char* symbolName) {
return reinterpret_cast<SymbolType>(sym);
}
static InitFunction __init__diy_prob_method([] {
std::string soName = FLAGS_diy_beam_search_prob_so;
if (!soName.empty()) {
gDiyProbHandle = dlopen(soName.c_str(), RTLD_LAZY);
CHECK(gDiyProbHandle) << "Cannot Open DIY Prob So " << soName;
atexit(exit_diy_prob);
gDiyProbMethod =
loadDiySymbol<decltype(gDiyProbMethod)>(DIY_CALC_PROB_SYMBOL_NAME);
gDiyProbStart =
loadDiySymbol<decltype(gDiyProbStart)>(DIY_START_CALC_PROB_SYMBOL_NAME);
gDiyProbStop =
loadDiySymbol<decltype(gDiyProbStop)>(DIY_FINISH_CALC_PROB_SYMBOL_NAME);
}
}, std::numeric_limits<int>::max());
static InitFunction __init__diy_prob_method(
[] {
std::string soName = FLAGS_diy_beam_search_prob_so;
if (!soName.empty()) {
gDiyProbHandle = dlopen(soName.c_str(), RTLD_LAZY);
CHECK(gDiyProbHandle) << "Cannot Open DIY Prob So " << soName;
atexit(exit_diy_prob);
gDiyProbMethod =
loadDiySymbol<decltype(gDiyProbMethod)>(DIY_CALC_PROB_SYMBOL_NAME);
gDiyProbStart = loadDiySymbol<decltype(gDiyProbStart)>(
DIY_START_CALC_PROB_SYMBOL_NAME);
gDiyProbStop = loadDiySymbol<decltype(gDiyProbStop)>(
DIY_FINISH_CALC_PROB_SYMBOL_NAME);
}
},
std::numeric_limits<int>::max());
class BeamSearchControlCallbacks {
public:
......@@ -1281,10 +1283,9 @@ void RecurrentGradientMachine::beamSearch(size_t batchSize) {
std::vector<std::vector<int>*> prefixes;
prefixes.resize(paths.size());
std::transform(
paths.begin(),
paths.end(),
prefixes.begin(),
[](const Path& p) { return const_cast<std::vector<int>*>(&p.ids); });
paths.begin(), paths.end(), prefixes.begin(), [](const Path& p) {
return const_cast<std::vector<int>*>(&p.ids);
});
beamSearchCtrlCallbacks_->beamSearchCandidateAdjust(
prefixes, frames_[machineCur].get(), i);
}
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include <functional>
#include "GradientMachine.h"
#include "NeuralNetwork.h"
#include <functional>
#include "paddle/utils/Locks.h"
......
......@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "Layer.h"
#include "BatchNormBaseLayer.h"
#include "BatchNormalizationLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
#ifndef PADDLE_ONLY_CPU
#include "CudnnBatchNormLayer.h"
#endif
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#include "paddle/utils/Stat.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#include "Layer.h"
#include "BatchNormBaseLayer.h"
#include "Layer.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "Layer.h"
#include "Projection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "ContextProjection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "ConvBaseLayer.h"
#include "paddle/math/MathUtils.h"
#include "paddle/utils/Logging.h"
namespace paddle {
bool ConvBaseLayer::init(const LayerMap& layerMap,
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/math/Matrix.h"
#include "paddle/math/MathUtils.h"
#include "Operator.h"
#include "paddle/math/MathUtils.h"
#include "paddle/math/Matrix.h"
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "ConvProjection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory>
#include "CostLayer.h"
#include <algorithm>
#include "paddle/utils/Logging.h"
#include <cmath>
#include "CostLayer.h"
#include <memory>
#include "paddle/utils/Logging.h"
#include "paddle/math/SparseMatrix.h"
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "Layer.h"
#include "CudnnBatchNormLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include "paddle/utils/Stat.h"
#include "Layer.h"
#include "BatchNormBaseLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CudnnConvLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "CudnnConvLayer.h"
namespace paddle {
......
......@@ -14,10 +14,10 @@ limitations under the License. */
#pragma once
#include <vector>
#include "ConvBaseLayer.h"
#include "paddle/math/Matrix.h"
#include "Projection.h"
#include <vector>
#include "paddle/math/Matrix.h"
namespace paddle {
......
......@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CudnnPoolLayer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/math/Matrix.h"
#include "CudnnPoolLayer.h"
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "paddle/utils/Logging.h"
namespace paddle {
/**
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include <vector>
#include "ConvBaseLayer.h"
#include "paddle/math/Matrix.h"
#include <vector>
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ExpandConvLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "ExpandConvLayer.h"
namespace paddle {
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include "paddle/math/Matrix.h"
#include <vector>
#include "ExpandConvBaseLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ExpandConvTransLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "ExpandConvTransLayer.h"
/* The implementation of the convTransLayer is basically a swap of forward and
* backward of the original convLayer.
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include "paddle/math/Matrix.h"
#include <vector>
#include "ExpandConvBaseLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
......
......@@ -13,11 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "FullyConnectedLayer.h"
#include <algorithm>
#include <vector>
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/math/SparseMatrix.h"
#include <vector>
#include <algorithm>
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "GatedRecurrentLayer.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......@@ -386,8 +386,9 @@ void GatedRecurrentLayer::backwardBatch(int batchSize, MatrixPtr inputGrad) {
{
batchSize = outputGradTmp->getHeight();
gruValue.prevOutValue =
(n == 0 ? nullptr : (batchValue_->getBatchValue(n - 1, batchSize))
->getData());
(n == 0
? nullptr
: (batchValue_->getBatchValue(n - 1, batchSize))->getData());
gruGrad.prevOutGrad =
(n == 0 ? nullptr
: (batchGrad_->getBatchValue(n - 1, batchSize))->getData());
......
......@@ -14,10 +14,10 @@ limitations under the License. */
#pragma once
#include "paddle/math/Matrix.h"
#include "SequenceToBatch.h"
#include "GruCompute.h"
#include "Layer.h"
#include "SequenceToBatch.h"
#include "paddle/math/Matrix.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Util.h"
#include "GruCompute.h"
#include "hl_recurrent_apply.cuh"
#include "paddle/utils/Util.h"
namespace paddle {
......
......@@ -14,9 +14,9 @@ limitations under the License. */
#pragma once
#include "paddle/utils/TypeDefs.h"
#include "ModelConfig.pb.h"
#include "hl_gpu.h"
#include "paddle/utils/TypeDefs.h"
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "GruCompute.h"
#include "Layer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Stat.h"
#include "Projection.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......
......@@ -14,15 +14,15 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "paddle/utils/Logging.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Logging.h"
#include "AddtoLayer.h"
#include "CRFLayer.h"
#include "CosSimLayer.h"
#include "CostLayer.h"
#include "ExpandConvLayer.h"
#include "CRFLayer.h"
#include "DataLayer.h"
#include "ExpandConvLayer.h"
#include "FullyConnectedLayer.h"
#include "HierarchicalSigmoidLayer.h"
#include "MaxLayer.h"
......
......@@ -14,18 +14,18 @@ limitations under the License. */
#pragma once
#include <memory>
#include <functional>
#include <paddle/parameter/Argument.h>
#include "paddle/utils/ClassRegistrar.h"
#include <functional>
#include <memory>
#include "ModelConfig.pb.h"
#include "paddle/math/CpuSparseMatrix.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Util.h"
#include "ModelConfig.pb.h"
#include "paddle/gserver/activations/ActivationFunction.h"
#include <paddle/parameter/ParallelParameter.h>
#include <paddle/parameter/Weight.h>
#include "paddle/gserver/activations/ActivationFunction.h"
/// Macro for registering a layer type.
/// Example: REGISTER_LAYER(crf_error, CRFDecodingErrorLayer);
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "LinearChainCRF.h"
#include <algorithm>
namespace paddle {
......
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <math.h>
#include "LinearChainCTC.h"
#include <math.h>
#include <limits>
namespace paddle {
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册