提交 3d1f8b47 编写于 作者: L Luo Tao

Merge branch 'develop' into doc3

.gitignore
\ No newline at end of file
......@@ -8,3 +8,4 @@ build/
.cproject
.pydevproject
Makefile
.test_env/
cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 9)
set(PADDLE_PATCH_VERSION 0a0)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
......@@ -12,6 +8,17 @@ include(package)
find_package(SWIG 2.0)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
# Check protobuf library version.
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
OUTPUT_VARIABLE PROTOBUF_VERSION)
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
set(PROTOBUF_3 OFF)
if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0")
set(PROTOBUF_3 ON)
endif()
find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED)
find_package(ZLIB REQUIRED)
......@@ -45,7 +52,7 @@ option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
......@@ -64,31 +71,11 @@ include(check_packages)
include(swig)
include(coveralls)
# add PaddlePaddle version
if(DEFINED ENV{PADDLE_VERSION})
add_definitions(-DPADDLE_VERSION=\"$ENV{PADDLE_VERSION}\")
else()
if(EXISTS ${PROJ_ROOT}/.svn/)
find_package(Subversion REQUIRED)
if(SUBVERSION_FOUND)
Subversion_WC_INFO(${PROJ_ROOT} Project)
add_definitions(-DPADDLE_VERSION=${Project_WC_REVISION})
endif()
elseif(EXISTS ${PROJ_ROOT}/.git/)
find_package(Git REQUIRED)
execute_process(
COMMAND ${GIT_EXECUTABLE} log -1 --format=%H
WORKING_DIRECTORY ${PROJ_ROOT}
OUTPUT_VARIABLE GIT_SHA1
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT ${GIT_RESULT})
add_definitions(-DPADDLE_VERSION=\"${GIT_SHA1}\")
else()
message(WARNING "Cannot add paddle version from git tag")
endif()
endif()
endif()
# Set PaddlePaddle version to Git tag name or Git commit ID.
find_package(Git REQUIRED)
# version.cmake will get the current PADDLE_VERSION
include(version)
add_definitions(-DPADDLE_VERSION=\"${PADDLE_VERSION}\")
if(NOT WITH_GPU)
......
# Release v0.9.0
## New Features:
* New Layers
* bilinear interpolation layer.
* spatial pyramid-pool layer.
* de-convolution layer.
* maxout layer.
* Support rectangle padding, stride, window and input for Pooling Operation.
* Add —job=time in trainer, which can be used to print time info without compiler option -WITH_TIMER=ON.
* Expose cost_weight/nce_layer in `trainer_config_helpers`
* Add FAQ, concepts, h-rnn docs.
* Add Bidi-LSTM and DB-LSTM to quick start demo @alvations
* Add usage track scripts.
## Improvements
* Add Travis-CI for Mac OS X. Enable swig unittest in Travis-CI. Skip Travis-CI when only docs are changed.
* Add code coverage tools.
* Refine convolution layer to speedup and reduce GPU memory.
* Speed up PyDataProvider2
* Add ubuntu deb package build scripts.
* Make Paddle use git-flow branching model.
* PServer support no parameter blocks.
## Bug Fixes
* add zlib link to py_paddle
* add input sparse data check for sparse layer at runtime
* Bug fix for sparse matrix multiplication
* Fix floating-point overflow problem of tanh
* Fix some nvcc compile options
* Fix a bug in yield dictionary in DataProvider
* Fix SRL hang when exit.
# Release v0.8.0beta.1
New features:
* Mac OSX is supported by source code. #138
* Both GPU and CPU versions of PaddlePaddle are supported.
* Support CUDA 8.0
* Enhance `PyDataProvider2`
* Add dictionary yield format. `PyDataProvider2` can yield a dictionary with key is data_layer's name, value is features.
* Add `min_pool_size` to control memory pool in provider.
* Add `deb` install package & docker image for no_avx machines.
* Especially for cloud computing and virtual machines
* Automatically disable `avx` instructions in cmake when machine's CPU don't support `avx` instructions.
* Add Parallel NN api in trainer_config_helpers.
* Add `travis ci` for Github
Bug fixes:
* Several bugs in trainer_config_helpers. Also complete the unittest for trainer_config_helpers
* Check if PaddlePaddle is installed when unittest.
* Fix bugs in GTX series GPU
* Fix bug in MultinomialSampler
Also more documentation was written since last release.
# Release v0.8.0beta.0
PaddlePaddle v0.8.0beta.0 release. The install package is not stable yet and it's a pre-release version.
# Get the latest git tag.
set(PADDLE_VERSION $ENV{PADDLE_VERSION})
set(tmp_version "HEAD")
while ("${PADDLE_VERSION}" STREQUAL "")
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version}
WORKING_DIRECTORY ${PROJ_ROOT}
OUTPUT_VARIABLE GIT_TAG_NAME
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT ${GIT_RESULT})
# Check the tag is a correct version
if (${GIT_TAG_NAME} MATCHES "v[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
string(REPLACE "v" "" PADDLE_VERSION ${GIT_TAG_NAME})
else() # otherwise, get the previous git tag name.
set(tmp_version "${GIT_TAG_NAME}~1")
endif()
else()
set(PADDLE_VERSION "0.0.0")
message(WARNING "Cannot add paddle version from git tag")
endif()
endwhile()
message(STATUS "Paddle version is ${PADDLE_VERSION}")
output/
uniform_params/
cifar_params/
mnist_params/
*.png
.pydevproject
.project
*.log
*.pyc
data/mnist_data/
data/cifar-10-batches-py/
# Generative Adversarial Networks (GAN)
This demo implements GAN training described in the original GAN paper (https://arxiv.org/abs/1406.2661) and DCGAN (https://arxiv.org/abs/1511.06434).
The general training procedures are implemented in gan_trainer.py. The neural network configurations are specified in gan_conf.py (for synthetic data) and gan_conf_image.py (for image data).
In order to run the model, first download the corresponding data by running the shell script in ./data.
Then you can run the command below. The flag -d specifies the training data (cifar, mnist or uniform) and flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).
$python gan_trainer.py -d cifar --use_gpu 1
The generated images will be stored in ./cifar_samples/
The corresponding models will be stored in ./cifar_params/
\ No newline at end of file
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
#!/usr/bin/env sh
# This script downloads the mnist data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/mnist_data"
mkdir "$DIR/mnist_data"
cd "$DIR/mnist_data"
echo "Downloading..."
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
if [ ! -e $fname ]; then
wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
gunzip ${fname}.gz
fi
done
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
is_generator = mode == "generator"
is_discriminator = mode == "discriminator"
# The network structure below follows the ref https://arxiv.org/abs/1406.2661
# Here we used two hidden layers and batch_norm
print('mode=%s' % mode)
# the dim of the noise (z) as the input of the generator network
noise_dim = 10
# the dim of the hidden layer
hidden_dim = 10
# the dim of the generated sample
sample_dim = 2
settings(
batch_size=128,
learning_rate=1e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
def discriminator(sample):
"""
discriminator ouputs the probablity of a sample is from generator
or real data.
The output has two dimenstional: dimension 0 is the probablity
of the sample is from generator and dimension 1 is the probabblity
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0)
hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn, name="dis_prob", size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0)
hidden = fc_layer(input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
sample = generator(noise)
if is_discriminator_training:
sample = data_layer(name="sample", size=sample_dim)
if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
outputs(cost)
if is_generator:
noise = data_layer(name="noise", size=noise_dim)
outputs(generator(noise))
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
dataSource = get_config_arg("data", str, "mnist")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
is_generator = mode == "generator"
is_discriminator = mode == "discriminator"
# The network structure below follows the dcgan paper
# (https://arxiv.org/abs/1511.06434)
print('mode=%s' % mode)
# the dim of the noise (z) as the input of the generator network
noise_dim = 100
# the number of filters in the layer in generator/discriminator that is
# closet to the image
gf_dim = 64
df_dim = 64
if dataSource == "mnist":
sample_dim = 28 # image dim
c_dim = 1 # image color
else:
sample_dim = 32
c_dim = 3
s2, s4 = int(sample_dim/2), int(sample_dim/4),
s8, s16 = int(sample_dim/8), int(sample_dim/16)
settings(
batch_size=128,
learning_rate=2e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
param_attr, bias_attr, param_attr_bn, bn, trans=False,
act=ReluActivation()):
"""
conv_bn is a utility function that constructs a convolution/deconv layer
with an optional batch_norm layer
:param bn: whether to use batch_norm_layer
:type bn: bool
:param trans: whether to use conv (False) or deconv (True)
:type trans: bool
"""
# calculate the filter_size and padding size based on the given
# imgSize and ouput size
tmp = imgSize - (output_x - 1) * stride
if tmp <= 1 or tmp > 5:
raise ValueError("conv input-output dimension does not fit")
elif tmp <= 3:
filter_size = tmp + 2
padding = 1
else:
filter_size = tmp
padding = 0
print (imgSize, output_x, stride, filter_size, padding)
if trans:
nameApx = "_conv"
else:
nameApx = "_convt"
if bn:
conv = img_conv_layer(input, filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx, num_channels=channels,
act=LinearActivation(), groups=1, stride=stride,
padding=padding, bias_attr=bias_attr,
param_attr=param_attr, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None,
trans=trans)
conv_bn = batch_norm_layer(conv,
act=act,
name=name + nameApx + "_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
return conv_bn
else:
conv = img_conv_layer(input, filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx, num_channels=channels,
act=act, groups=1, stride=stride,
padding=padding, bias_attr=bias_attr,
param_attr=param_attr, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None,
trans=trans)
return conv
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=0.0,
initial_std=0.02)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=0.0,
initial_std=0.0)
param_attr_bn=ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02)
h1 = fc_layer(input=noise,
name="gen_layer_h1",
size=s8 * s8 * gf_dim * 4,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
h1_bn = batch_norm_layer(h1,
act=ReluActivation(),
name="gen_layer_h1_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
h2_bn = conv_bn(h1_bn,
channels=gf_dim*4,
output_x=s8,
num_filters=gf_dim*2,
imgSize=s4,
stride=2,
name="gen_layer_h2",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True,
trans=True)
h3_bn = conv_bn(h2_bn,
channels=gf_dim*2,
output_x=s4,
num_filters=gf_dim,
imgSize=s2,
stride=2,
name="gen_layer_h3",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True,
trans=True)
return conv_bn(h3_bn,
channels=gf_dim,
output_x=s2,
num_filters=c_dim,
imgSize=sample_dim,
stride=2,
name="gen_layer_h4",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=False,
trans=True,
act=TanhActivation())
def discriminator(sample):
"""
discriminator ouputs the probablity of a sample is from generator
or real data.
The output has two dimenstional: dimension 0 is the probablity
of the sample is from generator and dimension 1 is the probabblity
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training,
initial_mean=0.0,
initial_std=0.02)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=0.0,
initial_std=0.0)
param_attr_bn=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0.02)
h0 = conv_bn(sample,
channels=c_dim,
imgSize=sample_dim,
num_filters=df_dim,
output_x=s2,
stride=2,
name="dis_h0",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=False)
h1_bn = conv_bn(h0,
channels=df_dim,
imgSize=s2,
num_filters=df_dim*2,
output_x=s4,
stride=2,
name="dis_h1",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True)
h2_bn = conv_bn(h1_bn,
channels=df_dim*2,
imgSize=s4,
num_filters=df_dim*4,
output_x=s8,
stride=2,
name="dis_h2",
param_attr=param_attr,
bias_attr=bias_attr,
param_attr_bn=param_attr_bn,
bn=True)
return fc_layer(input=h2_bn, name="dis_prob", size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
sample = generator(noise)
if is_discriminator_training:
sample = data_layer(name="sample", size=sample_dim * sample_dim*c_dim)
if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
outputs(cost)
if is_generator:
noise = data_layer(name="noise", size=noise_dim)
outputs(generator(noise))
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import random
import numpy
import cPickle
import sys,os
from PIL import Image
from paddle.trainer.config_parser import parse_config
from paddle.trainer.config_parser import logger
import py_paddle.swig_paddle as api
import matplotlib.pyplot as plt
def plot2DScatter(data, outputfile):
'''
Plot the data as a 2D scatter plot and save to outputfile
data needs to be two dimensinoal
'''
x = data[:, 0]
y = data[:, 1]
logger.info("The mean vector is %s" % numpy.mean(data, 0))
logger.info("The std vector is %s" % numpy.std(data, 0))
heatmap, xedges, yedges = numpy.histogram2d(x, y, bins=50)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
plt.scatter(x, y)
plt.savefig(outputfile, bbox_inches='tight')
def CHECK_EQ(a, b):
assert a == b, "a=%s, b=%s" % (a, b)
def copy_shared_parameters(src, dst):
'''
copy the parameters from src to dst
:param src: the source of the parameters
:type src: GradientMachine
:param dst: the destination of the parameters
:type dst: GradientMachine
'''
src_params = [src.getParameter(i)
for i in xrange(src.getParameterSize())]
src_params = dict([(p.getName(), p) for p in src_params])
for i in xrange(dst.getParameterSize()):
dst_param = dst.getParameter(i)
src_param = src_params.get(dst_param.getName(), None)
if src_param is None:
continue
src_value = src_param.getBuf(api.PARAMETER_VALUE)
dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
CHECK_EQ(len(src_value), len(dst_value))
dst_value.copyFrom(src_value)
dst_param.setValueUpdated()
def print_parameters(src):
src_params = [src.getParameter(i)
for i in xrange(src.getParameterSize())]
print "***************"
for p in src_params:
print "Name is %s" % p.getName()
print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
def load_mnist_data(imageFile):
f = open(imageFile, "rb")
f.read(16)
# Define number of samples for train/test
if "train" in imageFile:
n = 60000
else:
n = 10000
data = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28))
data = data / 255.0 * 2.0 - 1.0
f.close()
return data.astype('float32')
def load_cifar_data(cifar_path):
batch_size = 10000
data = numpy.zeros((5*batch_size, 32*32*3), dtype = "float32")
for i in range(1, 6):
file = cifar_path + "/data_batch_" + str(i)
fo = open(file, 'rb')
dict = cPickle.load(fo)
fo.close()
data[(i - 1)*batch_size:(i*batch_size), :] = dict["data"]
data = data / 255.0 * 2.0 - 1.0
return data
# synthesize 2-D uniform data
def load_uniform_data():
data = numpy.random.rand(1000000, 2).astype('float32')
return data
def merge(images, size):
if images.shape[1] == 28*28:
h, w, c = 28, 28, 1
else:
h, w, c = 32, 32, 3
img = numpy.zeros((h * size[0], w * size[1], c))
for idx in xrange(size[0] * size[1]):
i = idx % size[1]
j = idx // size[1]
img[j*h:j*h+h, i*w:i*w+w, :] = \
((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
return img.astype('uint8')
def save_images(images, path):
merged_img = merge(images, [8, 8])
if merged_img.shape[2] == 1:
im = Image.fromarray(numpy.squeeze(merged_img)).convert('RGB')
else:
im = Image.fromarray(merged_img, mode="RGB")
im.save(path)
def get_real_samples(batch_size, data_np):
return data_np[numpy.random.choice(data_np.shape[0], batch_size,
replace=False),:]
def get_noise(batch_size, noise_dim):
return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
def get_fake_samples(generator_machine, batch_size, noise):
gen_inputs = api.Arguments.createArguments(1)
gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
gen_outputs = api.Arguments.createArguments(0)
generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
return fake_samples
def get_training_loss(training_machine, inputs):
outputs = api.Arguments.createArguments(0)
training_machine.forward(inputs, outputs, api.PASS_TEST)
loss = outputs.getSlotValue(0).copyToNumpyMat()
return numpy.mean(loss)
def prepare_discriminator_data_batch_pos(batch_size, data_np):
real_samples = get_real_samples(batch_size, data_np)
labels = numpy.ones(batch_size, dtype='int32')
inputs = api.Arguments.createArguments(2)
inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(real_samples))
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
labels = numpy.zeros(batch_size, dtype='int32')
inputs = api.Arguments.createArguments(2)
inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(fake_samples))
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
def prepare_generator_data_batch(batch_size, noise):
label = numpy.ones(batch_size, dtype='int32')
inputs = api.Arguments.createArguments(2)
inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(label))
return inputs
def find(iterable, cond):
for item in iterable:
if cond(item):
return item
return None
def get_layer_size(model_conf, layer_name):
layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
return layer_conf.size
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
parser.add_argument("--use_gpu", default="1",
help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0",
help="the gpu_id parameter")
args = parser.parse_args()
data_source = args.data_source
use_gpu = args.use_gpu
assert data_source in ["mnist", "cifar", "uniform"]
assert use_gpu in ["0", "1"]
if not os.path.exists("./%s_samples/" % data_source):
os.makedirs("./%s_samples/" % data_source)
if not os.path.exists("./%s_params/" % data_source):
os.makedirs("./%s_params/" % data_source)
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100',
'--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source)
if data_source == "uniform":
conf = "gan_conf.py"
num_iter = 10000
else:
conf = "gan_conf_image.py"
num_iter = 1000
gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
batch_size = dis_conf.opt_config.batch_size
noise_dim = get_layer_size(gen_conf.model_config, "noise")
if data_source == "mnist":
data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
elif data_source == "cifar":
data_np = load_cifar_data("./data/cifar-10-batches-py/")
else:
data_np = load_uniform_data()
# this creates a gradient machine for discriminator
dis_training_machine = api.GradientMachine.createFromConfigProto(
dis_conf.model_config)
# this create a gradient machine for generator
gen_training_machine = api.GradientMachine.createFromConfigProto(
gen_conf.model_config)
# generator_machine is used to generate data only, which is used for
# training discriminator
logger.info(str(generator_conf.model_config))
generator_machine = api.GradientMachine.createFromConfigProto(
generator_conf.model_config)
dis_trainer = api.Trainer.create(
dis_conf, dis_training_machine)
gen_trainer = api.Trainer.create(
gen_conf, gen_training_machine)
dis_trainer.startTrain()
gen_trainer.startTrain()
# Sync parameters between networks (GradientMachine) at the beginning
copy_shared_parameters(gen_training_machine, dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
# constrain that either discriminator or generator can not be trained
# consecutively more than MAX_strike times
curr_train = "dis"
curr_strike = 0
MAX_strike = 5
for train_pass in xrange(100):
dis_trainer.startTrainPass()
gen_trainer.startTrainPass()
for i in xrange(num_iter):
# Do forward pass in discriminator to get the dis_loss
noise = get_noise(batch_size, noise_dim)
data_batch_dis_pos = prepare_discriminator_data_batch_pos(
batch_size, data_np)
dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos)
data_batch_dis_neg = prepare_discriminator_data_batch_neg(
generator_machine, batch_size, noise)
dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg)
dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
# Do forward pass in generator to get the gen_loss
data_batch_gen = prepare_generator_data_batch(
batch_size, noise)
gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
if i % 100 == 0:
print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg)
print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)
# Decide which network to train based on the training history
# And the relative size of the loss
if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
((curr_train == "gen" and curr_strike == MAX_strike) or dis_loss > gen_loss):
if curr_train == "dis":
curr_strike += 1
else:
curr_train = "dis"
curr_strike = 1
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
copy_shared_parameters(dis_training_machine, gen_training_machine)
else:
if curr_train == "gen":
curr_strike += 1
else:
curr_train = "gen"
curr_strike = 1
gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
# TODO: add API for paddle to allow true parameter sharing between different GradientMachines
# so that we do not need to copy shared parameters.
copy_shared_parameters(gen_training_machine, dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
dis_trainer.finishTrainPass()
gen_trainer.finishTrainPass()
# At the end of each pass, save the generated samples/images
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
if data_source == "uniform":
plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
else:
save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
dis_trainer.finishTrain()
gen_trainer.finishTrain()
if __name__ == '__main__':
main()
......@@ -15,25 +15,11 @@ set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
set(PADDLE_DOXYGEN_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/doxygen_xml")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${BINARY_BUILD_DIR}/conf.py"
@ONLY)
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in"
"${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
@ONLY
)
add_custom_target(paddle_doxygen_docs ALL
${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
sphinx_add_target(paddle_docs
html
${BINARY_BUILD_DIR}
......@@ -41,6 +27,5 @@ sphinx_add_target(paddle_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR})
add_dependencies(paddle_docs
gen_proto_py
paddle_doxygen_docs)
add_dependencies(paddle_docs
gen_proto_py)
此差异已折叠。
ABOUT
=======
PaddlPaddle is an easy-to-use, efficient, flexible and scalable deep learning platform,
which is originally developed by Baidu scientists and engineers for the purpose of applying deep learning to many products at Baidu.
PaddlePaddle is now open source but far from complete, which is intended to be built upon, improved, scaled, and extended.
We hope to build an active open source community both by providing feedback and by actively contributing to the source code.
Credits
--------
We owe many thanks to `all contributors and developers <https://github.com/PaddlePaddle/Paddle/blob/develop/authors>`_ of PaddlePaddle!
../../demo/sentiment_analysis/bi_lstm.jpg
\ No newline at end of file
../../demo/text_generation/encoder-decoder-attention-model.png
\ No newline at end of file
DataProvider Introduction
=========================
Introduction
==============
DataProvider is a module that loads training or testing data into cpu or gpu
memory for the following triaining or testing process.
......
How to use PyDataProvider2
==========================
PyDataProvider2
=================
We highly recommand users to use PyDataProvider2 to provide training or testing
data to PaddlePaddle. The user only needs to focus on how to read a single
......
API
====
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/index.rst
data_provider/pydataprovider2.rst
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/index.rst
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
\ No newline at end of file
Python Prediction API
=====================
Python Prediction
==================
PaddlePaddle offers a set of clean prediction interfaces for python with the help of
SWIG. The main steps of predict values in python are:
......
Parameter and Extra Layer Attribute
===================================
Parameter Attributes
=======================
.. automodule:: paddle.trainer_config_helpers.attrs
:members:
Docker installation guide
==========================
PaddlePaddle provide the `Docker <https://www.docker.com/>`_ image. `Docker`_ is a lightweight container utilities. The performance of PaddlePaddle in `Docker`_ container is basically as same as run it in a normal linux. The `Docker`_ is a very convenient way to deliver the binary release for linux programs.
.. note::
The `Docker`_ image is the recommended way to run PaddlePaddle
PaddlePaddle Docker images
--------------------------
There are 12 `images <https://hub.docker.com/r/paddledev/paddle/tags/>`_ for PaddlePaddle, and the name is :code:`paddle-dev/paddle`, tags are\:
+-----------------+------------------+------------------------+-----------------------+
| | normal | devel | demo |
+=================+==================+========================+=======================+
| CPU | cpu-latest | cpu-devel-latest | cpu-demo-latest |
+-----------------+------------------+------------------------+-----------------------+
| GPU | gpu-latest | gpu-devel-latest | gpu-demo-latest |
+-----------------+------------------+------------------------+-----------------------+
| CPU WITHOUT AVX | cpu-noavx-latest | cpu-devel-noavx-latest | cpu-demo-noavx-latest |
+-----------------+------------------+------------------------+-----------------------+
| GPU WITHOUT AVX | gpu-noavx-latest | gpu-devel-noavx-latest | gpu-demo-noavx-latest |
+-----------------+------------------+------------------------+-----------------------+
And the three columns are:
* normal\: The docker image only contains binary of PaddlePaddle.
* devel\: The docker image contains PaddlePaddle binary, source code and essential build environment.
* demo\: The docker image contains the dependencies to run PaddlePaddle demo.
And the four rows are:
* CPU\: CPU Version. Support CPU which has :code:`AVX` instructions.
* GPU\: GPU Version. Support GPU, and cpu has :code:`AVX` instructions.
* CPU WITHOUT AVX\: CPU Version, which support most CPU even doesn't have :code:`AVX` instructions.
* GPU WITHOUT AVX\: GPU Version, which support most CPU even doesn't have :code:`AVX` instructions.
User can choose any version depends on machine. The following script can help you to detect your CPU support :code:`AVX` or not.
.. code-block:: bash
if cat /proc/cpuinfo | grep -q avx ; then echo "Support AVX"; else echo "Not support AVX"; fi
If the output is :code:`Support AVX`, then you can choose the AVX version of PaddlePaddle, otherwise, you need select :code:`noavx` version of PaddlePaddle. For example, the CPU develop version of PaddlePaddle is :code:`paddle-dev/paddle:cpu-devel-latest`.
The PaddlePaddle images don't contain any entry command. You need to write your entry command to use this image. See :code:`Remote Access` part or just use following command to run a :code:`bash`
.. code-block:: bash
docker run -it paddledev/paddle:cpu-latest /bin/bash
Download and Run Docker images
------------------------------
You have to install Docker in your machine which has linux kernel version 3.10+ first. You can refer to the official guide https://docs.docker.com/engine/installation/ for further information.
You can use :code:`docker pull ` to download images first, or just launch a container with :code:`docker run` \:
.. code-block:: bash
docker run -it paddledev/paddle:cpu-latest
If you want to launch container with GPU support, you need to set some environment variables at the same time:
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
Some notes for docker
---------------------
Performance
+++++++++++
Since Docker is based on the lightweight virtual containers, the CPU computing performance maintains well. And GPU driver and equipments are all mapped to the container, so the GPU computing performance would not be seriously affected.
If you use high performance nic, such as RDMA(RoCE 40GbE or IB 56GbE), Ethernet(10GbE), it is recommended to use config "-net = host".
Remote access
+++++++++++++
If you want to enable ssh access background, you need to build an image by yourself. Please refer to official guide https://docs.docker.com/engine/reference/builder/ for further information.
Following is a simple Dockerfile with ssh:
.. literalinclude:: ../../doc_cn/build_and_install/install/paddle_ssh.Dockerfile
Then you can build an image with Dockerfile and launch a container:
.. code-block:: bash
# cd into Dockerfile directory
docker build . -t paddle_ssh
# run container, and map host machine port 8022 to container port 22
docker run -d -p 8022:22 --name paddle_ssh_machine paddle_ssh
Now, you can ssh on port 8022 to access the container, username is root, password is also root:
.. code-block:: bash
ssh -p 8022 root@YOUR_HOST_MACHINE
You can stop and delete the container as following:
.. code-block:: bash
# stop
docker stop paddle_ssh_machine
# delete
docker rm paddle_ssh_machine
Cluster Train
====================
.. toctree::
:glob:
opensource/cluster_train.md
internal/index.md
......@@ -25,20 +25,6 @@ sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc/templates"]
# -- Doxygen Settings
breathe_projects = {
'paddle': '@PADDLE_DOXYGEN_OUTPUT@/xml'
}
breathe_default_project = 'paddle'
breathe_domain_by_extension = {
'h': 'cpp', # mapping XXX.h XXX.cpp together
}
breathe_default_members = {
'protected-members','undoc-members'
}
# -- General configuration ------------------------------------------------
# General information about the project.
......@@ -62,7 +48,6 @@ extensions = [
'sphinx.ext.autosummary',
'sphinx.ext.mathjax',
'sphinx.ext.napoleon',
'breathe'
]
......
Development Guide
=================
.. toctree::
:maxdepth: 1
layer.md
new_layer/new_layer.rst
../source/index.md
# Layer Documents
* [Layer Source Code Document](../source/gserver/layers/index.rst)
* [Layer Python API Document](../ui/api/trainer_config_helpers/index.rst)
# Introduction
Basic Usage
=============
PaddlePaddle is a deep learning platform open-sourced by Baidu. With PaddlePaddle, you can easily train a classic neural network within a couple lines of configuration, or you can build sophisticated models that provide state-of-the-art performance on difficult learning tasks like sentiment analysis, machine translation, image caption and so on.
## 1. A Classic Problem
1. A Classic Problem
---------------------
Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - <a href="https://en.wikipedia.org/wiki/Simple_linear_regression">**simple linear regression**</a> : you have observed a set of two-dimensional data points of `X` and `Y`, where `X` is an explanatory variable and `Y` is corresponding dependent variable, and you want to recover the underlying correlation between `X` and `Y`. Linear regression can be used in many practical scenarios. For example, `X` can be a variable about house size, and `Y` a variable about house price. You can build a model that captures relationship between them by observing real estate markets.
Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - `simple linear regression <https://en.wikipedia.org/wiki/Simple_linear_regression>`_: you have observed a set of two-dimensional data points of ``X`` and ``Y``, where ``X`` is an explanatory variable and ``Y`` is corresponding dependent variable, and you want to recover the underlying correlation between ``X`` and ``Y``. Linear regression can be used in many practical scenarios. For example, ``X`` can be a variable about house size, and ``Y`` a variable about house price. You can build a model that captures relationship between them by observing real estate markets.
## 2. Prepare the Data
2. Prepare the Data
--------------------
Suppose the true relationship can be characterized as `Y = 2X + 0.3`, let's see how to recover this pattern only from observed data. Here is a piece of python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory, the only extra thing you need to add for PaddlePaddle is a definition of input data types.
Suppose the true relationship can be characterized as ``Y = 2X + 0.3``, let's see how to recover this pattern only from observed data. Here is a piece of python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory, the only extra thing you need to add for PaddlePaddle is a definition of input data types.
```python
# dataprovider.py
from paddle.trainer.PyDataProvider2 import *
import random
.. code-block:: python
# define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2*x+0.3]
```
# dataprovider.py
from paddle.trainer.PyDataProvider2 import *
import random
## 3. Train a NeuralNetwork in PaddlePaddle
# define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2*x+0.3]
To recover this relationship between `X` and `Y`, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with these terminologies, it's just saying that we are starting from a random line `Y' = wX + b` , then we gradually adapt `w` and `b` to minimize the difference between `Y'` and `Y`. Here is what it looks like in PaddlePaddle:
3. Train a NeuralNetwork
-------------------------
```python
# trainer_config.py
from paddle.trainer_config_helpers import *
To recover this relationship between ``X`` and ``Y``, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with these terminologies, it's just saying that we are starting from a random line ``Y' = wX + b`` , then we gradually adapt ``w`` and ``b`` to minimize the difference between ``Y'`` and ``Y``. Here is what it looks like in PaddlePaddle:
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
with open(data_file, 'w') as f: f.writelines(' ')
define_py_data_sources2(train_list=data_file, test_list=None,
module='dataprovider', obj='process',args={})
.. code-block:: python
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# trainer_config.py
from paddle.trainer_config_helpers import *
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
outputs(cost)
```
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
with open(data_file, 'w') as f: f.writelines(' ')
define_py_data_sources2(train_list=data_file, test_list=None,
module='dataprovider', obj='process',args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
outputs(cost)
Some of the most fundamental usages of PaddlePaddle are demonstrated:
......@@ -55,46 +59,51 @@ Some of the most fundamental usages of PaddlePaddle are demonstrated:
- The second part describes learning algorithm. It defines in what ways adjustments are made to model parameters. PaddlePaddle provides a rich set of optimizers, but a simple momentum based optimizer will suffice here, and it processes 12 data points each time.
- Finally, the network configuration. It usually is as simple as "stacking" layers. Three kinds of layers are used in this configuration:
- **Data Layer**: a network always starts with one or more data layers. They provide input data to the rest of the network. In this problem, two data layers are used respectively for `X` and `Y`.
- **Data Layer**: a network always starts with one or more data layers. They provide input data to the rest of the network. In this problem, two data layers are used respectively for ``X`` and ``Y``.
- **FC Layer**: FC layer is short for Fully Connected Layer, which connects all the input units to current layer and does the actual computation specified as activation function. Computation layers like this are the fundamental building blocks of a deeper model.
- **Cost Layer**: in training phase, cost layers are usually the last layers of the network. They measure the performance of current model, and provide guidence to adjust parameters.
Now that everything is ready, you can train the network with a simple command line call:
```
paddle train --config=trainer_config.py --save_dir=./output --num_passes=30
```
This means that PaddlePaddle will train this network on the synthectic dataset for 30 passes, and save all the models under path `./output`. You will see from the messages printed out during training phase that the model cost is decreasing as time goes by, which indicates we are getting a closer guess.
.. code-block:: bash
paddle train --config=trainer_config.py --save_dir=./output --num_passes=30
This means that PaddlePaddle will train this network on the synthectic dataset for 30 passes, and save all the models under path ``./output``. You will see from the messages printed out during training phase that the model cost is decreasing as time goes by, which indicates we are getting a closer guess.
4. Evaluate the Model
-----------------------
## 4. Evaluate the Model
Usually, a different dataset that left out during training phase should be used to evalute the models. However, we are lucky enough to know the real answer: ``w=2, b=0.3``, thus a better option is to check out model parameters directly.
Usually, a different dataset that left out during training phase should be used to evalute the models. However, we are lucky enough to know the real answer: `w=2, b=0.3`, thus a better option is to check out model parameters directly.
In PaddlePaddle, training is just to get a collection of model parameters, which are ``w`` and ``b`` in this case. Each parameter is saved in an individual file in the popular ``numpy`` array format. Here is the code that reads parameters from last pass.
In PaddlePaddle, training is just to get a collection of model parameters, which are `w` and `b` in this case. Each parameter is saved in an individual file in the popular `numpy` array format. Here is the code that reads parameters from last pass.
.. code-block:: python
```python
import numpy as np
import os
import numpy as np
import os
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b'))
# w=1.999743, b=0.300137
```
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b'))
# w=1.999743, b=0.300137
<center> ![](./parameters.png) </center>
.. image:: parameters.png
:align: center
Although starts from a random guess, you can see that value of `w` changes quickly towards 2 and `b` changes quickly towards 0.3. In the end, the predicted line is almost identical with real answer.
Although starts from a random guess, you can see that value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical with real answer.
There, you have recovered the underlying pattern between `X` and `Y` only from observed data.
There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data.
## 5. Where to Go from Here
5. Where to Go from Here
-------------------------
- <a href="../build/index.html"> Build and Installation </a>
- <a href="../demo/quick_start/index_en.html">Quick Start</a>
- <a href="../demo/index.html">Example and Demo</a>
- `Install and Build <../build_and_install/index.html>`_
- `Tutorials <../demo/quick_start/index_en.html>`_
- `Example and Demo <../demo/index.html>`_
Using and Building Docker Images
================================
We release PaddlePaddle in the form of `Docker <https://www.docker.com/>`_ images on `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_. Running as Docker containers is currently the only officially-supported way to running PaddlePaddle.
Run Docker images
-----------------
For each version of PaddlePaddle, we release 4 variants of Docker images:
+-----------------+-------------+-------+
| | CPU AVX | GPU |
+=================+=============+=======+
| cpu | yes | no |
+-----------------+-------------+-------+
| cpu-noavx | no | no |
+-----------------+-------------+-------+
| gpu | yes | yes |
+-----------------+-------------+-------+
| gpu-noavx | no | yes |
+-----------------+-------------+-------+
We run the following command on Linux to check if the CPU supports :code:`AVX`.
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
On Mac OS X, we need to run
.. code-block:: bash
sysctl -a | grep machdep.cpu.leaf7_features
Once we determine the proper variant, we can cope with the Docker image tag name by appending the version number. For example, the following command runs the AVX-enabled image of the most recent version:
.. code-block:: bash
docker run -it --rm paddledev/paddle:cpu-latest /bin/bash
To run a GPU-enabled image, you need to install CUDA and let Docker knows about it:
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
The default entry point of all our Docker images starts the OpenSSH server. To run PaddlePaddle and to expose OpenSSH port to 2202 on the host computer:
.. code-block:: bash
docker run -d -p 2202:22 paddledev/paddle:cpu-latest
Then we can login to the container using username :code:`root` and password :code:`root`:
.. code-block:: bash
ssh -p 2202 root@localhost
Build Docker images
-------------------
Developers might want to build Docker images from their local commit or from a tagged version. Suppose that your local repo is at :code:`~/work/Paddle`, the following steps builds a cpu variant from your current work:
.. code-block:: bash
cd ~/Paddle
./paddle/scripts/docker/generates.sh # Use m4 to generate Dockerfiles for each variant.
docker build -t paddle:latest -f ./paddle/scripts/docker/Dockerfile.cpu
As a release engineer, you might want to build Docker images for a certain version and publish them to dockerhub.com. You can do this by switching to the right Git tag, or create a new tag, before running `docker build`. For example, the following commands build Docker images for v0.9.0:
.. code-block:: bash
cd ~/Paddle
git checkout tags/v0.9.0
./paddle/scripts/docker/generates.sh # Use m4 to generate Dockerfiles for each variant.
docker build -t paddle:cpu-v0.9.0 -f ./paddle/scripts/docker/Dockerfile.cpu
......@@ -8,8 +8,6 @@ Install PaddlePaddle
:maxdepth: 1
:glob:
install_*
internal/install_from_jumbo.md
docker_install.rst
ubuntu_install.rst
......@@ -24,5 +22,4 @@ Build from Source
:maxdepth: 1
:glob:
build_from_source.md
contribute_to_paddle.md
build_from_source.md
\ No newline at end of file
GET STARTED
============
.. toctree::
:maxdepth: 2
build_and_install/index.rst
basic_usage/basic_usage.rst
# Distributed Training
# How to Run Distributed Training
In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
......@@ -9,7 +9,7 @@ In this article, we explain how to run distributed Paddle training jobs on clust
1. Aforementioned scripts use a Python library [fabric](http://www.fabfile.org/) to run SSH commands. We can use `pip` to install fabric:
```bash
pip install fabric
pip install fabric
```
1. We need to install PaddlePaddle on all nodes in the cluster. To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle would report errors at runtime.
......
# How to Set Command-line Parameters
* [Use Case](use_case.md)
* [Arguments](arguments.md)
* [Detailed Descriptions](detail_introduction.md)
# Contribute Code
# How to Contribute Code
We sincerely appreciate your contributions. You can use fork and pull request
workflow to merge your code.
......
Algorithm Tutorial
==================
How to Configure Deep Models
============================
.. toctree::
:maxdepth: 1
......
......@@ -42,7 +42,7 @@ Simple Gated Recurrent Neural Network
Recurrent neural network process a sequence at each time step sequentially. An example of the architecture of LSTM is listed below.
.. image:: ./bi_lstm.jpg
.. image:: ../../../tutorials/sentiment_analysis/bi_lstm.jpg
:align: center
Generally speaking, a recurrent network perform the following operations from :math:`t=1` to :math:`t=T`, or reversely from :math:`t=T` to :math:`t=1`.
......@@ -101,7 +101,7 @@ Sequence to Sequence Model with Attention
-----------------------------------------
We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. An illustration of the sequence to sequence model with attention is shown in the following figure.
.. image:: ./encoder-decoder-attention-model.png
.. image:: ../../../tutorials/text_generation/encoder-decoder-attention-model.png
:align: center
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural networks. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` is called *encoder vector* The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
......
HOW TO
=======
Usage
-------
.. toctree::
:maxdepth: 1
cmd_parameter/index.md
deep_model/index.rst
cluster/cluster_train.md
Development
------------
.. toctree::
:maxdepth: 1
new_layer/index.rst
contribute_to_paddle.md
Optimization
-------------
.. toctree::
:maxdepth: 1
optimization/index.rst
==================
Writing New Layers
==================
=======================
How to Write New Layers
=======================
This tutorial will guide you to write customized layers in PaddlePaddle. We will utilize fully connected layer as an example to guide you through the following steps for writing a new layer.
......
Performance Tuning
==================
How to Tune GPU Performance
===========================
.. toctree::
:maxdepth: 3
......
......@@ -4,8 +4,9 @@ PaddlePaddle Documentation
.. toctree::
:maxdepth: 1
introduction/index.md
user_guide.rst
dev/index.rst
algorithm/index.rst
optimization/index.rst
getstarted/index.rst
tutorials/index.md
howto/index.rst
api/index.rst
about/index.rst
\ No newline at end of file
../../doc_cn/introduction/parameters.png
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册