提交 87d90d2a 编写于 作者: W wanghaoshuang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into seq_error

要显示的变更太多。

To preserve performance only 1000 of 1000+ files are displayed.
......@@ -27,7 +27,6 @@ third_party/
cmake-build-*
# generated while compiling
python/paddle/v2/fluid/core.so
paddle/pybind/pybind.h
CMakeFiles
cmake_install.cmake
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -19,7 +19,7 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(system)
project(paddle CXX C Go)
project(paddle CXX C)
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
......@@ -60,6 +60,7 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
......@@ -141,11 +142,11 @@ include(external/boost) # download boost
include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(external/nccl)
include(external/cares)
include(external/grpc)
include(cudnn) # set cudnn libraries, must before configure
include(cupti)
include(configure) # add paddle env configuration
include(generic) # simplify cmake module
include(package) # set paddle packages
......@@ -174,7 +175,7 @@ set(EXTERNAL_LIBS
)
if(WITH_GPU)
include(cuda)
include(cuda)
endif(WITH_GPU)
if(WITH_MKLML)
......@@ -201,17 +202,18 @@ endif()
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# placed after this block, because they depends on it.
if(WITH_GOLANG)
enable_language(Go)
add_subdirectory(go)
endif(WITH_GOLANG)
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
add_subdirectory(paddle)
if(WITH_PYTHON)
add_subdirectory(python)
add_subdirectory(python)
endif()
if(WITH_DOC)
......
......@@ -22,7 +22,8 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y \
git python-pip python-dev openssh-server bison libnccl-dev \
git python-pip python-dev openssh-server bison \
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \
......
......@@ -21,16 +21,6 @@ RUN apt-get update && \
wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
apt-get clean -y
# Install Go and glide
RUN wget -qO- go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
mkdir /root/gopath/src
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# git credential to skip password typing
RUN git config --global credential.helper store
......
......@@ -188,7 +188,7 @@ Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
#FROM python:2.7.14
FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04
RUN apt-get update && apt-get install -y python
RUN pip install -U kubernetes opencv-python && apt-get update -y && apt-get install -y iputils-ping libgtk2.0-dev
# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF,
# so we must build one with distribute support to install in this image.
# you can get mirror list here:
# https://launchpad.net/ubuntu/+archivemirrors
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev
RUN pip install -U kubernetes opencv-python
RUN pip install paddlepaddle
# if network is slowly, you may need to add proxy here.
# ENV https_proxy=
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python'
RUN pip uninstall -y paddlepaddle
# unset proxy if it is setted.
# ENV https_proxy=""
# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF,
# so we must build one with distribute support to install in this image.
ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl
ENV LD_LIBRARY_PATH=/usr/local/lib
# tf k8s
RUN pip install tensorflow==1.4.0
ADD tf_k8s /usr/bin
RUN chmod +x /usr/bin/tf_k8s
ADD vgg16_tf.py /workspace/
# below lines may change a lot for debugging
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl && \
chmod +x /usr/bin/paddle_k8s
ENV LD_LIBRARY_PATH=/usr/local/lib
RUN chmod +x /usr/bin/paddle_k8s
ADD vgg16_fluid.py vgg16_v2.py /workspace/
......@@ -8,23 +8,24 @@
- cpu MHz : 2101.000
- cache size : 20480 KB
### Blas settings
Setting environment variable: `MKL_NUM_THREADS=1`.
### Single Node Single Thread
- PServer Count: 10
- Trainer Count: 20
- Metrics: samples / sec
| Batch Size | 32 | 64 | 128 | 256 |
| -- | -- | -- | -- | -- |
| PaddlePaddle Fluid | 15.44 | 16.32 | 16.74 | 16.79 |
| PaddlePaddle v2 | 15.97 | 17.04 | 17.60 | 17.83 |
| TensorFlow | - | - | - | - |
| TensorFlow | 9.09 | 9.10 | 9.24 | 8.66 |
### Different Batch Size
- PServer Count: 10
- Trainer Count: 20
- Per trainer CPU Core: 1
- Metrics: samples / sec
| Batch Size | 32 | 64 | 128 | 256 |
......
......@@ -11,7 +11,7 @@ spec:
paddle-job: vgg16job
spec:
imagePullSecrets:
- name: job-registry-secret
- name: job-registry-secret
hostNetwork: true
containers:
- name: trainer
......
#!/bin/bash
check_trainer_ret() {
ret=$1
stdbuf -oL echo "job returned $ret...setting pod return message..."
stdbuf -oL echo "==============================="
if [ $ret -eq 136 ] ; then
echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log
elif [ $ret -eq 139 ] ; then
echo "Segmentation Fault" > /dev/termination-log
elif [ $ret -eq 1 ] ; then
echo "General Error" > /dev/termination-log
elif [ $ret -eq 134 ] ; then
echo "Program Abort" > /dev/termination-log
fi
stdbuf -oL echo "termination log wroted..."
exit $ret
}
g_pservers=""
g_trainers=""
wait_running_pods(){
pserver_label="tf-job-pserver=${JOB_NAME}"
trainer_label="tf-job-trainer=${JOB_NAME}"
stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS_NUM}
stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS_NUM}
g_pservers=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PORT})
g_trainers=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PORT})
}
start_tf_pserver(){
wait_running_pods
label="tf-job-pserver=${JOB_NAME}"
pserver_id=$(python /root/k8s_tools.py fetch_id ${label})
cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
--job_name=${TF_JOB_NAME} --task_index=${pserver_id}"
stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
}
start_tf_trainer(){
wait_running_pods
label="tf-job-trainer=${JOB_NAME}"
trainer_id=$(python /root/k8s_tools.py fetch_id ${label})
cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
--job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}"
stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
check_trainer_ret $?
}
start_tf(){
if [[ "${TF_JOB_NAME}" == "worker" ]]; then
start_tf_trainer
else
start_tf_pserver
fi
}
usage() {
echo "usage: tf_k8s [<args>]:"
echo " start_tf Start tensorflow jobs"
}
case "$1" in
start_tf)
start_tf
;;
--help)
usage
;;
*)
usage
;;
esac
apiVersion: extensions/v1beta1
kind: ReplicaSet
metadata:
name: vgg16job-tf-pserver
spec:
replicas: 10
template:
metadata:
labels:
tf-job-pserver: vgg16job-tf
spec:
hostNetwork: true
imagePullSecrets:
- name: job-registry-secret
containers:
- name: pserver
image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
imagePullPolicy: Always
command: ["tf_k8s", "start_tf"]
ports:
- name: jobport-30236
containerPort: 30236
env:
- name: PORT
value: "32036"
- name: ENTRY
value: "python vgg16_tf.py"
- name: JOB_NAME
value: vgg16job-tf
- name: PSERVERS_NUM
value: "10"
- name: TF_JOB_NAME
value: "ps"
- name: TRAINERS_NUM
value: "20"
- name: BATCH_SIZE
value: "128"
- name: TRAINER_PACKAGE
value: "/workspace"
- name: NUM_PASSES
value: "1"
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: "metadata.namespace"
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: "status.podIP"
resources:
requests:
memory: 10Gi
cpu: 4
limits:
memory: 10Gi
cpu: 4
apiVersion: batch/v1
kind: Job
metadata:
name: vgg16job-tf-trainer
spec:
parallelism: 20
completions: 20
template:
metadata:
labels:
tf-job-trainer: vgg16job-tf
spec:
imagePullSecrets:
- name: job-registry-secret
hostNetwork: true
containers:
- name: trainer
image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
imagePullPolicy: Always
command: ["tf_k8s", "start_tf"]
ports:
- name: jobport-30236
containerPort: 30236
env:
- name: PORT
value: "32036"
- name: JOB_NAME
value: vgg16job-tf
- name: TF_JOB_NAME
value: "worker"
- name: ENTRY
value: "python vgg16_tf.py"
- name: PSERVERS_NUM
value: "10"
- name: BATCH_SIZE
value: "128"
- name: TRAINERS_NUM
value: "20"
- name: TRAINER_PACKAGE
value: "/workspace"
- name: NUM_PASSES
value: "1"
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: "metadata.namespace"
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: "status.podIP"
resources:
requests:
memory: 40Gi
cpu: 2
limits:
memory: 40Gi
cpu: 2
restartPolicy: Never
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -68,6 +68,21 @@ parser.add_argument(
type=str2bool,
default=True,
help='Whether to run as local mode.')
parser.add_argument(
"--ps_hosts",
type=str,
default="",
help="Comma-separated list of hostname:port pairs")
parser.add_argument(
"--trainer_hosts",
type=str,
default="",
help="Comma-separated list of hostname:port pairs")
# Flags for defining the tf.train.Server
parser.add_argument(
"--task_index", type=int, default=0, help="Index of task within the job")
args = parser.parse_args()
......@@ -180,8 +195,9 @@ def main():
iters += 1
num_samples += len(data)
print(
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, spent %f"
% (pass_id, iters, loss, acc, time.time() - ts)
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed = %.2f img/s"
% (pass_id, iters, loss, acc,
len(data) / (time.time() - ts))
) # The accuracy is the accumulation of batches, but not the current batch.
pass_elapsed = time.time() - start_time
......@@ -209,27 +225,24 @@ def main():
batch_size=args.batch_size)
train_loop(exe, fluid.default_main_program())
else:
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # all pserver endpoints
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, "6174"]))
pserver_endpoints = ",".join(eplist)
print("pserver endpoints: ", pserver_endpoints)
trainers = int(os.getenv("TRAINERS")) # total trainer count
print("trainers total: ", trainers)
current_endpoint = os.getenv(
"POD_IP") + ":6174" # current pserver endpoint
training_role = os.getenv(
"TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
pservers=pserver_endpoints,
trainer_id=args.task_index,
pservers=args.ps_hosts,
trainers=trainers)
if training_role == "PSERVER":
current_endpoint = os.getenv("POD_IP") + ":" + os.getenv(
"PADDLE_INIT_PORT")
if not current_endpoint:
print("need env SERVER_ENDPOINT")
exit(1)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""VGG16 benchmark in TensorFlow
You can get distribution example template structure here:
https://medium.com/clusterone/how-to-write-distributed-tensorflow-code-with-an-example-on-tensorport-70bf3306adcb
https://www.tensorflow.org/deploy/distributed
"""
import tensorflow as tf
import paddle.v2 as paddle
import numpy as np
import argparse
import time
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--batch_size', type=int, default=128, help="Batch size for training.")
parser.add_argument(
'--learning_rate',
type=float,
default=1e-3,
help="Learning rate for training.")
parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.")
parser.add_argument(
'--device',
type=str,
default='CPU',
choices=['CPU', 'GPU'],
help="The device type.")
parser.add_argument(
'--data_format',
type=str,
default='NHWC',
choices=['NCHW', 'NHWC'],
help='The data order, NCHW=[batch, channels, height, width].'
'Only support NHWC right now.')
parser.add_argument(
'--data_set',
type=str,
default='cifar10',
choices=['cifar10', 'flowers'],
help='Optional dataset for benchmark.')
parser.add_argument(
"--ps_hosts",
type=str,
default="",
help="Comma-separated list of hostname:port pairs")
parser.add_argument(
"--worker_hosts",
type=str,
default="",
help="Comma-separated list of hostname:port pairs")
parser.add_argument(
"--job_name", type=str, default="", help="One of 'worker', 'ps'")
# Flags for defining the tf.train.Server
parser.add_argument(
"--task_index", type=int, default=0, help="Index of task within the job")
args = parser.parse_args()
class VGG16Model(object):
def __init__(self):
self.parameters = []
def batch_norm_relu(self, inputs, is_training):
"""Performs a batch normalization followed by a ReLU."""
# We set fused=True for a significant speed boost. See
# https://www.tensorflow.org/speed/speed_guide#common_fused_ops
inputs = tf.layers.batch_normalization(
inputs=inputs,
axis=1 if args.data_format == 'NCHW' else -1,
momentum=0.9,
epsilon=1e-05,
center=True,
scale=True,
training=is_training,
fused=True)
inputs = tf.nn.relu(inputs)
return inputs
def conv_bn_layer(self,
name,
images,
kernel_shape,
is_training,
drop_rate=0.0):
with tf.name_scope(name) as scope:
kernel = tf.Variable(
tf.truncated_normal(
kernel_shape, dtype=tf.float32, stddev=1e-1),
name='weights')
conv = tf.nn.conv2d(
images,
kernel, [1, 1, 1, 1],
data_format=args.data_format,
padding='SAME')
biases = tf.Variable(
tf.constant(
0.0, shape=[kernel_shape[-1]], dtype=tf.float32),
trainable=True,
name='biases')
out = tf.nn.bias_add(conv, biases)
out = self.batch_norm_relu(out, is_training)
out = tf.layers.dropout(out, rate=drop_rate, training=is_training)
return out
def fc_layer(self, name, inputs, shape):
with tf.name_scope(name) as scope:
fc_w = tf.Variable(
tf.truncated_normal(
shape, dtype=tf.float32, stddev=1e-1),
name='weights')
fc_b = tf.Variable(
tf.constant(
0.0, shape=[shape[-1]], dtype=tf.float32),
trainable=True,
name='biases')
out = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b)
return out
def network(self, images, class_dim, is_training):
""" VGG16 model structure.
TODO(kuke): enable this network to support the 'NCHW' data format
"""
# conv1
conv1_1 = self.conv_bn_layer(
'conv1_1', images, [3, 3, 3, 64], is_training, drop_rate=0.3)
conv1_2 = self.conv_bn_layer(
'conv1_2', conv1_1, [3, 3, 64, 64], is_training, drop_rate=0.0)
# pool1
pool1 = tf.nn.max_pool(
conv1_2,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool1')
# conv2
conv2_1 = self.conv_bn_layer(
'conv2_1', pool1, [3, 3, 64, 128], is_training, drop_rate=0.4)
conv2_2 = self.conv_bn_layer(
'conv2_2', conv2_1, [3, 3, 128, 128], is_training, drop_rate=0.0)
# pool2
pool2 = tf.nn.max_pool(
conv2_2,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool2')
# conv3
conv3_1 = self.conv_bn_layer(
'conv3_1', pool2, [3, 3, 128, 256], is_training, drop_rate=0.4)
conv3_2 = self.conv_bn_layer(
'conv3_2', conv3_1, [3, 3, 256, 256], is_training, drop_rate=0.4)
conv3_3 = self.conv_bn_layer(
'conv3_3', conv3_2, [3, 3, 256, 256], is_training, drop_rate=0.0)
# pool3
pool3 = tf.nn.max_pool(
conv3_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool3')
# conv4
conv4_1 = self.conv_bn_layer(
'conv4_1', pool3, [3, 3, 256, 512], is_training, drop_rate=0.4)
conv4_2 = self.conv_bn_layer(
'conv4_2', conv4_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
conv4_3 = self.conv_bn_layer(
'conv4_3', conv4_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
# pool4
pool4 = tf.nn.max_pool(
conv4_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
# conv5
conv5_1 = self.conv_bn_layer(
'conv5_1', pool4, [3, 3, 512, 512], is_training, drop_rate=0.4)
conv5_2 = self.conv_bn_layer(
'conv5_2', conv5_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
conv5_3 = self.conv_bn_layer(
'conv5_3', conv5_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
# pool5
pool5 = tf.nn.max_pool(
conv5_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
# flatten
shape = int(np.prod(pool5.get_shape()[1:]))
pool5_flat = tf.reshape(pool5, [-1, shape])
# fc1
drop = tf.layers.dropout(pool5_flat, rate=0.5, training=is_training)
fc1 = self.fc_layer('fc1', drop, [shape, 512])
# fc2
bn = self.batch_norm_relu(fc1, is_training)
drop = tf.layers.dropout(bn, rate=0.5, training=is_training)
fc2 = self.fc_layer('fc2', drop, [512, 512])
fc3 = self.fc_layer('fc3', fc2, [512, class_dim])
return fc3
def run_benchmark(cluster_spec, server):
"""Run benchmark on cifar10 or flowers."""
if args.data_set == "cifar10":
class_dim = 10
raw_shape = (3, 32, 32)
dat_shape = (None, 32, 32, 3) if args.data_format == 'NHWC' else (
None, 3, 32, 32)
else:
class_dim = 102
raw_shape = (3, 224, 224)
dat_shape = (None, 224, 224, 3) if args.data_format == 'NHWC' else (
None, 3, 224, 224)
device = tf.train.replica_device_setter(
worker_device="/job:worker/task:{}".format(args.task_index),
cluster=cluster_spec)
with tf.device(device):
images = tf.placeholder(tf.float32, shape=dat_shape)
labels = tf.placeholder(tf.int64, shape=(None, ))
is_training = tf.placeholder('bool')
onehot_labels = tf.one_hot(labels, depth=class_dim)
vgg16 = VGG16Model()
logits = vgg16.network(images, class_dim, is_training)
loss = tf.losses.softmax_cross_entropy(
onehot_labels=onehot_labels, logits=logits)
avg_loss = tf.reduce_mean(loss)
correct = tf.equal(tf.argmax(logits, 1), labels)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
global_step = tf.Variable(0, name='global_step', trainable=False)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(avg_loss, global_step=global_step)
summary_op = tf.summary.merge_all()
init_op = tf.global_variables_initializer()
# data reader
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
buf_size=5120),
batch_size=args.batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.test10()
if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
buf_size=5120),
batch_size=args.batch_size)
# test
def test():
test_accs = []
for batch_id, data in enumerate(test_reader()):
test_images = np.array(
map(lambda x: np.transpose(x[0].reshape(raw_shape),
axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32")
test_labels = np.array(map(lambda x: x[1], data)).astype('int64')
test_accs.append(
accuracy.eval(feed_dict={
images: test_images,
labels: test_labels,
is_training: False
}))
return np.mean(test_accs)
config = tf.ConfigProto(
intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
config.gpu_options.allow_growth = True
hooks = [tf.train.StopAtStepHook(last_step=1000000)]
with tf.train.MonitoredTrainingSession(
master=server.target, is_chief=(args.task_index == 0),
hooks=hooks) as sess:
iters, num_samples, start_time = 0, 0, 0.0
for pass_id in range(args.num_passes):
# train
num_samples = 0
start_time = time.time()
for batch_id, data in enumerate(train_reader()):
train_images = np.array(
map(lambda x: np.transpose(x[0].reshape(raw_shape),
axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32")
train_labels = np.array(map(lambda x: x[1], data)).astype(
'int64')
iter_begin_time = time.time()
_, loss, acc = sess.run([train_op, avg_loss, accuracy],
feed_dict={
images: train_images,
labels: train_labels,
is_training: True
})
iters += 1
print(
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed=%.2f imgs/sec"
% (pass_id, iters, loss, acc,
len(data) / (time.time() - iter_begin_time)))
num_samples += len(data)
train_elapsed = time.time() - start_time
# test
pass_test_acc = test()
print("Pass = %d, Train speed = %f imgs/s, Test accuracy = %f\n" %
(pass_id, num_samples / train_elapsed, pass_test_acc))
def print_arguments():
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == '__main__':
print_arguments()
ps_hosts = args.ps_hosts.split(",")
worker_hosts = args.worker_hosts.split(",")
# Create a cluster from the parameter server and worker hosts.
cluster_spec = tf.train.ClusterSpec({
"ps": ps_hosts,
"worker": worker_hosts
})
# Create and start a server for the local task.
server = tf.train.Server(
cluster_spec, job_name=args.job_name, task_index=args.task_index)
if args.job_name == "ps":
print("start pserver")
server.join()
elif args.job_name == "worker":
print("start worker")
run_benchmark(cluster_spec, server)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -59,6 +59,7 @@ endif(NOT WITH_GOLANG)
if(NOT WITH_GPU)
add_definitions(-DHPPL_STUB_FUNC)
add_definitions("-DCUPTI_LIB_PATH=\"\"")
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
......@@ -73,7 +74,14 @@ else()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle needs cudnn to compile")
endif()
if(CUPTI_FOUND)
include_directories(${CUPTI_INCLUDE_DIR})
add_definitions(-DPADDLE_WITH_CUPTI)
add_definitions("-DCUPTI_LIB_PATH=\"${CUPTI_LIBRARY_PATH}\"")
else()
add_definitions("-DCUPTI_LIB_PATH=\"\"")
message(STATUS "Cannot find CUPTI, GPU Profiling is incorrect.")
endif()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
# Include cuda and cudnn
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -155,7 +155,8 @@ endif()
include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
# TODO(panyx0718): CUPTI only allows DSO?
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
endif(NOT WITH_DSO)
# setting nvcc arch flags
......
if(NOT WITH_GPU)
return()
endif()
set(CUPTI_ROOT "/usr" CACHE PATH "CUPTI ROOT")
find_path(CUPTI_INCLUDE_DIR cupti.h
PATHS ${CUPTI_ROOT} ${CUPTI_ROOT}/include
$ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include
${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
NO_DEFAULT_PATH
)
get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
set(TARGET_ARCH "x86_64")
if(NOT ${CMAKE_SYSTEM_PROCESSOR})
set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
endif()
list(APPEND CUPTI_CHECK_LIBRARY_DIRS
${CUPTI_ROOT}
${CUPTI_ROOT}/lib64
${CUPTI_ROOT}/lib
${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu
$ENV{CUPTI_ROOT}
$ENV{CUPTI_ROOT}/lib64
$ENV{CUPTI_ROOT}/lib
/usr/lib
${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64)
find_library(CUPTI_LIBRARY NAMES libcupti.so libcupti.dylib # libcupti_static.a
PATHS ${CUPTI_CHECK_LIBRARY_DIRS} ${CUPTI_INCLUDE_DIR} ${__libpath_hist}
NO_DEFAULT_PATH
DOC "Path to cuPTI library.")
get_filename_component(CUPTI_LIBRARY_PATH ${CUPTI_LIBRARY} DIRECTORY)
if(CUPTI_INCLUDE_DIR AND CUPTI_LIBRARY)
set(CUPTI_FOUND ON)
else()
set(CUPTI_FOUND OFF)
endif()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -15,9 +15,16 @@
include(ExternalProject)
set(BOOST_PROJECT "extern_boost")
# To release PaddlePaddle as a pip package, we have to follow the
# manylinux1 standard, which features as old Linux kernels and
# compilers as possible and recommends CentOS 5. Indeed, the earliest
# CentOS version that works with NVIDIA CUDA is CentOS 6. And a new
# version of boost, say, 1.66.0, doesn't build on CentOS 6. We
# checked that the devtools package of CentOS 6 installs boost 1.41.0.
# So we use 1.41.0 here.
set(BOOST_VER "1.41.0")
set(BOOST_TAR "boost_1_41_0")
set(BOOST_URL "http://sourceforge.net/projects/boost/files/boost/${BOOST_VER}/${BOOST_TAR}.tar.gz")
set(BOOST_URL "http://paddlepaddledeps.s3-website-us-west-1.amazonaws.com/${BOOST_TAR}.tar.gz")
set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -56,6 +56,7 @@ ExternalProject_Add(
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -16,12 +16,10 @@ function(copy TARGET)
foreach(index RANGE ${len})
list(GET copy_lib_SRCS ${index} src)
list(GET copy_lib_DSTS ${index} dst)
add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND mkdir -p "${dst}")
if(IS_DIRECTORY ${src})
add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND cp -r "${src}" "${dst}")
else()
add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND cp "${src}" "${dst}")
endif()
add_custom_command(TARGET ${TARGET} PRE_BUILD
COMMAND mkdir -p "${dst}"
COMMAND cp -r "${src}" "${dst}"
COMMENT "copying ${src} -> ${dst}")
endforeach()
endfunction()
......@@ -53,11 +51,11 @@ IF(NOT PROTOBUF_FOUND)
ENDIF(NOT PROTOBUF_FOUND)
# paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle")
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")
set(module "framework")
copy(framework_lib DEPS framework_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/framework/framework.pb.h
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
)
......@@ -69,7 +67,7 @@ copy(memory_lib
set(module "inference")
copy(inference_lib DEPENDS paddle_fluid_shared
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/inference/libpaddle_fluid.so
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.so
DSTS ${dst_dir}/${module} ${dst_dir}/${module}
)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -8,7 +8,7 @@ data_feeder
DataFeeder
----------
.. autoclass:: paddle.v2.fluid.data_feeder.DataFeeder
.. autoclass:: paddle.fluid.data_feeder.DataFeeder
:members:
:noindex:
......@@ -8,14 +8,14 @@ evaluator
Accuracy
--------
.. autoclass:: paddle.v2.fluid.evaluator.Accuracy
.. autoclass:: paddle.fluid.evaluator.Accuracy
:members:
:noindex:
ChunkEvaluator
--------------
.. autoclass:: paddle.v2.fluid.evaluator.ChunkEvaluator
.. autoclass:: paddle.fluid.evaluator.ChunkEvaluator
:members:
:noindex:
......@@ -8,25 +8,25 @@ executor
Executor
--------
.. autoclass:: paddle.v2.fluid.executor.Executor
.. autoclass:: paddle.fluid.executor.Executor
:members:
:noindex:
global_scope
------------
.. autofunction:: paddle.v2.fluid.executor.global_scope
.. autofunction:: paddle.fluid.executor.global_scope
:noindex:
scope_guard
-----------
.. autofunction:: paddle.v2.fluid.executor.scope_guard
.. autofunction:: paddle.fluid.executor.scope_guard
:noindex:
switch_scope
------------
.. autofunction:: paddle.v2.fluid.executor.switch_scope
.. autofunction:: paddle.fluid.executor.switch_scope
:noindex:
......@@ -17,7 +17,7 @@ import argparse
import sys
import types
import paddle.v2.fluid as fluid
import paddle.fluid as fluid
def parse_arg():
......@@ -70,7 +70,7 @@ class DocGenerator(object):
def print_class(self, name):
self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autoclass:: paddle.v2.fluid.{0}.{1}
self.stream.write('''.. autoclass:: paddle.fluid.{0}.{1}
:members:
:noindex:
......@@ -78,7 +78,7 @@ class DocGenerator(object):
def print_method(self, name):
self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autofunction:: paddle.v2.fluid.{0}.{1}
self.stream.write('''.. autofunction:: paddle.fluid.{0}.{1}
:noindex:
'''.format(self.module_name, name))
......
======================
Fluid
======================
.. toctree::
:maxdepth: 1
layers.rst
data_feeder.rst
executor.rst
initializer.rst
evaluator.rst
nets.rst
optimizer.rst
param_attr.rst
profiler.rst
regularizer.rst
io.rst
......@@ -8,28 +8,28 @@ initializer
Constant
--------
.. autoclass:: paddle.v2.fluid.initializer.Constant
.. autoclass:: paddle.fluid.initializer.Constant
:members:
:noindex:
Uniform
-------
.. autoclass:: paddle.v2.fluid.initializer.Uniform
.. autoclass:: paddle.fluid.initializer.Uniform
:members:
:noindex:
Normal
------
.. autoclass:: paddle.v2.fluid.initializer.Normal
.. autoclass:: paddle.fluid.initializer.Normal
:members:
:noindex:
Xavier
------
.. autoclass:: paddle.v2.fluid.initializer.Xavier
.. autoclass:: paddle.fluid.initializer.Xavier
:members:
:noindex:
......@@ -8,54 +8,54 @@ io
save_vars
---------
.. autofunction:: paddle.v2.fluid.io.save_vars
.. autofunction:: paddle.fluid.io.save_vars
:noindex:
save_params
-----------
.. autofunction:: paddle.v2.fluid.io.save_params
.. autofunction:: paddle.fluid.io.save_params
:noindex:
save_persistables
-----------------
.. autofunction:: paddle.v2.fluid.io.save_persistables
.. autofunction:: paddle.fluid.io.save_persistables
:noindex:
load_vars
---------
.. autofunction:: paddle.v2.fluid.io.load_vars
.. autofunction:: paddle.fluid.io.load_vars
:noindex:
load_params
-----------
.. autofunction:: paddle.v2.fluid.io.load_params
.. autofunction:: paddle.fluid.io.load_params
:noindex:
load_persistables
-----------------
.. autofunction:: paddle.v2.fluid.io.load_persistables
.. autofunction:: paddle.fluid.io.load_persistables
:noindex:
save_inference_model
--------------------
.. autofunction:: paddle.v2.fluid.io.save_inference_model
.. autofunction:: paddle.fluid.io.save_inference_model
:noindex:
load_inference_model
--------------------
.. autofunction:: paddle.v2.fluid.io.load_inference_model
.. autofunction:: paddle.fluid.io.load_inference_model
:noindex:
get_inference_program
---------------------
.. autofunction:: paddle.v2.fluid.io.get_inference_program
.. autofunction:: paddle.fluid.io.get_inference_program
:noindex:
......@@ -11,167 +11,167 @@ control_flow
split_lod_tensor
----------------
.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor
.. autofunction:: paddle.fluid.layers.split_lod_tensor
:noindex:
merge_lod_tensor
----------------
.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor
.. autofunction:: paddle.fluid.layers.merge_lod_tensor
:noindex:
BlockGuard
----------
.. autoclass:: paddle.v2.fluid.layers.BlockGuard
.. autoclass:: paddle.fluid.layers.BlockGuard
:members:
:noindex:
BlockGuardWithCompletion
------------------------
.. autoclass:: paddle.v2.fluid.layers.BlockGuardWithCompletion
.. autoclass:: paddle.fluid.layers.BlockGuardWithCompletion
:members:
:noindex:
StaticRNNMemoryLink
-------------------
.. autoclass:: paddle.v2.fluid.layers.StaticRNNMemoryLink
.. autoclass:: paddle.fluid.layers.StaticRNNMemoryLink
:members:
:noindex:
WhileGuard
----------
.. autoclass:: paddle.v2.fluid.layers.WhileGuard
.. autoclass:: paddle.fluid.layers.WhileGuard
:members:
:noindex:
While
-----
.. autoclass:: paddle.v2.fluid.layers.While
.. autoclass:: paddle.fluid.layers.While
:members:
:noindex:
lod_rank_table
--------------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table
.. autofunction:: paddle.fluid.layers.lod_rank_table
:noindex:
max_sequence_len
----------------
.. autofunction:: paddle.v2.fluid.layers.max_sequence_len
.. autofunction:: paddle.fluid.layers.max_sequence_len
:noindex:
topk
----
.. autofunction:: paddle.v2.fluid.layers.topk
.. autofunction:: paddle.fluid.layers.topk
:noindex:
lod_tensor_to_array
-------------------
.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array
.. autofunction:: paddle.fluid.layers.lod_tensor_to_array
:noindex:
array_to_lod_tensor
-------------------
.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor
.. autofunction:: paddle.fluid.layers.array_to_lod_tensor
:noindex:
increment
---------
.. autofunction:: paddle.v2.fluid.layers.increment
.. autofunction:: paddle.fluid.layers.increment
:noindex:
array_write
-----------
.. autofunction:: paddle.v2.fluid.layers.array_write
.. autofunction:: paddle.fluid.layers.array_write
:noindex:
create_array
------------
.. autofunction:: paddle.v2.fluid.layers.create_array
.. autofunction:: paddle.fluid.layers.create_array
:noindex:
less_than
---------
.. autofunction:: paddle.v2.fluid.layers.less_than
.. autofunction:: paddle.fluid.layers.less_than
:noindex:
array_read
----------
.. autofunction:: paddle.v2.fluid.layers.array_read
.. autofunction:: paddle.fluid.layers.array_read
:noindex:
shrink_memory
-------------
.. autofunction:: paddle.v2.fluid.layers.shrink_memory
.. autofunction:: paddle.fluid.layers.shrink_memory
:noindex:
array_length
------------
.. autofunction:: paddle.v2.fluid.layers.array_length
.. autofunction:: paddle.fluid.layers.array_length
:noindex:
IfElse
------
.. autoclass:: paddle.v2.fluid.layers.IfElse
.. autoclass:: paddle.fluid.layers.IfElse
:members:
:noindex:
DynamicRNN
----------
.. autoclass:: paddle.v2.fluid.layers.DynamicRNN
.. autoclass:: paddle.fluid.layers.DynamicRNN
:members:
:noindex:
ConditionalBlock
----------------
.. autoclass:: paddle.v2.fluid.layers.ConditionalBlock
.. autoclass:: paddle.fluid.layers.ConditionalBlock
:members:
:noindex:
StaticRNN
---------
.. autoclass:: paddle.v2.fluid.layers.StaticRNN
.. autoclass:: paddle.fluid.layers.StaticRNN
:members:
:noindex:
reorder_lod_tensor_by_rank
--------------------------
.. autofunction:: paddle.v2.fluid.layers.reorder_lod_tensor_by_rank
.. autofunction:: paddle.fluid.layers.reorder_lod_tensor_by_rank
:noindex:
ParallelDo
----------
.. autoclass:: paddle.v2.fluid.layers.ParallelDo
.. autoclass:: paddle.fluid.layers.ParallelDo
:members:
:noindex:
Print
-----
.. autofunction:: paddle.v2.fluid.layers.Print
.. autofunction:: paddle.fluid.layers.Print
:noindex:
device
......@@ -180,7 +180,7 @@ device
get_places
----------
.. autofunction:: paddle.v2.fluid.layers.get_places
.. autofunction:: paddle.fluid.layers.get_places
:noindex:
io
......@@ -189,27 +189,27 @@ io
data
----
.. autofunction:: paddle.v2.fluid.layers.data
.. autofunction:: paddle.fluid.layers.data
:noindex:
BlockGuardServ
--------------
.. autoclass:: paddle.v2.fluid.layers.BlockGuardServ
.. autoclass:: paddle.fluid.layers.BlockGuardServ
:members:
:noindex:
ListenAndServ
-------------
.. autoclass:: paddle.v2.fluid.layers.ListenAndServ
.. autoclass:: paddle.fluid.layers.ListenAndServ
:members:
:noindex:
Send
----
.. autofunction:: paddle.v2.fluid.layers.Send
.. autofunction:: paddle.fluid.layers.Send
:noindex:
nn
......@@ -218,259 +218,259 @@ nn
fc
--
.. autofunction:: paddle.v2.fluid.layers.fc
.. autofunction:: paddle.fluid.layers.fc
:noindex:
embedding
---------
.. autofunction:: paddle.v2.fluid.layers.embedding
.. autofunction:: paddle.fluid.layers.embedding
:noindex:
dynamic_lstm
------------
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
.. autofunction:: paddle.fluid.layers.dynamic_lstm
:noindex:
dynamic_lstmp
-------------
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp
.. autofunction:: paddle.fluid.layers.dynamic_lstmp
:noindex:
dynamic_gru
-----------
.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
.. autofunction:: paddle.fluid.layers.dynamic_gru
:noindex:
gru_unit
--------
.. autofunction:: paddle.v2.fluid.layers.gru_unit
.. autofunction:: paddle.fluid.layers.gru_unit
:noindex:
linear_chain_crf
----------------
.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf
.. autofunction:: paddle.fluid.layers.linear_chain_crf
:noindex:
crf_decoding
------------
.. autofunction:: paddle.v2.fluid.layers.crf_decoding
.. autofunction:: paddle.fluid.layers.crf_decoding
:noindex:
cos_sim
-------
.. autofunction:: paddle.v2.fluid.layers.cos_sim
.. autofunction:: paddle.fluid.layers.cos_sim
:noindex:
cross_entropy
-------------
.. autofunction:: paddle.v2.fluid.layers.cross_entropy
.. autofunction:: paddle.fluid.layers.cross_entropy
:noindex:
square_error_cost
-----------------
.. autofunction:: paddle.v2.fluid.layers.square_error_cost
.. autofunction:: paddle.fluid.layers.square_error_cost
:noindex:
accuracy
--------
.. autofunction:: paddle.v2.fluid.layers.accuracy
.. autofunction:: paddle.fluid.layers.accuracy
:noindex:
chunk_eval
----------
.. autofunction:: paddle.v2.fluid.layers.chunk_eval
.. autofunction:: paddle.fluid.layers.chunk_eval
:noindex:
sequence_conv
-------------
.. autofunction:: paddle.v2.fluid.layers.sequence_conv
.. autofunction:: paddle.fluid.layers.sequence_conv
:noindex:
conv2d
------
.. autofunction:: paddle.v2.fluid.layers.conv2d
.. autofunction:: paddle.fluid.layers.conv2d
:noindex:
sequence_pool
-------------
.. autofunction:: paddle.v2.fluid.layers.sequence_pool
.. autofunction:: paddle.fluid.layers.sequence_pool
:noindex:
pool2d
------
.. autofunction:: paddle.v2.fluid.layers.pool2d
.. autofunction:: paddle.fluid.layers.pool2d
:noindex:
batch_norm
----------
.. autofunction:: paddle.v2.fluid.layers.batch_norm
.. autofunction:: paddle.fluid.layers.batch_norm
:noindex:
layer_norm
----------
.. autofunction:: paddle.v2.fluid.layers.layer_norm
.. autofunction:: paddle.fluid.layers.layer_norm
:noindex:
beam_search_decode
------------------
.. autofunction:: paddle.v2.fluid.layers.beam_search_decode
.. autofunction:: paddle.fluid.layers.beam_search_decode
:noindex:
conv2d_transpose
----------------
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
.. autofunction:: paddle.fluid.layers.conv2d_transpose
:noindex:
sequence_expand
---------------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand
.. autofunction:: paddle.fluid.layers.sequence_expand
:noindex:
lstm_unit
---------
.. autofunction:: paddle.v2.fluid.layers.lstm_unit
.. autofunction:: paddle.fluid.layers.lstm_unit
:noindex:
reduce_sum
----------
.. autofunction:: paddle.v2.fluid.layers.reduce_sum
.. autofunction:: paddle.fluid.layers.reduce_sum
:noindex:
reduce_mean
-----------
.. autofunction:: paddle.v2.fluid.layers.reduce_mean
.. autofunction:: paddle.fluid.layers.reduce_mean
:noindex:
reduce_max
----------
.. autofunction:: paddle.v2.fluid.layers.reduce_max
.. autofunction:: paddle.fluid.layers.reduce_max
:noindex:
reduce_min
----------
.. autofunction:: paddle.v2.fluid.layers.reduce_min
.. autofunction:: paddle.fluid.layers.reduce_min
:noindex:
sequence_first_step
-------------------
.. autofunction:: paddle.v2.fluid.layers.sequence_first_step
.. autofunction:: paddle.fluid.layers.sequence_first_step
:noindex:
sequence_last_step
------------------
.. autofunction:: paddle.v2.fluid.layers.sequence_last_step
.. autofunction:: paddle.fluid.layers.sequence_last_step
:noindex:
dropout
-------
.. autofunction:: paddle.v2.fluid.layers.dropout
.. autofunction:: paddle.fluid.layers.dropout
:noindex:
split
-----
.. autofunction:: paddle.v2.fluid.layers.split
.. autofunction:: paddle.fluid.layers.split
:noindex:
ctc_greedy_decoder
------------------
.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder
.. autofunction:: paddle.fluid.layers.ctc_greedy_decoder
:noindex:
edit_distance
-------------
.. autofunction:: paddle.v2.fluid.layers.edit_distance
.. autofunction:: paddle.fluid.layers.edit_distance
:noindex:
l2_normalize
------------
.. autofunction:: paddle.v2.fluid.layers.l2_normalize
.. autofunction:: paddle.fluid.layers.l2_normalize
:noindex:
matmul
------
.. autofunction:: paddle.v2.fluid.layers.matmul
.. autofunction:: paddle.fluid.layers.matmul
:noindex:
warpctc
-------
.. autofunction:: paddle.v2.fluid.layers.warpctc
.. autofunction:: paddle.fluid.layers.warpctc
:noindex:
sequence_reshape
----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_reshape
.. autofunction:: paddle.fluid.layers.sequence_reshape
:noindex:
transpose
---------
.. autofunction:: paddle.v2.fluid.layers.transpose
.. autofunction:: paddle.fluid.layers.transpose
:noindex:
im2sequence
-----------
.. autofunction:: paddle.v2.fluid.layers.im2sequence
.. autofunction:: paddle.fluid.layers.im2sequence
:noindex:
nce
---
.. autofunction:: paddle.v2.fluid.layers.nce
.. autofunction:: paddle.fluid.layers.nce
:noindex:
beam_search
-----------
.. autofunction:: paddle.v2.fluid.layers.beam_search
.. autofunction:: paddle.fluid.layers.beam_search
:noindex:
row_conv
--------
.. autofunction:: paddle.v2.fluid.layers.row_conv
.. autofunction:: paddle.fluid.layers.row_conv
:noindex:
multiplex
---------
.. autofunction:: paddle.v2.fluid.layers.multiplex
.. autofunction:: paddle.fluid.layers.multiplex
:noindex:
ops
......@@ -479,259 +479,259 @@ ops
mean
----
.. autofunction:: paddle.v2.fluid.layers.mean
.. autofunction:: paddle.fluid.layers.mean
:noindex:
mul
---
.. autofunction:: paddle.v2.fluid.layers.mul
.. autofunction:: paddle.fluid.layers.mul
:noindex:
reshape
-------
.. autofunction:: paddle.v2.fluid.layers.reshape
.. autofunction:: paddle.fluid.layers.reshape
:noindex:
scale
-----
.. autofunction:: paddle.v2.fluid.layers.scale
.. autofunction:: paddle.fluid.layers.scale
:noindex:
sigmoid_cross_entropy_with_logits
---------------------------------
.. autofunction:: paddle.v2.fluid.layers.sigmoid_cross_entropy_with_logits
.. autofunction:: paddle.fluid.layers.sigmoid_cross_entropy_with_logits
:noindex:
elementwise_add
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_add
.. autofunction:: paddle.fluid.layers.elementwise_add
:noindex:
elementwise_div
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_div
.. autofunction:: paddle.fluid.layers.elementwise_div
:noindex:
elementwise_sub
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_sub
.. autofunction:: paddle.fluid.layers.elementwise_sub
:noindex:
elementwise_mul
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_mul
.. autofunction:: paddle.fluid.layers.elementwise_mul
:noindex:
elementwise_max
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_max
.. autofunction:: paddle.fluid.layers.elementwise_max
:noindex:
elementwise_min
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_min
.. autofunction:: paddle.fluid.layers.elementwise_min
:noindex:
elementwise_pow
---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_pow
.. autofunction:: paddle.fluid.layers.elementwise_pow
:noindex:
clip
----
.. autofunction:: paddle.v2.fluid.layers.clip
.. autofunction:: paddle.fluid.layers.clip
:noindex:
clip_by_norm
------------
.. autofunction:: paddle.v2.fluid.layers.clip_by_norm
.. autofunction:: paddle.fluid.layers.clip_by_norm
:noindex:
sequence_softmax
----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_softmax
.. autofunction:: paddle.fluid.layers.sequence_softmax
:noindex:
sigmoid
-------
.. autofunction:: paddle.v2.fluid.layers.sigmoid
.. autofunction:: paddle.fluid.layers.sigmoid
:noindex:
logsigmoid
----------
.. autofunction:: paddle.v2.fluid.layers.logsigmoid
.. autofunction:: paddle.fluid.layers.logsigmoid
:noindex:
exp
---
.. autofunction:: paddle.v2.fluid.layers.exp
.. autofunction:: paddle.fluid.layers.exp
:noindex:
relu
----
.. autofunction:: paddle.v2.fluid.layers.relu
.. autofunction:: paddle.fluid.layers.relu
:noindex:
tanh
----
.. autofunction:: paddle.v2.fluid.layers.tanh
.. autofunction:: paddle.fluid.layers.tanh
:noindex:
tanh_shrink
-----------
.. autofunction:: paddle.v2.fluid.layers.tanh_shrink
.. autofunction:: paddle.fluid.layers.tanh_shrink
:noindex:
softshrink
----------
.. autofunction:: paddle.v2.fluid.layers.softshrink
.. autofunction:: paddle.fluid.layers.softshrink
:noindex:
sqrt
----
.. autofunction:: paddle.v2.fluid.layers.sqrt
.. autofunction:: paddle.fluid.layers.sqrt
:noindex:
abs
---
.. autofunction:: paddle.v2.fluid.layers.abs
.. autofunction:: paddle.fluid.layers.abs
:noindex:
ceil
----
.. autofunction:: paddle.v2.fluid.layers.ceil
.. autofunction:: paddle.fluid.layers.ceil
:noindex:
floor
-----
.. autofunction:: paddle.v2.fluid.layers.floor
.. autofunction:: paddle.fluid.layers.floor
:noindex:
round
-----
.. autofunction:: paddle.v2.fluid.layers.round
.. autofunction:: paddle.fluid.layers.round
:noindex:
reciprocal
----------
.. autofunction:: paddle.v2.fluid.layers.reciprocal
.. autofunction:: paddle.fluid.layers.reciprocal
:noindex:
log
---
.. autofunction:: paddle.v2.fluid.layers.log
.. autofunction:: paddle.fluid.layers.log
:noindex:
square
------
.. autofunction:: paddle.v2.fluid.layers.square
.. autofunction:: paddle.fluid.layers.square
:noindex:
softplus
--------
.. autofunction:: paddle.v2.fluid.layers.softplus
.. autofunction:: paddle.fluid.layers.softplus
:noindex:
softsign
--------
.. autofunction:: paddle.v2.fluid.layers.softsign
.. autofunction:: paddle.fluid.layers.softsign
:noindex:
brelu
-----
.. autofunction:: paddle.v2.fluid.layers.brelu
.. autofunction:: paddle.fluid.layers.brelu
:noindex:
leaky_relu
----------
.. autofunction:: paddle.v2.fluid.layers.leaky_relu
.. autofunction:: paddle.fluid.layers.leaky_relu
:noindex:
soft_relu
---------
.. autofunction:: paddle.v2.fluid.layers.soft_relu
.. autofunction:: paddle.fluid.layers.soft_relu
:noindex:
elu
---
.. autofunction:: paddle.v2.fluid.layers.elu
.. autofunction:: paddle.fluid.layers.elu
:noindex:
relu6
-----
.. autofunction:: paddle.v2.fluid.layers.relu6
.. autofunction:: paddle.fluid.layers.relu6
:noindex:
pow
---
.. autofunction:: paddle.v2.fluid.layers.pow
.. autofunction:: paddle.fluid.layers.pow
:noindex:
stanh
-----
.. autofunction:: paddle.v2.fluid.layers.stanh
.. autofunction:: paddle.fluid.layers.stanh
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.v2.fluid.layers.hard_shrink
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.v2.fluid.layers.thresholded_relu
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid
------------
.. autofunction:: paddle.v2.fluid.layers.hard_sigmoid
.. autofunction:: paddle.fluid.layers.hard_sigmoid
:noindex:
swish
-----
.. autofunction:: paddle.v2.fluid.layers.swish
.. autofunction:: paddle.fluid.layers.swish
:noindex:
tensor
......@@ -740,66 +740,66 @@ tensor
create_tensor
-------------
.. autofunction:: paddle.v2.fluid.layers.create_tensor
.. autofunction:: paddle.fluid.layers.create_tensor
:noindex:
create_parameter
----------------
.. autofunction:: paddle.v2.fluid.layers.create_parameter
.. autofunction:: paddle.fluid.layers.create_parameter
:noindex:
create_global_var
-----------------
.. autofunction:: paddle.v2.fluid.layers.create_global_var
.. autofunction:: paddle.fluid.layers.create_global_var
:noindex:
cast
----
.. autofunction:: paddle.v2.fluid.layers.cast
.. autofunction:: paddle.fluid.layers.cast
:noindex:
concat
------
.. autofunction:: paddle.v2.fluid.layers.concat
.. autofunction:: paddle.fluid.layers.concat
:noindex:
sums
----
.. autofunction:: paddle.v2.fluid.layers.sums
.. autofunction:: paddle.fluid.layers.sums
:noindex:
assign
------
.. autofunction:: paddle.v2.fluid.layers.assign
.. autofunction:: paddle.fluid.layers.assign
:noindex:
fill_constant_batch_size_like
-----------------------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like
.. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like
:noindex:
fill_constant
-------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant
.. autofunction:: paddle.fluid.layers.fill_constant
:noindex:
ones
----
.. autofunction:: paddle.v2.fluid.layers.ones
.. autofunction:: paddle.fluid.layers.ones
:noindex:
zeros
-----
.. autofunction:: paddle.v2.fluid.layers.zeros
.. autofunction:: paddle.fluid.layers.zeros
:noindex:
......@@ -8,24 +8,24 @@ nets
simple_img_conv_pool
--------------------
.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool
.. autofunction:: paddle.fluid.nets.simple_img_conv_pool
:noindex:
sequence_conv_pool
------------------
.. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool
.. autofunction:: paddle.fluid.nets.sequence_conv_pool
:noindex:
glu
---
.. autofunction:: paddle.v2.fluid.nets.glu
.. autofunction:: paddle.fluid.nets.glu
:noindex:
scaled_dot_product_attention
----------------------------
.. autofunction:: paddle.v2.fluid.nets.scaled_dot_product_attention
.. autofunction:: paddle.fluid.nets.scaled_dot_product_attention
:noindex:
......@@ -8,42 +8,42 @@ optimizer
SGD
---
.. autoclass:: paddle.v2.fluid.optimizer.SGD
.. autoclass:: paddle.fluid.optimizer.SGD
:members:
:noindex:
Momentum
--------
.. autoclass:: paddle.v2.fluid.optimizer.Momentum
.. autoclass:: paddle.fluid.optimizer.Momentum
:members:
:noindex:
Adagrad
-------
.. autoclass:: paddle.v2.fluid.optimizer.Adagrad
.. autoclass:: paddle.fluid.optimizer.Adagrad
:members:
:noindex:
Adam
----
.. autoclass:: paddle.v2.fluid.optimizer.Adam
.. autoclass:: paddle.fluid.optimizer.Adam
:members:
:noindex:
Adamax
------
.. autoclass:: paddle.v2.fluid.optimizer.Adamax
.. autoclass:: paddle.fluid.optimizer.Adamax
:members:
:noindex:
DecayedAdagrad
--------------
.. autoclass:: paddle.v2.fluid.optimizer.DecayedAdagrad
.. autoclass:: paddle.fluid.optimizer.DecayedAdagrad
:members:
:noindex:
......@@ -8,14 +8,14 @@ param_attr
ParamAttr
---------
.. autoclass:: paddle.v2.fluid.param_attr.ParamAttr
.. autoclass:: paddle.fluid.param_attr.ParamAttr
:members:
:noindex:
WeightNormParamAttr
-------------------
.. autoclass:: paddle.v2.fluid.param_attr.WeightNormParamAttr
.. autoclass:: paddle.fluid.param_attr.WeightNormParamAttr
:members:
:noindex:
......@@ -8,18 +8,18 @@ profiler
cuda_profiler
-------------
.. autofunction:: paddle.v2.fluid.profiler.cuda_profiler
.. autofunction:: paddle.fluid.profiler.cuda_profiler
:noindex:
reset_profiler
--------------
.. autofunction:: paddle.v2.fluid.profiler.reset_profiler
.. autofunction:: paddle.fluid.profiler.reset_profiler
:noindex:
profiler
--------
.. autofunction:: paddle.v2.fluid.profiler.profiler
.. autofunction:: paddle.fluid.profiler.profiler
:noindex:
......@@ -8,20 +8,20 @@ regularizer
append_regularization_ops
-------------------------
.. autofunction:: paddle.v2.fluid.regularizer.append_regularization_ops
.. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex:
L1Decay
-------
.. autoclass:: paddle.v2.fluid.regularizer.L1Decay
.. autoclass:: paddle.fluid.regularizer.L1Decay
:members:
:noindex:
L2Decay
-------
.. autoclass:: paddle.v2.fluid.regularizer.L2Decay
.. autoclass:: paddle.fluid.regularizer.L2Decay
:members:
:noindex:
API
===
.. toctree::
:maxdepth: 1
模型配置 <v2/model_configs.rst>
数据访问 <v2/data.rst>
训练与应用 <v2/run_logic.rst>
v2/fluid.rst
......@@ -4,7 +4,8 @@ API
.. toctree::
:maxdepth: 1
overview.rst
v2/model_configs.rst
v2/data.rst
v2/run_logic.rst
v2/fluid.rst
fluid/index.rst
V2 API Overview
================
The PaddlePaddle V2 API is designed to provide a modern user interface for PaddlePaddle V1(the original layer-based platform of PaddlePaddle),
it proposes some high-level concepts such as `Layers <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/layer.html>`_ , `Optimizer <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/optimizer.html>`_ , `Evaluator <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/evaluators.html>`_ and `Data Reader <http://www.paddlepaddle.org/docs/develop/api/en/v2/data/data_reader.html>`_ to make the model configuration more familiar to users.
A model is composed of the computation described by a group of `Layers`, with `Evaluator` to define the error, `Optimizer` to update the parameters and `Data Reader` to feed in the data.
We also provide the `interface for Training and Inference <http://www.paddlepaddle.org/docs/develop/api/en/v2/run_logic.html>`_ to help control the training and inference phrase,
it has several easy to use methods
- `paddle.train`
- `paddle.test`
- `paddle.infer`
to better expose the internal running details, different `events <http://www.paddlepaddle.org/docs/develop/api/en/v2/run_logic.html#event>`_ are available to users by writing some callbacks.
======================
Fluid
======================
.. toctree::
:maxdepth: 1
fluid/layers.rst
fluid/data_feeder.rst
fluid/executor.rst
fluid/initializer.rst
fluid/evaluator.rst
fluid/nets.rst
fluid/optimizer.rst
fluid/param_attr.rst
fluid/profiler.rst
fluid/regularizer.rst
fluid/io.rst
# 用Docker编译和测试PaddlePaddle
## 需要的软硬件
为了开发PaddlePaddle,我们需要
1. 一台电脑,可以装的是 Linux, BSD, Windows 或者 MacOS 操作系统,以及
1. Docker。
不需要依赖其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。
## 总体流程
1. 获取源码
```bash
git clone https://github.com/paddlepaddle/paddle
```
2. 安装开发工具到 Docker image 里
```bash
cd paddle; docker build -t paddle:dev .
```
请注意这个命令结尾处的 `.`;它表示 `docker build` 应该读取当前目录下的 [`Dockerfile`文件](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile),按照其内容创建一个名为 `paddle:dev` 的 Docker image,并且把各种开发工具安装进去。
3. 编译
以下命令启动一个 Docker container 来执行 `paddle:dev` 这个 Docker image,同时把当前目录(源码树根目录)映射为 container 里的 `/paddle` 目录,并且运行 `Dockerfile` 描述的默认入口程序 [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `cmake``make` 来编译 `/paddle` 里的源码,结果输出到 `/paddle/build`,也就是本地的源码树根目录里的 `build` 子目录。
```bash
docker run --rm -v $PWD:/paddle paddle:dev
```
上述命令编译出一个 CUDA-enabled 版本。如果我们只需要编译一个只支持 CPU 的版本,可以用
```bash
docker run --rm -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev
```
4. 运行单元测试
用本机的第一个 GPU 来运行包括 GPU 单元测试在内的所有单元测试:
```bash
NV_GPU=0 nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest"
```
如果编译的时候我们用了 `WITH_GPU=OFF` 选项,那么编译过程只会产生 CPU-based 单元测试,那么我们也就不需要 nvidia-docker 来运行单元测试了。我们只需要:
```bash
docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest"
```
有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以
```bash
nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test"
```
5. 清理
有时候我们会希望清理掉已经下载的第三方依赖以及已经编译的二进制文件。此时只需要:
```bash
rm -rf build
```
## 为什么要 Docker 呀?
- 什么是 Docker?
如果您没有听说 Docker,可以把它想象为一个类似 virtualenv 的系统,但是虚拟的不仅仅是 Python 的运行环境。
- Docker 还是虚拟机?
有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行一样。
- 为什么用 Docker?
把工具和配置都安装在一个 Docker image 里可以标准化编译环境。这样如果遇到问题,其他人可以复现问题以便帮助。
另外,对于习惯使用Windows和MacOS的开发者来说,使用Docker就不用配置交叉编译环境了。
- 我可以选择不用Docker吗?
当然可以。大家可以用把开发工具安装进入 Docker image 一样的方式,把这些工具安装到本机。这篇文档介绍基于 Docker 的开发流程,是因为这个流程比其他方法都更简便。
- 学习 Docker 有多难?
理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。
- 我可以用 IDE 吗?
当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。
很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行
```emacs
(global-set-key "\C-cc" 'compile)
(setq compile-command
"docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
```
就可以按 `Ctrl-C``c` 键来启动编译了。
- 可以并行编译吗?
是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。
## 可能碰到的问题
- Docker 需要 sudo
如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。
- 在 Windows/MacOS 上编译很慢
Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)
- 磁盘不够
本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考[这篇文章](https://zaiste.net/posts/removing_docker_containers/)来清理这些内容。
# Build using Docker
## What Developers Need
To contribute to PaddlePaddle, you need
1. A computer -- Linux, BSD, Windows, MacOS, and
1. Docker.
Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. We run all the tools by running this image.
## General Process
1. Retrieve source code.
```bash
git clone https://github.com/paddlepaddle/paddle
```
2. Install build tools into a Docker image.
```bash
cd paddle; docker build -t paddle:dev .
```
Please be aware of the `.` at the end of the command, which refers to the [`./Dockerfile` file](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile). `docker build` follows instructions in this file to create a Docker image named `paddle:dev`, and installs building tools into it.
3. Build from source.
This following command starts a Docker container that executes the Docker image `paddle:dev`, mapping the current directory to `/paddle/` in the container, and runs the default entry-point [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh) as specified in the Dockefile. `build.sh` invokes `cmake` and `make` to build PaddlePaddle source code, which had been mapped to `/paddle`, and writes outputs to `/paddle/build`, which maps to `build` in the current source directory on the computer.
```bash
docker run -v $PWD:/paddle paddle:dev
```
Above command builds a CUDA-enabled version. If we want to build a CPU-only version, we can type
```bash
docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev
```
4. Run unit tests.
To run all unit tests using the first GPU of a node:
```bash
NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest"
```
If we used `WITH_GPU=OFF` at build time, it generates only CPU-based unit tests, and we don't need nvidia-docker to run them. We can just run
```bash
docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest"
```
Sometimes we want to run a specific unit test, say `memory_test`, we can run
```bash
nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test"
```
5. Clean Build.
Sometimes, we might want to clean all thirt-party dependents and built binaries. To do so, just
```bash
rm -rf build
```
## Docker, Or Not?
- What is Docker?
If you haven't heard of it, consider it something like Python's virtualenv.
- Docker or virtual machine?
Some people compare Docker with VMs, but Docker doesn't virtualize any hardware nor running a guest OS, which means there is no compromise on the performance.
- Why Docker?
Using a Docker image of build tools standardizes the building environment, which makes it easier for others to reproduce your problems and to help.
Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want.
- Can I choose not to use Docker?
Sure, you don't have to install build tools into a Docker image; instead, you can install them in your local computer. This document exists because Docker would make the development way easier.
- How difficult is it to learn Docker?
It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, especially when new versions of PaddlePaddle require some new tools. Not even to mention the time saved when other people trying to reproduce the issue you have.
- Can I use my favorite IDE?
Yes, of course. The source code resides on your local computer, and you can edit it using whatever editor you like.
Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file:
```emacs
(global-set-key "\C-cc" 'compile)
(setq compile-command
"docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
```
so they could type `Ctrl-C` and `c` to build PaddlePaddle from source.
- Does Docker do parallel building?
Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your CPU cores.
## Some Gotchas
- Docker requires sudo
An owner of a computer has the administrative privilege, a.k.a., sudo, and Docker requires this privilege to work properly. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo.
- Docker on Windows/MacOS builds slowly
On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details.
- Not enough disk space
Examples in this article uses option `--rm` with the `docker run` command. This option ensures that stopped containers do not exist on hard disks. We can use `docker ps -a` to list all containers, including stopped. Sometimes `docker build` generates some intermediate dangling images, which also take disk space. To clean them, please refer to [this article](https://zaiste.net/posts/removing_docker_containers/).
从源码编译
======================
.. _requirements:
需要的软硬件
----------------
为了编译PaddlePaddle,我们需要
1. 一台电脑,可以装的是 Linux, Windows 或者 MacOS 操作系统
1. Docker
不需要依赖其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker 镜像里。
.. _build_step:
编译方法
----------------
PaddlePaddle主要使用 `CMake <https://cmake.org>`_ 以及GCC, G++作为编译工具。
我们推荐您使用PaddlePaddle Docker编译环境镜像完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像
可以在 `这里 <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ 找到
PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像
可以在 `这里 <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ 找到。或者
参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像
如果您选择不使用Docker镜像,则需要在本机安装下面章节列出的 `编译依赖`_ 之后才能开始编译的步骤。
......@@ -16,15 +28,19 @@ PaddlePaddle主要使用 `CMake <https://cmake.org>`_ 以及GCC, G++作为编译
.. code-block:: bash
# 1. 获取源码
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
# 如果使用Docker编译环境,执行下面的命令编译CPU-Only的二进制
# 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像
docker build -t paddle:dev .
# 3. 执行下面的命令编译CPU-Only的二进制
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh
# 如果不使用Docker编译环境,执行下面的命令
mkdir build
cd build
cmake -DWITH_GPU=OFF -DWITH_TESTING=OFF ..
make
# 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步)
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。如果使用自行
构建的镜像(上述第4步)会执行 :code:`Dockerfile` 描述的默认入口程序 :code:`build.sh` 可以省略步骤3中
最后的执行脚本的命令。
编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装:
......@@ -50,28 +66,83 @@ PaddlePaddle主要使用 `CMake <https://cmake.org>`_ 以及GCC, G++作为编译
如果您期望在编译完成后立即执行所有的单元测试,可以按照下面的方法:
使用Docker的情况下,设置 :code:`RUN_TEST=ON` 和 :code:`WITH_TESTING=ON` 就会在完成编译之后,立即执行单元测试。
设置 :code:`RUN_TEST=ON` 和 :code:`WITH_TESTING=ON` 就会在完成编译之后,立即执行单元测试。
开启 :code:`WITH_GPU=ON` 可以指定同时执行GPU上的单元测试。
.. code-block:: bash
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh
如果不使用Docker,可以执行ctest命令即可
如果期望执行其中一个单元测试,(比如 :code:`test_sum_op` )
.. code-block:: bash
mkdir build
cd build
cmake -DWITH_GPU=OFF -DWITH_TESTING=OFF ..
make
ctest
# 指定执行其中一个单元测试 test_mul_op
ctest -R test_mul_op
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
bash /paddle/paddle/scripts/docker/build.sh
cd /paddle/build
ctest -R test_sum_op -V
.. _faq_docker:
常见问题
----------------
- 什么是 Docker?
如果您没有听说 Docker,可以把它想象为一个类似 virtualenv 的系统,但是虚拟的不仅仅是 Python 的运行环境。
- Docker 还是虚拟机?
有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行一样。
- 为什么用 Docker?
把工具和配置都安装在一个 Docker image 里可以标准化编译环境。这样如果遇到问题,其他人可以复现问题以便帮助。
另外,对于习惯使用Windows和MacOS的开发者来说,使用Docker就不用配置交叉编译环境了。
- 我可以选择不用Docker吗?
当然可以。大家可以用把开发工具安装进入 Docker image 一样的方式,把这些工具安装到本机。这篇文档介绍基于 Docker 的开发流程,是因为这个流程比其他方法都更简便。
- 学习 Docker 有多难?
理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。
- 我可以用 IDE 吗?
当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。
很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行
```emacs
(global-set-key "\C-cc" 'compile)
(setq compile-command
"docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
```
就可以按 `Ctrl-C` 和 `c` 键来启动编译了。
- 可以并行编译吗?
是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。
- Docker 需要 sudo
如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。
- 在 Windows/MacOS 上编译很慢
Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。
- 磁盘不够
本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考[这篇文章](https://zaiste.net/posts/removing_docker_containers/)来清理这些内容。
.. _compile_deps:
编译依赖
附录:编译依赖
----------------
PaddlePaddle编译需要使用到下面的依赖(包含但不限于),其他的依赖软件,会自动在编译时下载。
......@@ -91,7 +162,7 @@ PaddlePaddle编译需要使用到下面的依赖(包含但不限于),其
.. _build_options:
编译选项
附录:编译选项
----------------
PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种BLAS库等。
......@@ -118,7 +189,7 @@ PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种B
"WITH_TESTING", "是否开启单元测试", "OFF"
"WITH_DOC", "是否编译中英文文档", "OFF"
"WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto"
"WITH_GOLANG", "是否编译go语言的可容错parameter server", "ON"
"WITH_GOLANG", "是否编译go语言的可容错parameter server", "OFF"
"WITH_MKL", "是否使用MKL数学库,如果为否则是用OpenBLAS", "ON"
BLAS
......
Build from Sources
==========================
.. _build_step:
.. _requirements:
How To Build
Requirements
----------------
PaddlePaddle mainly uses `CMake <https://cmake.org>`_ and GCC, G++ as compile
tools. We recommend you to use our pre-built Docker image to run the build
to avoid installing dependencies by yourself. We have several build environment
Docker images `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ .
To build PaddlePaddle, you need
1. A computer -- Linux, Windows, MacOS.
1. Docker.
Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image.
We run all the tools by running this image.
.. _build_step:
If you choose not to use Docker image for your build, you need to install the
below `Compile Dependencies`_ before run the build.
How To Build
----------------
Then run:
You need to use Docker to build PaddlePaddle
to avoid installing dependencies by yourself. We have several pre-built
Docker images `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ ,
Or you can build your own image from source as the optional step below:
.. code-block:: bash
# 1. clone the source code
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
# run the following command to build a CPU-Only binaries if you are using docker
# 2. Optional: build development docker image from source
docker build -t paddle:dev .
# 3. Run the following command to build a CPU-Only binaries
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh
# else run these commands
mkdir build
cd build
cmake -DWITH_GPU=OFF -DWITH_TESTING=OFF ..
make
# 4. Or, use your built Docker image to build PaddlePaddle (must run step 2)
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
NOTE: The above command try to mount the current working directory (root directory of source code)
into :code:`/paddle` directory inside docker container. If you are using your own image
(Step 4) it will run default entry-point :code:`build.sh` , so you could omit the last
command in step 3.
When the compile finishes, you can get the output whl package under
build/python/dist, then you can choose to install the whl on local
......@@ -61,22 +74,75 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU.
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh
If you don't use Docker, just run ctest will start the tests:
If you wish to run only one unit test, like :code:`test_sum_op`:
.. code-block:: bash
mkdir build
cd build
cmake -DWITH_GPU=OFF -DWITH_TESTING=ON ..
make
ctest
# run a single test like test_mul_op
ctest -R test_mul_op
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
bash /paddle/paddle/scripts/docker/build.sh
cd /paddle/build
ctest -R test_sum_op -V
.. _faq_docker:
Frequently Asked Questions
----------------
- What is Docker?
If you haven't heard of it, consider it something like Python's virtualenv.
- Docker or virtual machine?
Some people compare Docker with VMs, but Docker doesn't virtualize any hardware nor running a guest OS, which means there is no compromise on the performance.
- Why Docker?
Using a Docker image of build tools standardizes the building environment, which makes it easier for others to reproduce your problems and to help.
Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want.
- Can I choose not to use Docker?
Sure, you don't have to install build tools into a Docker image; instead, you can install them on your local computer. This document exists because Docker would make the development way easier.
- How difficult is it to learn Docker?
It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, especially when new versions of PaddlePaddle require some new tools. Not even to mention the time saved when other people trying to reproduce the issue you have.
- Can I use my favorite IDE?
Yes, of course. The source code resides on your local computer, and you can edit it using whatever editor you like.
Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file:
```emacs
(global-set-key "\C-cc" 'compile)
(setq compile-command
"docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
```
so they could type `Ctrl-C` and `c` to build PaddlePaddle from source.
- Does Docker do parallel building?
Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your CPU cores.
- Docker requires sudo
An owner of a computer has the administrative privilege, a.k.a., sudo, and Docker requires this privilege to work properly. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo.
- Docker on Windows/MacOS builds slowly
On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details.
- Not enough disk space
Examples in this article use option `--rm` with the `docker run` command. This option ensures that stopped containers do not exist on hard disks. We can use `docker ps -a` to list all containers, including stopped. Sometimes `docker build` generates some intermediate dangling images, which also take disk space. To clean them, please refer to [this article](https://zaiste.net/posts/removing_docker_containers/).
.. _compile_deps:
Compile Dependencies
Appendix: Compile Dependencies
----------------
PaddlePaddle need the following dependencies when compiling, other dependencies
......@@ -97,17 +163,13 @@ will be downloaded automatically.
.. _build_options:
Build Options
Appendix: Build Options
----------------
Build options include whether build binaries for CPU or GPU, which BLAS
library to use etc. You may pass these settings when running cmake.
For detailed cmake tutorial please refer to `here <https://cmake.org/cmake-tutorial>`_ 。
.. _build_options_bool:
Bool Type Options
----------------
You can add :code:`-D` argument to pass such options, like:
......@@ -129,7 +191,7 @@ You can add :code:`-D` argument to pass such options, like:
"WITH_TESTING", "Build unit tests", "OFF"
"WITH_DOC", "Build documentations", "OFF"
"WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto"
"WITH_GOLANG", "Build fault-tolerant parameter server written in go", "ON"
"WITH_GOLANG", "Build fault-tolerant parameter server written in go", "OFF"
"WITH_MKL", "Use MKL as BLAS library, else use OpenBLAS", "ON"
......
......@@ -13,7 +13,6 @@ PaddlePaddle提供pip和Docker的安装方式:
pip_install_cn.rst
docker_install_cn.rst
build_cn.md
编译流程
++++++++
......
......@@ -13,8 +13,6 @@ You can choose either pip or Docker to complete your install:
pip_install_en.rst
docker_install_en.rst
build_en.md
Build from Source
-----------------
......
......@@ -12,7 +12,7 @@ The following table compares concepts in Fluid and Go
| Go | Fluid |
|----|-------|
|user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid) |
|user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid) |
| control-flow and built-in functions | [intrinsics/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) |
| goroutines, channels | [class ThreadPool](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework/thread_pool.h) |
| runtime | [class Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h) |
......
......@@ -89,7 +89,7 @@ with train_loop.block():
h[t] = the_step(input[t])
```
An actual Fluid example is described [here](https://github.com/PaddlePaddle/Paddle/blob/a91efdde6910ce92a78e3aa7157412c4c88d9ee8/python/paddle/v2/fluid/tests/test_while_op.py#L36-L44).
An actual Fluid example is described [here](https://github.com/PaddlePaddle/Paddle/blob/bde090a97564b9c61a6aaa38b72ccc4889d102d9/python/paddle/fluid/tests/unittests/test_while_op.py#L50-L58).
From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop.
......
......@@ -101,7 +101,7 @@ In-place is a built-in attribute of an operator. Since we treat in-place and oth
#### contruct control flow graph
Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book/test_machine_translation.py) example.
Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py) example.
- Block0:
......
# Design Doc: Parallel_Do in PaddlePaddle
In PaddlePaddle, we use parallel_do primitive to represent multithread data parallel processing.
## Design overview
The definition of a parallel_do op looks like the following
```c++
AddInput(kInputs, "Inputs needed to be split onto different devices").AsDuplicable();
AddInput(kParameters, "Parameters are duplicated over different devices")
.AsDuplicable();
AddInput(kPlaces, "Devices used for parallel processing");
AddOutput(kOutputs, "Outputs needed to be merged from different devices").AsDuplicable();
AddOutput(kParallelScopes,
"Scopes for all local variables in forward pass. One scope for each device");
AddAttr<framework::BlockDesc *>(kParallelBlock,
"List of operaters to be executed in parallel");
```
A vanilla implementation of parallel_do can be shown as the following (`|` means single thread and
`||||` means multiple threads)
```
In the forward pass
| Split input onto different devices
| Copy parameter onto different devices
|||| Compute forward pass in parallel
| Merge output from different devices
In the backward pass
| Split output@grad onto different devices
|||| Compute backward pass in parallel
| accumulate param@grad from different devices to the first device
| Merge input@grad from different devices
 | Copy param@grad to the place of parallel_do_op
```
This implementation allows to write mixed device program like this
```python
# get embedding feature on CPU
feature = some_cpu_only_op(data)
gpu_places = get_place(use_gpu=True)
# parallel processing on multiple GPUs
pd = ParallelDo(gpu_places)
with pd.do():
read_input(feature)
prediction = my_net(feature)
write_output(prediction)
prediction = pd()
loss = cross_entropy(prediction, label)
```
And the programDesc are like the following
```
# start_program will be run by executor(CPUPlace), all w1, w2 will be allocated on CPU
start_program
{
vars: w1, w2
ops: init(w1), init(w2)
}
main_program
{
block0 {
vars: data, places, w1, w2
ops: data, get_place, parallel_do(block1),
parallel_do_grad(block2),
sgd(w2, w2_grad),
sgd(w1, w1_grad)
}
block1 {
parent_block: 0
vars: data, h1, h2, loss
ops: fc, fc, softmax
}
block2 {
parent_block: 1
vars: data_grad, h1_grad, h2_grad, loss_gard, w1_grad, w2_grad
ops: softmax_grad,
fc_grad
fc_grad
}
}
```
## Performance Imporvement
There are serial places we can make this parallel_do faster.
### forward: split input onto different devices
If the input of the parallel_do is independent from any prior opeartors, we can avoid this step by
prefetching the input onto different devices in a seperate background thread. And the python code
looks like this.
```python
pd = ParallelDo(gpu_places)
with pd.do():
   feature = get_data_from_prefetch_queue(gpu_places)
prediction = my_net(feature)
write_output(activation)
```
### forward: Copy parameter to onto different devices
We can avoid this step by making each device have a copy of the parameter. This requires:
1. `fluid.default_start_up_program()` to be run on all devices
1. In the backward, allreduce param@grad at different devices, this requires
1. `backward.py` add `allreduce` operators at parallel_do_grad
1. `allreduce` operators need to be called in async mode to achieve maximum throughput
1. apply gradients related op(i.e. cliping, normalization, decay, sgd) on different devices in parallel
By doing so, we also avoided "backward: accumulate param@grad from different devices to the first device".
And the ProgramDesc looks like the following
```
# w1, w2 will be allocated on all GPUs
start_program
{
block0 {
parallel_do(block1)
}
block1 {
parent_block: 0
vars: w1, w2
ops: init(w1), init(w2)
}
}
main_program
{
block0 {
vars: data, places, w1, w2
ops: data, get_place, parallel_do(block1),
parallel_do_grad(block2), # append_backward
parallel_do(block3) # append_optimization
}
block1 {
parent_block: 0
vars: data, h1, h2, loss
ops: fc, fc, softmax
}
block2 {
parent_block: 1
vars: data_grad, h1_grad, h2_grad, loss_gard, w1_grad, w2_grad
ops: softmax_grad,
fc_grad, allreduce(places, scopes, w1_grad),
fc_grad, allreduce(places, scopes, w2_grad)
}
block3 {
parent_block: 0
vars: lr
ops: sgd(w2, w2_grad),
sgd(w1, w1_grad)
}
}
```
## Background
PaddlePaddle divides the description of neural network computation into two stages: compile time and runtime. At compile time, the neural network computation is described as a `ProgramDesc` whereas at runtime an `Executor` interprets the `ProgramDesc` to compute the operations.
PaddlePaddle use proto message to describe compile time program because
PaddlePaddle uses proto message to describe compile time program because :
1. The computation program description must be serializable and saved in a file.
1. During distributed training, the sreialized program will be sent to multiple workers. It should also be possible to break the program into different components, each of which can be executed on different workers.
1. During distributed training, the serialized program will be sent to multiple workers. It should also be possible to break the program into different components, each of which can be executed on a different worker.
The computation `Program` consists of nested `Blocks`. Each `Block` will consist of data(i.e. `Variable`) and `Operations`. The concept to represent them is in the table below.
......@@ -14,28 +14,33 @@ The computation `Program` consists of nested `Blocks`. Each `Block` will consist
|Operation|OpDesc(proto)|Operator(cpp)|
## Definition of VarDesc
## Definition of VarType
A VarDesc should have a name, and value. The are two kinds of variable type in compile time, they are `LoDTensor` and `SelectedRows`.
A VarDesc should have a name, type and whether or not it is persistable. The are different kinds of variable types supported in PaddlePaddle, apart from the POD_Types like: `LOD_TENSOR`, `SELECTED_ROWS`, `FEED_MINIBATCH`, `FETCH_LIST`, `STEP_SCOPES`, `LOD_RANK_TABLE`, `LOD_TENSOR_ARRAY`, `PLACE_LIST`, `READER` and `CHANNEL`. These are declared inside `VarType`. A `VarDesc` then looks as the following:
```proto
message VarDesc {
required string name = 1;
enum VarType {
LOD_TENSOR = 0;
SELECTED_ROWS = 1;
}
required VarType type = 2;
optional LoDTensorDesc lod_desc = 3;
optional TensorDesc selected_rows_desc = 4;
optional bool persistable = 5 [ default = false ];
optional bool persistable = 3 [ default = false ];
}
```
## Definition of TensorDesc
```proto
enum DataType {
message TensorDesc {
// Should only be PODType. Is enforced in C++
required Type data_type = 1;
repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
}
```
The `Type` here comes from the enum defined inside of `VarType` :
```proto
enum Type {
// Pod Types
BOOL = 0;
INT16 = 1;
INT32 = 2;
......@@ -43,11 +48,18 @@ enum DataType {
FP16 = 4;
FP32 = 5;
FP64 = 6;
}
message TensorDesc {
required DataType data_type = 1;
repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
// Other types that may need additional descriptions
LOD_TENSOR = 7;
SELECTED_ROWS = 8;
FEED_MINIBATCH = 9;
FETCH_LIST = 10;
STEP_SCOPES = 11;
LOD_RANK_TABLE = 12;
LOD_TENSOR_ARRAY = 13;
PLACE_LIST = 14;
READER = 15;
CHANNEL = 16;
}
```
......@@ -58,7 +70,7 @@ A TensorDesc describes `SelectedRows` and `LoDTensor`. For details of `SelectedR
```proto
message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int lod_level = 2;
optional int32 lod_level = 2 [ default = 0 ];
}
```
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
C-API预测库
==================
当我们训练完一个神经网络模型之后,下一步就是用模型来做预测。预测就是准备输入数据,经过模型处理之后,得到预测结果的过程。
相比于模型训练,预测有如下特点:
#. 预测不需要训练过程中反向传播和参数更新的部分。
#. 预测不需要标签(label)。
#. 预测很多时候需要和用户系统整合在一起。
因为上述特点,模型预测SDK需要单独设计,并具备以下特点:
#. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。
#. 预测SDK需要提供一个简洁的用户接口,方便使用。
#. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。
#. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。
PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用,我们提供了如下指南:
.. toctree::
:maxdepth: 1
......
......@@ -65,6 +65,7 @@
output_file = "output.paddle.model"
merge_v2_model(net, param_file, output_file)
```
对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例,可直接运行 `python` [merge_v2_model.py](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/merge_v2_model.py)。序列化结果会写入当前运行目录下的`output.paddle.model`文件中。使用这种方式,运行时C-API可以通过指定`output.paddle.model`文件的路径来加载预测模型。
#### 注意事项
......
......@@ -32,7 +32,7 @@ The non-cluster version of this demo with fluid API is as follows:
``` python
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.fluid as fluid
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
......@@ -125,11 +125,11 @@ for pass_id in range(100):
### E2E demo
Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py).
Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book_distribute/notest_dist_fit_a_line.py).
First `cd` into the folder that contains the `python` files. In this case:
```bash
cd /paddle/python/paddle/v2/fluid/tests/book_distribute
cd /paddle/python/paddle/fluid/tests/book_distribute
```
In parameter server node run the following in the command line:
......
在不同集群中运行
================
用户的集群环境不尽相同,为了方便大家的部署,我们提供了多种的集群部署方式,方便提交集群训练任务,以下将一一介绍:
PaddlePaddle可以使用多种分布式计算平台构建分布式计算任务,包括:
- `Kubernetes <http://kubernetes.io>`_ Google开源的容器集群的调度框架,支持大规模集群生产环境的完整集群方案。
- `OpenMPI <https://www.open-mpi.org>`_ 成熟的高性能并行计算框架。
- `Fabric <http://www.fabfile.org>`_ 集群管理工具。可以使用`Fabric`编写集群任务提交和管理脚本。
`Kubernetes <http://kubernetes.io>`_ 是Google开源的容器集群的调度框架,支持大规模集群生产环境的完整集群方案。以下指南展示了PaddlePaddle对Kubernetes的支持:
对于不同的集群平台,会分别介绍集群作业的启动和停止方法。这些例子都可以在 `cluster_train_v2 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/scripts/cluster_train_v2>`_ 找到。
.. toctree::
:maxdepth: 1
k8s_cn.md
k8s_distributed_cn.md
在使用分布式计算平台进行训练时,任务被调度在集群中时,分布式计算平台通常会通过API或者环境变量提供任务运行需要的参数,比如节点的ID、IP和任务节点个数等。
`OpenMPI <https://www.open-mpi.org>`_ 是成熟的高性能并行计算框架,在HPC领域使用非常的广泛。以下指南介绍了如何使用OpenMPI来搭建PaddlePaddle的集群训练任务:
.. toctree::
:maxdepth: 1
fabric_cn.md
openmpi_cn.md
k8s_cn.md
k8s_distributed_cn.md
`Fabric <http://www.fabfile.org>`_ 是一个方便的程序部署和管理工具。我们提供了使用Fabric 进行部署、管理的方法,如果想详细了解,请阅读以下指南:
.. toctree::
:maxdepth: 1
fabric_cn.md
我们也支持在AWS上部署PaddlePaddle,详细请了解:
.. toctree::
:maxdepth: 1
k8s_aws_cn.md
您可以在 `cluster_train_v2 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/scripts/cluster_train_v2>`_ 找到以上相关的例子。
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -35,7 +35,7 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py
```
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.284 0.284 29.514 29.514 main.py:1(<module>)
4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run)
4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/executor.py:20(run)
4696 12.040 0.003 12.040 0.003 {built-in method run}
1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14(<module>)
```
......@@ -61,9 +61,9 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py
```text
4696 12.040 0.003 12.040 0.003 {built-in method run}
300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader)
107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__)
4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp)
1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1(<module>)
107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:219(__init__)
4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp)
1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/__init__.py:1(<module>)
```
可以看到最耗时的函数是C++端的`run`函数。这需要联合我们第二节`Python``C++`混合代码的性能分析来进行调优。而`sync_with_cpp`函数的总共耗时很长,每次调用的耗时也很长。于是我们可以点击`sync_with_cpp`的详细信息,了解其调用关系。
......@@ -76,9 +76,9 @@ Called By:
Function was called by...
ncalls tottime cumtime
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone)
1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:487(clone)
1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:534(append_backward)
Called:
......
......@@ -49,7 +49,7 @@ port, we will see the output like the following:
```
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.284 0.284 29.514 29.514 main.py:1(<module>)
4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run)
4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/executor.py:20(run)
4696 12.040 0.003 12.040 0.003 {built-in method run}
1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14(<module>)
```
......@@ -74,9 +74,9 @@ focus on. We can sort above profiling file by tottime:
```text
4696 12.040 0.003 12.040 0.003 {built-in method run}
300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader)
107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__)
4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp)
1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1(<module>)
107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:219(__init__)
4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp)
1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/__init__.py:1(<module>)
```
We can see that the most time-consuming function is the `built-in
......@@ -93,9 +93,9 @@ Called By:
Function was called by...
ncalls tottime cumtime
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone)
1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:487(clone)
1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:534(append_backward)
Called:
......
# PaddlePaddle Fluid Source Code Overview
Examples: https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/tests/book
Examples: https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/tests/book
Core: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework
......@@ -26,16 +26,16 @@ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)
```
- Variables: `x`, `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/framework.py#)
- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/layers)
- Variables: `x`, `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/framework.py#)
- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/layers)
- Every Layer has one or more operators and variables/parameters
- All the operators are defined at [`paddle/operators/`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators). Other worth-looking files:
- Base class: [`paddle/framework/operator.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h)
- Operator Registration: [`paddle/framework/op_registry.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h)
- Operator Lookup: [`paddle/framework/op_info.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_info.h)
- Optimizer: `fluid.optimizer.SGD`. It does the following
- Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/backward.py)]
- Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/optimizer.py)]
- Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/backward.py)]
- Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/optimizer.py)]
# Run Time
......@@ -57,7 +57,7 @@ exe.run(fluid.default_main_program(),
- Place: `place`. one of CPU, GPU or FPGA. [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h)
- The device handle are at [paddle/platform/device_context.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h)
- Executor: `fluid.Executor(place)`. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/executor.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)]
- Executor: `fluid.Executor(place)`. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/executor.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)]
- Feeds the data: `feed=feeder.feed(data)`
- Evaluates all the operators
- Fetches the result: `fetch_list=[avg_cost]`
......
......@@ -23,6 +23,12 @@ $ docker build -t username/paddle-android:dev . -f Dockerfile.android
$ docker pull paddlepaddle/paddle:latest-dev-android
```
对于国内用户,我们提供了加速访问的镜像源:
```bash
$ docker pull docker.paddlepaddlehub.com/paddle:latest-dev-android
```
### 编译PaddlePaddle C-API库
构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。
Android的Docker开发镜像向用户提供两个可配置的参数:
......@@ -56,15 +62,15 @@ Android的Docker开发镜像向用户提供两个可配置的参数:
- 编译`armeabi-v7a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev
```
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev
```
- 编译`arm64-v8a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev
```
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev
```
执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置,并且会根据`ANDROID_ABI``ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a``ANDROID_API<21`时,Docker容器中将默认使用`Android API 21`的编译工具链。用户可以参考下文[配置交叉编译参数](#配置交叉编译参数)章节,根据个人的需求修改定制Docker容器所执行的脚本。编译安装结束之后,PaddlePaddle的C-API库将被安装到`$PWD/install_android`目录,所依赖的第三方库同时也被安装到`$PWD/install_android/third_party`目录。
......@@ -155,7 +161,11 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
..
```
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE``MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE``Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS`来影响PaddlePaddle的编译过程。
用户还可根据自己的需求设置其他编译参数。
- 设置`CMAKE_BUILD_TYPE``MinSizeRel`,最小化生成的库的大小。
- 设置`CMAKE_BUILD_TYPE``Release`,获得最快的执行速度,
- 用户亦可以通过手动设置`CMAKE_C/CXX_FLAGS`来影响PaddlePaddle的编译过程。
**性能TIPS**,为了达到最快的计算速度,在CMake参数配置上,有以下建议:
......
......@@ -25,6 +25,12 @@ Users can directly use the published Docker image.
$ docker pull paddlepaddle/paddle:latest-dev-android
```
For users in China, we provide a faster mirror.
```bash
$ docker pull docker.paddlepaddlehub.com/paddle:latest-dev-android
```
### Build the Inference Library
We can run the Docker image we just created to build the inference library of PaddlePaddle for Android using the command below:
......@@ -86,19 +92,19 @@ Android NDK includes everything we need to build the [*standalone toolchain*](ht
- To build the standalone toolchain for `armeabi-v7a` and Android API level 21:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm --platform=android-21 --install-dir=your/path/to/arm_standalone_toolchain
```
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm --platform=android-21 --install-dir=your/path/to/arm_standalone_toolchain
```
The generated standalone toolchain will be in `your/path/to/arm_standalone_toolchain`.
- To build the standalone toolchain for `arm64-v8a` and Android API level 21:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain
```
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain
```
The generated standalone toolchain will be in `your/path/to/arm64_standalone_toolchain`.
......
......@@ -82,7 +82,7 @@ language = 'zh_CN'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_en*', '*_en*']
exclude_patterns = ['_build', '**/*_en*', '*_en*', 'api/*']
# The reST default role (used for this markup: `text`) to use for all
# documents.
......
......@@ -82,7 +82,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_cn*', '*_cn*']
exclude_patterns = ['_build', '**/*_cn*', '*_cn*', 'api/*']
# The reST default role (used for this markup: `text`) to use for all
# documents.
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册