diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4fe4f9a0e4b90e34b95ddfba52e22ee762273a0..cfaab206e1f321a55119d4a8d65c4a99d3819fff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,7 +41,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
-option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
@@ -58,8 +57,10 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
+option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
+option(WITH_CONTRIB "Compile the third-party contributions" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
@@ -156,7 +157,6 @@ include(cupti)
include(configure) # add paddle env configuration
include(generic) # simplify cmake module
include(package) # set paddle packages
-include(cpplint) # set paddle c++ style
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(rdma) # set rdma libraries
@@ -205,7 +205,7 @@ endif(USE_NNPACK)
add_subdirectory(proto)
-if(NOT MOBILE_INFERENCE)
+if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY)
# "add_subdirectory(go)" should be placed after the following loine,
# because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
@@ -233,3 +233,8 @@ if(WITH_DOC)
find_python_module(recommonmark REQUIRED)
add_subdirectory(doc)
endif()
+
+# Build paddle/contrib only when the WITH_CONTRIB option is switched on.
+if(WITH_CONTRIB)
+ add_subdirectory(paddle/contrib)
+endif()
diff --git a/Dockerfile b/Dockerfile
index ea39efd00bb5c0a7deb3f6d57083d83a673b883c..e5508486d6df6a7465998b7e2926b21a1604dfb4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -101,6 +101,3 @@ RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
-
-# development image default do build work
-CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
diff --git a/Dockerfile.android b/Dockerfile.android
index 848a7eba6f1421432addae8acff407b611adb4ae..48db2efea21a648657e3f490c95429b9a29ede52 100644
--- a/Dockerfile.android
+++ b/Dockerfile.android
@@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
unzip -q android-ndk-r14b-linux-x86_64.zip && \
mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
rm -rf /opt/android-ndk-tmp
-
-CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"]
diff --git a/benchmark/cluster/README.md b/benchmark/cluster/README.md
deleted file mode 100644
index 64816098a524f064ec12474a736cd4c721227a70..0000000000000000000000000000000000000000
--- a/benchmark/cluster/README.md
+++ /dev/null
@@ -1,196 +0,0 @@
-# Cluster Training Benchmark
-
-## Setup
-
-- Platform
- - Kubernetes: v1.6.2
- - Linux Kernel: v3.10.0
-
-- Resource
- - CPU: 10 Cores per Pod
- - Memory: 5GB per Pod
-
-- Docker Image
-
- We use different base Docker Image to run the benchmark on Kubernetes:
- - PaddlePaddle v2: paddlepaddle/paddle:0.11.0
- - PaddlePaddle Fluid: paddlepaddle/paddle:[commit-id]
- - TensorFlow: tensorflow/tensorflow:1.5.0-rc0
-
-- Model
- vgg16 is used in this benchmark.
-
-## Cases
-
-- Variable
- - Batch Size of training data.
- - PServer count of the training job.
- - The number of trainers.
-
-- Invariant
- - The resource of trainer/pserver Pod.
-
-### Measure the Performance for Different Batch Size
-
-- PServer Count: 40
-- Trainer Count: 100
-- Metrics: mini-batch / sec
-
-
-
-
-
-Batch Size |
- 32 |
-64 |
-128 |
-256 |
-
-
-
-
- PaddlePaddle Fluid |
-- |
-- |
-- |
-- |
-
-
-PaddlePaddle v2 |
-- |
-- |
-- |
-- |
-
-
-TensorFlow |
-- |
-- |
-- |
-- |
-
-
-
-
-### Measure the Performance for Different PServer Count
-
-- Trainer Count: 100
-- Batch Size: 64
-- Metrics: mini-batch / sec
-
-
-
-
-
-PServer Count |
-10 |
-20 |
-40 |
-60 |
-
-
-
-
- PaddlePaddle Fluid |
-- |
-- |
-- |
-- |
-
-
-PaddlePaddle v2 |
-- |
-- |
-- |
-- |
-
-
-TensorFlow |
-- |
-- |
-- |
-- |
-
-
-
-
-### Measure Parallel Efficiency By Increasing Trainer Count
-
-- PServer Count: 20
-- Batch Size: 64
-- Metrics:
-
-$S = \div(T1, TN)$
-
-which S is the ratio of T1 over TN, training time of 1 and N trainers.
-The parallel efficiency is:
-
-$E = \div(S, N)$
-
-
-
-
-Trainer Counter |
-1 |
-10 |
-20 |
-30 |
-40 |
-50 |
-60 |
-70 |
-80 |
-90 |
-100 |
-
-
-
-
- PaddlePaddle Fluid |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-
-
-PaddlePaddle v2 |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-
-
-TensorFlow |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-- |
-
-
-
-
-
-## Reproduce the benchmark
-
-TODO
diff --git a/benchmark/cluster/vgg16/Dockerfile b/benchmark/cluster/vgg16/Dockerfile
deleted file mode 100644
index 13ad8e1b6237e6f41a076c4fb54311728832ae33..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/Dockerfile
+++ /dev/null
@@ -1,35 +0,0 @@
-FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04
-
-# you can get mirror list here:
-# https://launchpad.net/ubuntu/+archivemirrors
-ARG UBUNTU_MIRROR
-RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
-
-RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev
-RUN pip install -U kubernetes opencv-python
-
-RUN pip install paddlepaddle
-# if network is slowly, you may need to add proxy here.
-# ENV https_proxy=
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python'
-RUN pip uninstall -y paddlepaddle
-# unset proxy if it is setted.
-# ENV https_proxy=""
-
-# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF,
-# so we must build one with distribute support to install in this image.
-ADD *.whl /
-RUN pip install /*.whl && rm -f /*.whl
-ENV LD_LIBRARY_PATH=/usr/local/lib
-
-# tf k8s
-RUN pip install tensorflow==1.4.0
-ADD tf_k8s /usr/bin
-RUN chmod +x /usr/bin/tf_k8s
-ADD vgg16_tf.py /workspace/
-
-# below lines may change a lot for debugging
-ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
-ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
-RUN chmod +x /usr/bin/paddle_k8s
-ADD vgg16_fluid.py vgg16_v2.py /workspace/
diff --git a/benchmark/cluster/vgg16/README.md b/benchmark/cluster/vgg16/README.md
deleted file mode 100644
index d56a912b9b03986e32693363f82df05a34b779e9..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/README.md
+++ /dev/null
@@ -1,195 +0,0 @@
-# Performance for Distributed vgg16
-
-## Test Result
-
-### Hardware Infomation
-
-- CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
-- cpu MHz : 2101.000
-- cache size : 20480 KB
-
-### Blas settings
-
-Setting environment variable: `MKL_NUM_THREADS=1`.
-
-### Single Node Single Thread
-
-- Metrics: samples / sec
-
-
-
-
-Batch Size |
- 32 |
-64 |
-128 |
-256 |
-
-
-
-
- PaddlePaddle Fluid |
- 15.44 |
- 16.32 |
- 16.74 |
- 16.79 |
-
-
-PaddlePaddle v2 |
- 15.97 |
- 17.04 |
- 17.60 |
- 17.83 |
-
-
-TensorFlow |
- 9.09 |
- 9.10 |
- 9.24 |
- 8.66 |
-
-
-
-
-
-### Different Batch Size
-
-- PServer Count: 10
-- Trainer Count: 20
-- Metrics: samples / sec
-
-
-
-
-Batch Size |
- 32 |
-64 |
-128 |
-256 |
-
-
-
-
- PaddlePaddle Fluid |
- 190.20 |
- 222.15 |
- 247.40 |
- 258.18 |
-
-
-PaddlePaddle v2 |
- 170.96 |
- 233.71 |
- 256.14 |
- 329.23 |
-
-
-TensorFlow |
- - |
- - |
- - |
- - |
-
-
-
-
-### Accelerate Rate
-
-- Pserver Count: 20
-- Batch Size: 128
-- Metrics: samples / sec
-
-
-
-
-Trainer Count |
-20 |
-40 |
-80 |
-100 |
-
-
-
-
- PaddlePaddle Fluid |
- 263.29 (78.64%) |
- 518.80 (77.47%) |
- 836.26 (62.44%) |
- 1019.29 (60.89%) |
-
-
-PaddlePaddle v2 (need more tests) |
- 326.85 (92.85%) |
- 534.58 (75.93%) |
- 853.30 (60.60%) |
- 1041.99 (59.20%) |
-
-
-TensorFlow |
- - |
- - |
- - |
- - |
-
-
-
-
-
-### Different Pserver Count
-
-- Trainer Count: 60
-- Batch Size: 128
-- Metrics: samples/ sec
-
-
-
-
-PServer Count |
-3 |
-6 |
-10 |
-20 |
-
-
-
-
- PaddlePaddle Fluid(should fix in next PR) |
- 589.1 |
- 592.6 |
- 656.4 |
- 655.8 |
-
-
-PaddlePaddle v2 (need more tests) |
- 593.4 |
- 791.3 |
- 729.7 |
- 821.7 |
-
-
-TensorFlow |
- - |
- - |
- - |
- - |
-
-
-
-
-
-*The performance gap between Fuild and v2 comes from the network interference.*
-
-
-## Steps to Run the Performance Test
-
-1. You must re-compile PaddlePaddle and enable `-DWITH_DISTRIBUTE` to build PaddlePaddle with distributed support.
-1. When the build finishes, copy the output `whl` package located under `build/python/dist` to current directory.
-1. Run `docker build -t [image:tag] .` to build the docker image and run `docker push [image:tag]` to push the image to reponsitory so kubernetes can find it.
-1. Run `kubectl create -f pserver.yaml && kubectl create -f trainer.yaml` to start the job on your kubernetes cluster (you must configure the `kubectl` client before this step).
-1. Run `kubectl get po` to get running pods, and run `kubectl logs [podID]` to fetch the pod log of pservers and trainers.
-
-Check the logs for the distributed training progress and analyze the performance.
-
-## Enable Verbos Logs
-
-Edit `pserver.yaml` and `trainer.yaml` and add an environment variable `GLOG_v=3` and `GLOG_logtostderr=1` to see what happend in detail.
diff --git a/benchmark/cluster/vgg16/fluid_pserver.yaml b/benchmark/cluster/vgg16/fluid_pserver.yaml
deleted file mode 100644
index ee8b0763b62fc011f40f6197e929a68b48a93e47..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/fluid_pserver.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-apiVersion: extensions/v1beta1
-kind: ReplicaSet
-metadata:
- name: vgg16job-pserver
-spec:
- replicas: 10
- template:
- metadata:
- labels:
- paddle-job-pserver: vgg16job
- spec:
- hostNetwork: true
- imagePullSecrets:
- - name: job-registry-secret
- containers:
- - name: pserver
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16job
- - name: MKL_NUM_THREADS
- value: "1"
- - name: TRAINING_ROLE
- value: "PSERVER"
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "1"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- command: ["paddle_k8s", "start_fluid"]
- resources:
- requests:
- memory: 10Gi
- cpu: 4
- limits:
- memory: 10Gi
- cpu: 4
diff --git a/benchmark/cluster/vgg16/fluid_trainer.yaml b/benchmark/cluster/vgg16/fluid_trainer.yaml
deleted file mode 100644
index 3d56caac009464d1073423bb63abff1f8b0cf28f..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/fluid_trainer.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: vgg16job-trainer
-spec:
- parallelism: 20
- completions: 20
- template:
- metadata:
- labels:
- paddle-job: vgg16job
- spec:
- imagePullSecrets:
- - name: job-registry-secret
- hostNetwork: true
- containers:
- - name: trainer
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- command: ["paddle_k8s", "start_fluid"]
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16job
- - name: TRAINING_ROLE
- value: "TRAINER"
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 128"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "1"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- resources:
- requests:
- memory: 40Gi
- cpu: 2
- limits:
- memory: 40Gi
- cpu: 2
- restartPolicy: Never
diff --git a/benchmark/cluster/vgg16/run_vgg_dist.sh b/benchmark/cluster/vgg16/run_vgg_dist.sh
deleted file mode 100644
index 8c0501439e9d5fa175f5aa9b62d286e690a10904..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/run_vgg_dist.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# Update to point to the source file.
-VGG_SRC="vgg16_fluid.py"
-
-export TRAINING_ROLE=PSERVER
-export TRAINERS=2
-export POD_IP=127.0.0.1
-export PADDLE_INIT_PORT=6174
-MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 &
-
-# Need to wait for the ps to start first.
-sleep 10
-echo "done start ps"
-
-export TRAINING_ROLE=TRAINER
-export TRAINERS=2
-export POD_IP=127.0.0.1
-export PADDLE_INIT_PORT=6174
-CUDA_VISIBLE_DEVICES=4 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=0 &
-CUDA_VISIBLE_DEVICES=5 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=1 &
diff --git a/benchmark/cluster/vgg16/tf_k8s b/benchmark/cluster/vgg16/tf_k8s
deleted file mode 100644
index 4fc263d5f681aeabfa71f1758714d269d987b272..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/tf_k8s
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/bin/bash
-check_trainer_ret() {
- ret=$1
- stdbuf -oL echo "job returned $ret...setting pod return message..."
- stdbuf -oL echo "==============================="
-
- if [ $ret -eq 136 ] ; then
- echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log
- elif [ $ret -eq 139 ] ; then
- echo "Segmentation Fault" > /dev/termination-log
- elif [ $ret -eq 1 ] ; then
- echo "General Error" > /dev/termination-log
- elif [ $ret -eq 134 ] ; then
- echo "Program Abort" > /dev/termination-log
- fi
- stdbuf -oL echo "termination log wroted..."
- exit $ret
-}
-
-g_pservers=""
-g_trainers=""
-
-wait_running_pods(){
- pserver_label="tf-job-pserver=${JOB_NAME}"
- trainer_label="tf-job-trainer=${JOB_NAME}"
-
- stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS_NUM}
- stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS_NUM}
-
- g_pservers=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PORT})
- g_trainers=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PORT})
-}
-
-start_tf_pserver(){
- wait_running_pods
-
- label="tf-job-pserver=${JOB_NAME}"
- pserver_id=$(python /root/k8s_tools.py fetch_id ${label})
-
- cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
- --job_name=${TF_JOB_NAME} --task_index=${pserver_id}"
-
- stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
-}
-
-start_tf_trainer(){
- wait_running_pods
-
- label="tf-job-trainer=${JOB_NAME}"
- trainer_id=$(python /root/k8s_tools.py fetch_id ${label})
-
- cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
- --job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}"
-
- stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
- check_trainer_ret $?
-}
-
-start_tf(){
- if [[ "${TF_JOB_NAME}" == "worker" ]]; then
- start_tf_trainer
- else
- start_tf_pserver
- fi
-}
-
-usage() {
- echo "usage: tf_k8s []:"
- echo " start_tf Start tensorflow jobs"
-}
-
-case "$1" in
- start_tf)
- start_tf
- ;;
- --help)
- usage
- ;;
- *)
- usage
- ;;
-esac
diff --git a/benchmark/cluster/vgg16/tf_pserver.yaml b/benchmark/cluster/vgg16/tf_pserver.yaml
deleted file mode 100644
index 5e37c700819119c8af05c40fe4b8d13911efc3e1..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/tf_pserver.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-apiVersion: extensions/v1beta1
-kind: ReplicaSet
-metadata:
- name: vgg16job-tf-pserver
-spec:
- replicas: 10
- template:
- metadata:
- labels:
- tf-job-pserver: vgg16job-tf
- spec:
- hostNetwork: true
- imagePullSecrets:
- - name: job-registry-secret
- containers:
- - name: pserver
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
- imagePullPolicy: Always
- command: ["tf_k8s", "start_tf"]
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PORT
- value: "32036"
- - name: ENTRY
- value: "python vgg16_tf.py"
- - name: JOB_NAME
- value: vgg16job-tf
- - name: PSERVERS_NUM
- value: "10"
- - name: TF_JOB_NAME
- value: "ps"
- - name: TRAINERS_NUM
- value: "20"
- - name: BATCH_SIZE
- value: "128"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: NUM_PASSES
- value: "1"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- resources:
- requests:
- memory: 10Gi
- cpu: 4
- limits:
- memory: 10Gi
- cpu: 4
diff --git a/benchmark/cluster/vgg16/tf_trainer.yaml b/benchmark/cluster/vgg16/tf_trainer.yaml
deleted file mode 100644
index 08795df3addfa7b618db24a65e57be190e268f06..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/tf_trainer.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: vgg16job-tf-trainer
-spec:
- parallelism: 20
- completions: 20
- template:
- metadata:
- labels:
- tf-job-trainer: vgg16job-tf
- spec:
- imagePullSecrets:
- - name: job-registry-secret
- hostNetwork: true
- containers:
- - name: trainer
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
- imagePullPolicy: Always
- command: ["tf_k8s", "start_tf"]
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PORT
- value: "32036"
- - name: JOB_NAME
- value: vgg16job-tf
- - name: TF_JOB_NAME
- value: "worker"
- - name: ENTRY
- value: "python vgg16_tf.py"
- - name: PSERVERS_NUM
- value: "10"
- - name: BATCH_SIZE
- value: "128"
- - name: TRAINERS_NUM
- value: "20"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: NUM_PASSES
- value: "1"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- resources:
- requests:
- memory: 40Gi
- cpu: 2
- limits:
- memory: 40Gi
- cpu: 2
- restartPolicy: Never
diff --git a/benchmark/cluster/vgg16/v2_pserver.yaml b/benchmark/cluster/vgg16/v2_pserver.yaml
deleted file mode 100644
index dd1271e0cf399184134c06b3200ee1202c65cef0..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/v2_pserver.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-apiVersion: extensions/v1beta1
-kind: ReplicaSet
-metadata:
- name: vgg16v2job-pserver
-spec:
- replicas: 10
- template:
- metadata:
- labels:
- paddle-job-pserver: vgg16v2job
- spec:
- hostNetwork: true
- imagePullSecrets:
- - name: job-registry-secret
- containers:
- - name: pserver
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16v2job
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "python train.py"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "1"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- command: ["paddle_k8s", "start_pserver"]
- resources:
- requests:
- memory: 10Gi
- cpu: 4
- limits:
- memory: 10Gi
- cpu: 4
diff --git a/benchmark/cluster/vgg16/v2_trainer.yaml b/benchmark/cluster/vgg16/v2_trainer.yaml
deleted file mode 100644
index 12c8964066cbcfe8d2a44de2f51a3d12ea422fe2..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/v2_trainer.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: vgg16v2job-trainer
-spec:
- parallelism: 20
- completions: 20
- template:
- metadata:
- labels:
- paddle-job: vgg16v2job
- spec:
- imagePullSecrets:
- - name: job-registry-secret
- hostNetwork: true
- containers:
- - name: trainer
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- command: ["paddle_k8s", "start_trainer", "v2"]
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16v2job
- - name: BATCH_SIZE
- value: "256"
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "cd /workspace && MKL_NUM_THREADS=1 python /workspace/vgg16_v2.py"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "2"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- resources:
- requests:
- memory: 40Gi
- cpu: 2
- limits:
- memory: 40Gi
- cpu: 2
- restartPolicy: Never
diff --git a/benchmark/cluster/vgg16/vgg16_fluid.py b/benchmark/cluster/vgg16/vgg16_fluid.py
deleted file mode 100644
index 05b5f3977cbed2f08df73c6d8ba2fff687db3313..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/vgg16_fluid.py
+++ /dev/null
@@ -1,308 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""VGG16 benchmark in Fluid"""
-from __future__ import print_function
-
-import sys
-import time
-import numpy as np
-import paddle.v2 as paddle
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.profiler as profiler
-import argparse
-import functools
-import os
-from paddle.fluid import debuger
-
-
-def str2bool(v):
- if v.lower() in ('yes', 'true', 't', 'y', '1'):
- return True
- elif v.lower() in ('no', 'false', 'f', 'n', '0'):
- return False
- else:
- raise argparse.ArgumentTypeError('Boolean value expected.')
-
-
-parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument(
- '--batch_size', type=int, default=128, help="Batch size for training.")
-parser.add_argument(
- '--learning_rate',
- type=float,
- default=1e-3,
- help="Learning rate for training.")
-parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.")
-parser.add_argument(
- '--device',
- type=str,
- default='CPU',
- choices=['CPU', 'GPU'],
- help="The device type.")
-parser.add_argument('--device_id', type=int, default=0, help="The device id.")
-parser.add_argument(
- '--data_format',
- type=str,
- default='NCHW',
- choices=['NCHW', 'NHWC'],
- help='The data order, now only support NCHW.')
-parser.add_argument(
- '--data_set',
- type=str,
- default='cifar10',
- choices=['cifar10', 'flowers'],
- help='Optional dataset for benchmark.')
-parser.add_argument(
- '--local',
- type=str2bool,
- default=True,
- help='Whether to run as local mode.')
-
-parser.add_argument(
- "--ps_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--trainer_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--profile", action='store_true', help="If set, profile a few steps.")
-
-# Flags for defining the tf.train.Server
-parser.add_argument(
- "--task_index", type=int, default=0, help="Index of task within the job")
-args = parser.parse_args()
-
-
-def vgg16_bn_drop(input):
- def conv_block(input, num_filter, groups, dropouts):
- return fluid.nets.img_conv_group(
- input=input,
- pool_size=2,
- pool_stride=2,
- conv_num_filter=[num_filter] * groups,
- conv_filter_size=3,
- conv_act='relu',
- conv_with_batchnorm=True,
- conv_batchnorm_drop_rate=dropouts,
- pool_type='max')
-
- conv1 = conv_block(input, 64, 2, [0.3, 0])
- conv2 = conv_block(conv1, 128, 2, [0.4, 0])
- conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
- conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
- conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
-
- drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
- fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
- bn = fluid.layers.batch_norm(input=fc1, act='relu')
- drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
- fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
- return fc2
-
-
-def main():
- if args.data_set == "cifar10":
- classdim = 10
- if args.data_format == 'NCHW':
- data_shape = [3, 32, 32]
- else:
- data_shape = [32, 32, 3]
- else:
- classdim = 102
- if args.data_format == 'NCHW':
- data_shape = [3, 224, 224]
- else:
- data_shape = [224, 224, 3]
-
- # Input data
- images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
- label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-
- # Train program
- net = vgg16_bn_drop(images)
- predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
- cost = fluid.layers.cross_entropy(input=predict, label=label)
- avg_cost = fluid.layers.mean(x=cost)
-
- # Evaluator
- batch_size = fluid.layers.create_tensor(dtype='int64')
- batch_acc = fluid.layers.accuracy(
- input=predict, label=label, total=batch_size)
-
- # inference program
- inference_program = fluid.default_main_program().clone()
- with fluid.program_guard(inference_program):
- inference_program = fluid.io.get_inference_program(batch_acc)
-
- # Optimization
- optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
- optimize_ops, params_grads = optimizer.minimize(avg_cost)
-
- # Initialize executor
- place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(
- args.device_id)
- exe = fluid.Executor(place)
-
- # test
- def test(exe):
- test_pass_acc = fluid.average.WeightedAverage()
- for batch_id, data in enumerate(test_reader()):
- img_data = np.array(map(lambda x: x[0].reshape(data_shape),
- data)).astype("float32")
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
- y_data = y_data.reshape([-1, 1])
-
- outs = exe.run(inference_program,
- feed={"pixel": img_data,
- "label": y_data},
- fetch_list=[batch_acc, batch_size])
- test_pass_acc.add(value=np.array(outs[0]), weight=np.array(outs[1]))
-
- return test_pass_acc.eval()
-
- def train_loop(exe, trainer_prog):
- iters = 0
- ts = time.time()
- train_pass_acc = fluid.average.WeightedAverage()
- for pass_id in range(args.num_passes):
- # train
- start_time = time.time()
- num_samples = 0
- train_pass_acc.reset()
-
- def run_step(batch_id, data):
- img_data = np.array(
- map(lambda x: x[0].reshape(data_shape), data)).astype(
- "float32")
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
- y_data = y_data.reshape([-1, 1])
-
- loss, acc, b_size = exe.run(
- trainer_prog,
- feed={"pixel": img_data,
- "label": y_data},
- fetch_list=[avg_cost, batch_acc, batch_size])
- return loss, acc, b_size
-
- if args.profile and args.task_index == 0:
- # warmup.
- for batch_id, data in enumerate(train_reader()):
- if batch_id > 5: break
- run_step(batch_id, data)
- with profiler.profiler('All', 'total', '/tmp/profile_vgg'):
- for batch_id, data in enumerate(train_reader()):
- if batch_id > 5: break
- run_step(batch_id, data)
-
- for batch_id, data in enumerate(train_reader()):
- ts = time.time()
- loss, acc, b_size = run_step(batch_id, data)
- iters += 1
- num_samples += len(data)
- train_pass_acc.add(value=acc, weight=b_size)
- print(
- "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, "
- "Speed = %.2f img/s" % (pass_id, iters, loss, acc,
- len(data) / (time.time() - ts))
- ) # The accuracy is the accumulation of batches, but not the current batch.
-
- pass_elapsed = time.time() - start_time
- pass_train_acc = train_pass_acc.eval()
- pass_test_acc = test(exe)
- print("Task:%d Pass = %d, Training performance = %f imgs/s, "
- "Train accuracy = %f, Test accuracy = %f\n" %
- (args.task_index, pass_id, num_samples / pass_elapsed,
- pass_train_acc, pass_test_acc))
-
- if args.local:
- # Parameter initialization
- exe.run(fluid.default_startup_program())
-
- # data reader
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.train10() if args.data_set == 'cifar10'
- else paddle.dataset.flowers.train(),
- buf_size=5120),
- batch_size=args.batch_size)
- test_reader = paddle.batch(
- paddle.dataset.cifar.test10()
- if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
- batch_size=args.batch_size)
- train_loop(exe, fluid.default_main_program())
- else:
- trainers = int(os.getenv("TRAINERS")) # total trainer count
- print("trainers total: ", trainers)
-
- training_role = os.getenv(
- "TRAINING_ROLE",
- "TRAINER") # get the training role: trainer/pserver
-
- t = fluid.DistributeTranspiler()
- t.transpile(
- trainer_id=args.task_index,
- pservers=args.ps_hosts,
- trainers=trainers)
-
- if training_role == "PSERVER":
- current_endpoint = os.getenv("POD_IP") + ":" + os.getenv(
- "PADDLE_INIT_PORT")
- if not current_endpoint:
- print("need env SERVER_ENDPOINT")
- exit(1)
- pserver_prog = t.get_pserver_program(current_endpoint)
- pserver_startup = t.get_startup_program(current_endpoint,
- pserver_prog)
- exe.run(pserver_startup)
- exe.run(pserver_prog)
- elif training_role == "TRAINER":
- # Parameter initialization
- exe.run(fluid.default_startup_program())
-
- # data reader
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.train10() if args.data_set == 'cifar10'
- else paddle.dataset.flowers.train(),
- buf_size=5120),
- batch_size=args.batch_size)
- test_reader = paddle.batch(
- paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else
- paddle.dataset.flowers.test(),
- batch_size=args.batch_size)
-
- trainer_prog = t.get_trainer_program()
- feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
- # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
- exe.run(fluid.default_startup_program())
- train_loop(exe, trainer_prog)
- else:
- print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
-
-
-def print_arguments():
- print('----------- Configuration Arguments -----------')
- for arg, value in sorted(vars(args).iteritems()):
- print('%s: %s' % (arg, value))
- print('------------------------------------------------')
-
-
-if __name__ == "__main__":
- print_arguments()
- main()
diff --git a/benchmark/cluster/vgg16/vgg16_tf.py b/benchmark/cluster/vgg16/vgg16_tf.py
deleted file mode 100644
index 2d220478acae46566760209dbc012cff316946aa..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/vgg16_tf.py
+++ /dev/null
@@ -1,366 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""VGG16 benchmark in TensorFlow
-You can get distribution example template structure here:
-https://medium.com/clusterone/how-to-write-distributed-tensorflow-code-with-an-example-on-tensorport-70bf3306adcb
-https://www.tensorflow.org/deploy/distributed
-"""
-
-import tensorflow as tf
-import paddle.v2 as paddle
-import numpy as np
-import argparse
-import time
-
-parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument(
- '--batch_size', type=int, default=128, help="Batch size for training.")
-parser.add_argument(
- '--learning_rate',
- type=float,
- default=1e-3,
- help="Learning rate for training.")
-parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.")
-parser.add_argument(
- '--device',
- type=str,
- default='CPU',
- choices=['CPU', 'GPU'],
- help="The device type.")
-parser.add_argument(
- '--data_format',
- type=str,
- default='NHWC',
- choices=['NCHW', 'NHWC'],
- help='The data order, NCHW=[batch, channels, height, width].'
- 'Only support NHWC right now.')
-parser.add_argument(
- '--data_set',
- type=str,
- default='cifar10',
- choices=['cifar10', 'flowers'],
- help='Optional dataset for benchmark.')
-
-parser.add_argument(
- "--ps_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--worker_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--job_name", type=str, default="", help="One of 'worker', 'ps'")
-# Flags for defining the tf.train.Server
-parser.add_argument(
- "--task_index", type=int, default=0, help="Index of task within the job")
-
-args = parser.parse_args()
-
-
-class VGG16Model(object):
- def __init__(self):
- self.parameters = []
-
- def batch_norm_relu(self, inputs, is_training):
- """Performs a batch normalization followed by a ReLU."""
- # We set fused=True for a significant speed boost. See
- # https://www.tensorflow.org/speed/speed_guide#common_fused_ops
- inputs = tf.layers.batch_normalization(
- inputs=inputs,
- axis=1 if args.data_format == 'NCHW' else -1,
- momentum=0.9,
- epsilon=1e-05,
- center=True,
- scale=True,
- training=is_training,
- fused=True)
- inputs = tf.nn.relu(inputs)
- return inputs
-
- def conv_bn_layer(self,
- name,
- images,
- kernel_shape,
- is_training,
- drop_rate=0.0):
- with tf.name_scope(name) as scope:
- kernel = tf.Variable(
- tf.truncated_normal(
- kernel_shape, dtype=tf.float32, stddev=1e-1),
- name='weights')
- conv = tf.nn.conv2d(
- images,
- kernel, [1, 1, 1, 1],
- data_format=args.data_format,
- padding='SAME')
- biases = tf.Variable(
- tf.constant(
- 0.0, shape=[kernel_shape[-1]], dtype=tf.float32),
- trainable=True,
- name='biases')
- out = tf.nn.bias_add(conv, biases)
- out = self.batch_norm_relu(out, is_training)
- out = tf.layers.dropout(out, rate=drop_rate, training=is_training)
- return out
-
- def fc_layer(self, name, inputs, shape):
- with tf.name_scope(name) as scope:
- fc_w = tf.Variable(
- tf.truncated_normal(
- shape, dtype=tf.float32, stddev=1e-1),
- name='weights')
- fc_b = tf.Variable(
- tf.constant(
- 0.0, shape=[shape[-1]], dtype=tf.float32),
- trainable=True,
- name='biases')
- out = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b)
- return out
-
- def network(self, images, class_dim, is_training):
- """ VGG16 model structure.
-
- TODO(kuke): enable this network to support the 'NCHW' data format
- """
-
- # conv1
- conv1_1 = self.conv_bn_layer(
- 'conv1_1', images, [3, 3, 3, 64], is_training, drop_rate=0.3)
- conv1_2 = self.conv_bn_layer(
- 'conv1_2', conv1_1, [3, 3, 64, 64], is_training, drop_rate=0.0)
- # pool1
- pool1 = tf.nn.max_pool(
- conv1_2,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool1')
- # conv2
- conv2_1 = self.conv_bn_layer(
- 'conv2_1', pool1, [3, 3, 64, 128], is_training, drop_rate=0.4)
- conv2_2 = self.conv_bn_layer(
- 'conv2_2', conv2_1, [3, 3, 128, 128], is_training, drop_rate=0.0)
- # pool2
- pool2 = tf.nn.max_pool(
- conv2_2,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool2')
- # conv3
- conv3_1 = self.conv_bn_layer(
- 'conv3_1', pool2, [3, 3, 128, 256], is_training, drop_rate=0.4)
- conv3_2 = self.conv_bn_layer(
- 'conv3_2', conv3_1, [3, 3, 256, 256], is_training, drop_rate=0.4)
- conv3_3 = self.conv_bn_layer(
- 'conv3_3', conv3_2, [3, 3, 256, 256], is_training, drop_rate=0.0)
- # pool3
- pool3 = tf.nn.max_pool(
- conv3_3,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool3')
- # conv4
- conv4_1 = self.conv_bn_layer(
- 'conv4_1', pool3, [3, 3, 256, 512], is_training, drop_rate=0.4)
- conv4_2 = self.conv_bn_layer(
- 'conv4_2', conv4_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
- conv4_3 = self.conv_bn_layer(
- 'conv4_3', conv4_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
- # pool4
- pool4 = tf.nn.max_pool(
- conv4_3,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool4')
- # conv5
- conv5_1 = self.conv_bn_layer(
- 'conv5_1', pool4, [3, 3, 512, 512], is_training, drop_rate=0.4)
- conv5_2 = self.conv_bn_layer(
- 'conv5_2', conv5_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
- conv5_3 = self.conv_bn_layer(
- 'conv5_3', conv5_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
- # pool5
- pool5 = tf.nn.max_pool(
- conv5_3,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool4')
- # flatten
- shape = int(np.prod(pool5.get_shape()[1:]))
- pool5_flat = tf.reshape(pool5, [-1, shape])
- # fc1
- drop = tf.layers.dropout(pool5_flat, rate=0.5, training=is_training)
- fc1 = self.fc_layer('fc1', drop, [shape, 512])
- # fc2
- bn = self.batch_norm_relu(fc1, is_training)
- drop = tf.layers.dropout(bn, rate=0.5, training=is_training)
- fc2 = self.fc_layer('fc2', drop, [512, 512])
-
- fc3 = self.fc_layer('fc3', fc2, [512, class_dim])
-
- return fc3
-
-
-def run_benchmark(cluster_spec, server):
- """Run benchmark on cifar10 or flowers."""
-
- if args.data_set == "cifar10":
- class_dim = 10
- raw_shape = (3, 32, 32)
- dat_shape = (None, 32, 32, 3) if args.data_format == 'NHWC' else (
- None, 3, 32, 32)
- else:
- class_dim = 102
- raw_shape = (3, 224, 224)
- dat_shape = (None, 224, 224, 3) if args.data_format == 'NHWC' else (
- None, 3, 224, 224)
-
- device = tf.train.replica_device_setter(
- worker_device="/job:worker/task:{}".format(args.task_index),
- cluster=cluster_spec)
-
- with tf.device(device):
- images = tf.placeholder(tf.float32, shape=dat_shape)
- labels = tf.placeholder(tf.int64, shape=(None, ))
- is_training = tf.placeholder('bool')
- onehot_labels = tf.one_hot(labels, depth=class_dim)
-
- vgg16 = VGG16Model()
- logits = vgg16.network(images, class_dim, is_training)
- loss = tf.losses.softmax_cross_entropy(
- onehot_labels=onehot_labels, logits=logits)
- avg_loss = tf.reduce_mean(loss)
-
- correct = tf.equal(tf.argmax(logits, 1), labels)
- accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
-
- optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- global_step = tf.Variable(0, name='global_step', trainable=False)
- with tf.control_dependencies(update_ops):
- train_op = optimizer.minimize(avg_loss, global_step=global_step)
-
- summary_op = tf.summary.merge_all()
- init_op = tf.global_variables_initializer()
-
- # data reader
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.train10()
- if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
- buf_size=5120),
- batch_size=args.batch_size)
- test_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.test10()
- if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
- buf_size=5120),
- batch_size=args.batch_size)
-
- # test
- def test():
- test_accs = []
- for batch_id, data in enumerate(test_reader()):
- test_images = np.array(
- map(lambda x: np.transpose(x[0].reshape(raw_shape),
- axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32")
- test_labels = np.array(map(lambda x: x[1], data)).astype('int64')
- test_accs.append(
- accuracy.eval(feed_dict={
- images: test_images,
- labels: test_labels,
- is_training: False
- }))
- return np.mean(test_accs)
-
- config = tf.ConfigProto(
- intra_op_parallelism_threads=1,
- inter_op_parallelism_threads=1,
- log_device_placement=True)
- config.gpu_options.allow_growth = True
-
- hooks = [tf.train.StopAtStepHook(last_step=1000000)]
-
- with tf.train.MonitoredTrainingSession(
- master=server.target,
- is_chief=(args.task_index == 0),
- hooks=hooks,
- config=config) as sess:
- iters, num_samples, start_time = 0, 0, 0.0
- for pass_id in range(args.num_passes):
- # train
- num_samples = 0
- start_time = time.time()
- for batch_id, data in enumerate(train_reader()):
- train_images = np.array(
- map(lambda x: np.transpose(x[0].reshape(raw_shape),
- axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32")
- train_labels = np.array(map(lambda x: x[1], data)).astype(
- 'int64')
- iter_begin_time = time.time()
- _, loss, acc = sess.run([train_op, avg_loss, accuracy],
- feed_dict={
- images: train_images,
- labels: train_labels,
- is_training: True
- })
- iters += 1
- print(
- "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed=%.2f imgs/sec"
- % (pass_id, iters, loss, acc,
- len(data) / (time.time() - iter_begin_time)))
- num_samples += len(data)
- train_elapsed = time.time() - start_time
- # test
- pass_test_acc = test()
- print("Pass = %d, Train speed = %f imgs/s, Test accuracy = %f\n" %
- (pass_id, num_samples / train_elapsed, pass_test_acc))
-
-
-def print_arguments():
- print('----------- Configuration Arguments -----------')
- for arg, value in sorted(vars(args).iteritems()):
- print('%s: %s' % (arg, value))
- print('------------------------------------------------')
-
-
-if __name__ == '__main__':
- print_arguments()
-
- ps_hosts = args.ps_hosts.split(",")
- worker_hosts = args.worker_hosts.split(",")
-
- # Create a cluster from the parameter server and worker hosts.
- cluster_spec = tf.train.ClusterSpec({
- "ps": ps_hosts,
- "worker": worker_hosts
- })
-
- # Create and start a server for the local task.
- server = tf.train.Server(
- cluster_spec, job_name=args.job_name, task_index=args.task_index)
-
- if args.job_name == "ps":
- print("start pserver")
- server.join()
- elif args.job_name == "worker":
- print("start worker")
- run_benchmark(cluster_spec, server)
diff --git a/benchmark/cluster/vgg16/vgg16_v2.py b/benchmark/cluster/vgg16/vgg16_v2.py
deleted file mode 100644
index 1a66af32d7131997c63bd3c3042875f33a467084..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/vgg16_v2.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import gzip
-
-import paddle.v2.dataset.cifar as cifar
-import paddle.v2 as paddle
-import time
-import os
-
-DATA_DIM = 3 * 32 * 32
-CLASS_DIM = 10
-BATCH_SIZE = os.getenv("BATCH_SIZE")
-if BATCH_SIZE:
- BATCH_SIZE = int(BATCH_SIZE)
-else:
- BATCH_SIZE = 128
-print "batch_size", BATCH_SIZE
-NODE_COUNT = int(os.getenv("TRAINERS"))
-ts = 0
-
-
-def vgg(input, nums, class_dim):
- def conv_block(input, num_filter, groups, num_channels=None):
- return paddle.networks.img_conv_group(
- input=input,
- num_channels=num_channels,
- pool_size=2,
- pool_stride=2,
- conv_num_filter=[num_filter] * groups,
- conv_filter_size=3,
- conv_act=paddle.activation.Relu(),
- pool_type=paddle.pooling.Max())
-
- assert len(nums) == 5
- # the channel of input feature is 3
- conv1 = conv_block(input, 64, nums[0], 3)
- conv2 = conv_block(conv1, 128, nums[1])
- conv3 = conv_block(conv2, 256, nums[2])
- conv4 = conv_block(conv3, 512, nums[3])
- conv5 = conv_block(conv4, 512, nums[4])
-
- fc_dim = 512
- fc1 = paddle.layer.fc(input=conv5,
- size=fc_dim,
- act=paddle.activation.Relu(),
- layer_attr=paddle.attr.Extra(drop_rate=0.5))
- fc2 = paddle.layer.fc(input=fc1,
- size=fc_dim,
- act=paddle.activation.Relu(),
- layer_attr=paddle.attr.Extra(drop_rate=0.5))
- out = paddle.layer.fc(input=fc2,
- size=class_dim,
- act=paddle.activation.Softmax())
- return out
-
-
-def vgg13(input, class_dim):
- nums = [2, 2, 2, 2, 2]
- return vgg(input, nums, class_dim)
-
-
-def vgg16(input, class_dim):
- nums = [2, 2, 3, 3, 3]
- return vgg(input, nums, class_dim)
-
-
-def vgg19(input, class_dim):
- nums = [2, 2, 4, 4, 4]
- return vgg(input, nums, class_dim)
-
-
-def main():
- global ts
- paddle.init(use_gpu=False)
- image = paddle.layer.data(
- name="image", type=paddle.data_type.dense_vector(DATA_DIM))
- lbl = paddle.layer.data(
- name="label", type=paddle.data_type.integer_value(CLASS_DIM))
-
- extra_layers = None
- # NOTE: for v2 distributed training need averaging updates.
- learning_rate = 1e-3 / NODE_COUNT
- out = vgg16(image, class_dim=CLASS_DIM)
- cost = paddle.layer.classification_cost(input=out, label=lbl)
-
- # Create parameters
- parameters = paddle.parameters.create(cost)
-
- # Create optimizer
- optimizer = paddle.optimizer.Momentum(
- momentum=0.9,
- regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
- BATCH_SIZE),
- learning_rate=learning_rate / BATCH_SIZE,
- learning_rate_decay_a=0.1,
- learning_rate_decay_b=128000 * 35,
- learning_rate_schedule="discexp", )
-
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- cifar.train10(),
- # To use other data, replace the above line with:
- # reader.train_reader('train.list'),
- buf_size=1000),
- batch_size=BATCH_SIZE)
- test_reader = paddle.batch(
- cifar.test10(),
- # To use other data, replace the above line with:
- # reader.test_reader('val.list'),
- batch_size=BATCH_SIZE)
-
- # Create trainer
- trainer = paddle.trainer.SGD(cost=cost,
- parameters=parameters,
- update_equation=optimizer,
- extra_layers=extra_layers,
- is_local=False)
-
- # End batch and end pass event handler
- def event_handler(event):
- global ts, ts_pass
- if isinstance(event, paddle.event.BeginPass):
- ts_pass = time.time()
- if isinstance(event, paddle.event.BeginIteration):
- ts = time.time()
- if isinstance(event, paddle.event.EndIteration):
- if event.batch_id % 1 == 0:
- print "\nPass %d, Batch %d, Cost %f, %s, spent: %f" % (
- event.pass_id, event.batch_id, event.cost, event.metrics,
- time.time() - ts)
- if isinstance(event, paddle.event.EndPass):
- print "Pass %d end, spent: %f" % (event.pass_id,
- time.time() - ts_pass)
- result = trainer.test(reader=test_reader)
- print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
-
- trainer.train(
- reader=train_reader, num_passes=200, event_handler=event_handler)
-
-
-if __name__ == '__main__':
- main()
diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py
index 1d8f27440d0f1438e0520684ee3e90e8a5891a17..30b070e4acac60caa97a4e8ffd07462cb347ee93 100644
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@@ -94,6 +94,10 @@ def parse_args():
'--memory_optimize',
action='store_true',
help='If set, optimize runtime memory before start.')
+    # Benchmark switch: when present, the training loop runs without feeding
+    # reader data (see the use_fake_data branches further down in this file).
+    parser.add_argument(
+        '--use_fake_data',
+        action='store_true',
+        help='If set, omit the actual read data operators.')
parser.add_argument(
'--update_method',
type=str,
@@ -198,6 +202,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
exe.run(train_prog)
return
+ if args.use_fake_data:
+ raise Exception(
+ "fake data is not supported in single GPU test for now.")
+
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
@@ -244,7 +252,31 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
batch_acc, args, train_prog, startup_prog, nccl_id_var,
num_trainers, trainer_id):
+    # Collect the data (feed) variables of the training program up front: the
+    # fake-data setup below and the DataFeeder created later both use them.
+    feed_var_list = [
+        var for var in train_prog.global_block().vars.itervalues()
+        if var.is_data
+    ]
+    # generate fake:
+    # For every feed variable, clone it into the startup program and append a
+    # fill_constant op there so it is filled with 1.0 when startup_prog runs.
+    if args.use_fake_data:
+        for var in feed_var_list:
+            v = startup_prog.global_block().clone_variable(var)
+            # NOTE(review): presumably marked persistable so the filled
+            # tensors survive between iterations -- confirm.
+            var.persistable = True
+            v.persistable = True
+
+            real_shape = list(var.shape)
+            # Floor division keeps the batch dimension an integer even when
+            # true division is in effect (Python 3 / __future__ division).
+            real_shape[0] = args.batch_size // args.gpus
+            startup_prog.global_block().append_op(
+                outputs={"Out": v},
+                type="fill_constant",
+                attrs={"shape": real_shape,
+                       "value": 1.0,
+                       "dtype": var.dtype})
+
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
+ if nccl_id_var and trainer_id == 0:
+ #FIXME(wuyi): wait other trainer to start listening
+ time.sleep(30)
+
startup_exe = fluid.Executor(place)
startup_exe.run(startup_prog)
strategy = fluid.ExecutionStrategy()
@@ -256,10 +288,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
exec_strategy=strategy,
num_trainers=num_trainers,
trainer_id=trainer_id)
- feed_var_list = [
- var for var in train_prog.global_block().vars.itervalues()
- if var.is_data
- ]
+
feeder = fluid.DataFeeder(feed_var_list, place)
for pass_id in range(args.pass_num):
num_samples = 0
@@ -271,7 +300,10 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
num_samples = 0
if iters == args.iterations:
break
- loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
+ if args.use_fake_data:
+ loss, = exe.run([avg_loss.name])
+ else:
+ loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
if args.update_method == "pserver":
exe.bcast_params()
num_samples += len(data)
diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py
index 3dbb4b8c5dd13657f8d1853003b321ad047e1349..39ba207fd96f71563504017e77dc0e87c249b3f8 100644
--- a/benchmark/fluid/kube_gen_job.py
+++ b/benchmark/fluid/kube_gen_job.py
@@ -112,6 +112,7 @@ def gen_job():
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
+ envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster.
envs.append({
diff --git a/benchmark/fluid/kube_templates/__init__.py b/benchmark/fluid/kube_templates/__init__.py
index b64a7f78ff10d03987ea4a8c13a0e34bb433f64c..2d09d940a5ee638e4b55405d05924e2d76006cfc 100644
--- a/benchmark/fluid/kube_templates/__init__.py
+++ b/benchmark/fluid/kube_templates/__init__.py
@@ -54,5 +54,13 @@ envs = [
"fieldPath": "status.podIP"
}
}
+ },
+ {
+ "name": "PADDLE_CURRENT_IP",
+ "valueFrom": {
+ "fieldRef": {
+ "fieldPath": "status.podIP"
+ }
+ }
}
]
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index e490397cc0624c310949a4b571bd00cac6e8953b..682614742cf1bd3130c638020a2545e16226d4d6 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS)
add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
endif(USE_EIGEN_FOR_BLAS)
+# Pass the EIGEN_USE_THREADS build option on to the compiler as a preprocessor define.
+if(EIGEN_USE_THREADS)
+ add_definitions(-DEIGEN_USE_THREADS)
+endif(EIGEN_USE_THREADS)
+
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake
deleted file mode 100644
index 4823dc3e91390002aefac70f7931b4197db05789..0000000000000000000000000000000000000000
--- a/cmake/cpplint.cmake
+++ /dev/null
@@ -1,62 +0,0 @@
-# util to check C++ file style
-# * it basically use google cpplint.py.
-# * It provide "add_style_check_target" for cmake.
-# Usage see add_style_check_target's document
-#
-# TODO(yuyang18): Add python style check.
-
-set(STYLE_FILTER)
-
-# diable unwanted filters
-
-# paddle do not indent public/potected/private in class
-set(STYLE_FILTER "${STYLE_FILTER}-whitespace/indent,")
-# paddle use mutable reference. BUT IT IS NOT RECOMMANDED
-set(STYLE_FILTER "${STYLE_FILTER}-runtime/references,")
-# paddle use relative path for include.
-set(STYLE_FILTER "${STYLE_FILTER}-build/include,")
-# paddle use , , etc.
-set(STYLE_FILTER "${STYLE_FILTER}-build/c++11,")
-# paddle use c style casting. BUT IT IS NOT RECOMMANDED
-set(STYLE_FILTER "${STYLE_FILTER}-readability/casting")
-
-
-# IGNORE SOME FILES
-set(IGNORE_PATTERN
- .*ImportanceSampler.*
- .*cblas\\.h.*
- .*\\.pb\\.txt
- .*MultiDataProvider.*
- .*pb.*
- .*pybind.h)
-
-# add_style_check_target
-#
-# attach check code style step for target.
-#
-# first argument: target name to attach
-# rest arguments: source list to check code style.
-#
-# NOTE: If WITH_STYLE_CHECK is OFF, then this macro just do nothing.
-macro(add_style_check_target TARGET_NAME)
- if(WITH_STYLE_CHECK)
- set(SOURCES_LIST ${ARGN})
- list(REMOVE_DUPLICATES SOURCES_LIST)
- foreach(filename ${SOURCES_LIST})
- foreach(pattern ${IGNORE_PATTERN})
- if(filename MATCHES ${pattern})
- list(REMOVE_ITEM SOURCES_LIST ${filename})
- endif()
- endforeach()
- endforeach()
-
- if(SOURCES_LIST)
- add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
- COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py"
- "--filter=${STYLE_FILTER}"
- ${SOURCES_LIST}
- COMMENT "cpplint: Checking source code style"
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
- endif()
- endif()
-endmacro()
diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake
index e90948782bb5e333bbdb47ef9d61c1e37e3cf9e4..9459f1ddfe85f5607880d3fdd968b494d6af592a 100644
--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -23,17 +23,20 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc)
SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
+
+# Number of parallel jobs handed to the grpc "make" invocations below.
+include(ProcessorCount)
+ProcessorCount(NUM_OF_PROCESSOR)
+# ProcessorCount() yields 0 when the core count cannot be determined, and
+# "make -j 0" is rejected by make, so fall back to a single job.
+IF(NUM_OF_PROCESSOR EQUAL 0)
+  SET(NUM_OF_PROCESSOR 1)
+ENDIF()
+
IF(APPLE)
- SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+ SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
ELSE()
- SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
+ SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin)
ENDIF()
ExternalProject_Add(
extern_grpc
DEPENDS protobuf zlib
- GIT_REPOSITORY "https://github.com/grpc/grpc.git"
- GIT_TAG "v1.10.x"
+ # NOTE(review): the archive is fetched over plain HTTP and no URL_HASH/URL_MD5 is visible in this hunk, so the download may not be integrity-checked -- consider pinning a checksum.
+ URL "http://paddlepaddledeps.bj.bcebos.com/grpc.tar.xz"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index 0fde4373a4be58e71ff1a305bd4991cc554d7a34..2665996432b1f6681927320a85d6835094abe4cd 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -212,6 +212,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake
${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF
+ -DCMAKE_SKIP_RPATH=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 65d61b7a38dde870a9217c8a68e81f7e593f88ec..9ddd05b3d9404df29ca1bf634105314b7e6a5b70 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -206,8 +206,6 @@ function(cc_library TARGET_NAME)
list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
- add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS})
-
else(cc_library_SRCS)
if(cc_library_DEPS)
merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
@@ -271,7 +269,6 @@ function(nv_library TARGET_NAME)
list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
- add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS})
else(nv_library_SRCS)
if (nv_library_DEPS)
merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
@@ -344,7 +341,6 @@ function(hip_library TARGET_NAME)
list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
- add_style_check_target(${TARGET_NAME} ${hip_library_SRCS} ${hip_library_HEADERS})
else(hip_library_SRCS)
if (hip_library_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_DEPS})
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index b730ab43c49af005c00218c7430ab3c4d1a89510..3b13b2150514bd615667241272d287c7e55d4e74 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -172,6 +172,7 @@ add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})
# paddle fluid version
execute_process(
COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
+ WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
set(version_file ${FLUID_INSTALL_DIR}/version.txt)
file(WRITE ${version_file}
diff --git a/doc/fluid/CMakeLists.txt b/doc/fluid/CMakeLists.txt
index 8086507bb4b7e870ad6d6091945ed07a00b5100b..be92af3902769a65c77953c9f3cb1f3aa3738d79 100644
--- a/doc/fluid/CMakeLists.txt
+++ b/doc/fluid/CMakeLists.txt
@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
+# Both import strings are set to empty here; presumably they are substituted into conf.py by the configure_file() call below -- TODO confirm against conf.py.en.in.
+set(IMPORT_PADDLE_STRING "")
+set(IMPORT_PADDLEV2_STRING "")
+
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
@@ -27,8 +30,6 @@ sphinx_add_target(paddle_fluid_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN})
-add_dependencies(paddle_fluid_docs gen_proto_py paddle_python)
-
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
@@ -50,6 +51,4 @@ sphinx_add_target(paddle_fluid_docs_cn
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
-add_dependencies(paddle_fluid_docs_cn gen_proto_py paddle_python)
-
add_subdirectory(api)
diff --git a/doc/fluid/api/CMakeLists.txt b/doc/fluid/api/CMakeLists.txt
index 48b396f0786adad1ba6cd41f72497f853e54bc38..435d6e10fb02e9b2a8147f37da33e8848cc9b98a 100644
--- a/doc/fluid/api/CMakeLists.txt
+++ b/doc/fluid/api/CMakeLists.txt
@@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
+# Python import statements for the API docs; presumably substituted into conf.py by the configure_file() call below -- TODO confirm against conf.py.en.in.
+set(IMPORT_PADDLE_STRING "import paddle")
+set(IMPORT_PADDLEV2_STRING "import paddle.v2")
+
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
diff --git a/doc/fluid/api/clip.rst b/doc/fluid/api/clip.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3ba096388fc87dda3096a9030fe5749e61112c06
--- /dev/null
+++ b/doc/fluid/api/clip.rst
@@ -0,0 +1,47 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+====
+clip
+====
+
+ErrorClipByValue
+----------------
+
+.. autoclass:: paddle.fluid.clip.ErrorClipByValue
+ :members:
+ :noindex:
+
+GradientClipByValue
+-------------------
+
+.. autoclass:: paddle.fluid.clip.GradientClipByValue
+ :members:
+ :noindex:
+
+GradientClipByNorm
+------------------
+
+.. autoclass:: paddle.fluid.clip.GradientClipByNorm
+ :members:
+ :noindex:
+
+GradientClipByGlobalNorm
+------------------------
+
+.. autoclass:: paddle.fluid.clip.GradientClipByGlobalNorm
+ :members:
+ :noindex:
+
+append_gradient_clip_ops
+------------------------
+
+.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops
+ :noindex:
+
+error_clip_callback
+-------------------
+
+.. autofunction:: paddle.fluid.clip.error_clip_callback
+ :noindex:
+
diff --git a/doc/fluid/api/evaluator.rst b/doc/fluid/api/evaluator.rst
index f80b87c7d2704a144c02028c4925530a67d11289..c0dc9a0d1d9f2f70948dc3c905dca25d7dd43742 100644
--- a/doc/fluid/api/evaluator.rst
+++ b/doc/fluid/api/evaluator.rst
@@ -5,24 +5,3 @@
evaluator
=========
-ChunkEvaluator
---------------
-
-.. autoclass:: paddle.fluid.evaluator.ChunkEvaluator
- :members:
- :noindex:
-
-EditDistance
---------------
-
-.. autoclass:: paddle.fluid.evaluator.EditDistance
- :members:
- :noindex:
-
-DetectionMAP
---------------
-
-.. autoclass:: paddle.fluid.evaluator.DetectionMAP
- :members:
- :noindex:
-
diff --git a/doc/fluid/api/executor.rst b/doc/fluid/api/executor.rst
index a9cdf264e49691afc4b9425b7bfe54f8157ae6c2..f67a14c49f372e67d18ec8e6f87da01109376d22 100644
--- a/doc/fluid/api/executor.rst
+++ b/doc/fluid/api/executor.rst
@@ -30,3 +30,9 @@ switch_scope
.. autofunction:: paddle.fluid.executor.switch_scope
:noindex:
+fetch_var
+---------
+
+.. autofunction:: paddle.fluid.executor.fetch_var
+ :noindex:
+
diff --git a/doc/fluid/api/gen_doc.sh b/doc/fluid/api/gen_doc.sh
index ba7b7ba8e51399deb852b0a7c8ddd3128f521e85..0f0539355559446fd91f659d61b636db214b5a40 100755
--- a/doc/fluid/api/gen_doc.sh
+++ b/doc/fluid/api/gen_doc.sh
@@ -1,7 +1,7 @@
#!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst
-for module in io data_feeder evaluator executor initializer io nets optimizer param_attr profiler regularizer
+for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do
python gen_doc.py ${module} > ${module}.rst
done
diff --git a/doc/fluid/api/index_en.rst b/doc/fluid/api/index_en.rst
index 06c686d9508635abd41571983e00be174e94743e..29cea9c68221b921939e8e09072d87f9f604e21b 100644
--- a/doc/fluid/api/index_en.rst
+++ b/doc/fluid/api/index_en.rst
@@ -9,8 +9,9 @@ Fluid
data_feeder.rst
executor.rst
initializer.rst
- evaluator.rst
+ metrics.rst
nets.rst
+ clip.rst
optimizer.rst
param_attr.rst
profiler.rst
diff --git a/doc/fluid/api/initializer.rst b/doc/fluid/api/initializer.rst
index 2f02c5de097945a45a3e053427104bd17bea1279..c49a98c744cdf907630ea8c74791ff2021d996e8 100644
--- a/doc/fluid/api/initializer.rst
+++ b/doc/fluid/api/initializer.rst
@@ -33,11 +33,16 @@ Xavier
:members:
:noindex:
-MSRA
-------
+force_init_on_cpu
+-----------------
-.. autoclass:: paddle.fluid.initializer.MSRA
- :members:
+.. autofunction:: paddle.fluid.initializer.force_init_on_cpu
+ :noindex:
+
+init_on_cpu
+-----------
+
+.. autofunction:: paddle.fluid.initializer.init_on_cpu
:noindex:
ConstantInitializer
@@ -68,9 +73,3 @@ XavierInitializer
:members:
:noindex:
-
-MSRAInitializer
------------------
-.. autoclass:: paddle.fluid.initializer.MSRAInitializer
- :members:
- :noindex:
diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst
index 9ae7ffb2604250aebfd9ecd8966384c3ef05f97b..f53da4d194f8d2428b4121fa1bb31f3fc95a9f64 100644
--- a/doc/fluid/api/layers.rst
+++ b/doc/fluid/api/layers.rst
@@ -55,6 +55,13 @@ While
:members:
:noindex:
+Switch
+------
+
+.. autoclass:: paddle.fluid.layers.Switch
+ :members:
+ :noindex:
+
lod_rank_table
--------------
@@ -67,12 +74,6 @@ max_sequence_len
.. autofunction:: paddle.fluid.layers.max_sequence_len
:noindex:
-topk
-----
-
-.. autofunction:: paddle.fluid.layers.topk
- :noindex:
-
lod_tensor_to_array
-------------------
@@ -109,6 +110,12 @@ less_than
.. autofunction:: paddle.fluid.layers.less_than
:noindex:
+equal
+-----
+
+.. autofunction:: paddle.fluid.layers.equal
+ :noindex:
+
array_read
----------
@@ -212,6 +219,42 @@ Send
.. autofunction:: paddle.fluid.layers.Send
:noindex:
+open_recordio_file
+------------------
+
+.. autofunction:: paddle.fluid.layers.open_recordio_file
+ :noindex:
+
+open_files
+----------
+
+.. autofunction:: paddle.fluid.layers.open_files
+ :noindex:
+
+read_file
+---------
+
+.. autofunction:: paddle.fluid.layers.read_file
+ :noindex:
+
+shuffle
+-------
+
+.. autofunction:: paddle.fluid.layers.shuffle
+ :noindex:
+
+batch
+-----
+
+.. autofunction:: paddle.fluid.layers.batch
+ :noindex:
+
+double_buffer
+-------------
+
+.. autofunction:: paddle.fluid.layers.double_buffer
+ :noindex:
+
nn
==
@@ -281,12 +324,6 @@ square_error_cost
.. autofunction:: paddle.fluid.layers.square_error_cost
:noindex:
-accuracy
---------
-
-.. autofunction:: paddle.fluid.layers.accuracy
- :noindex:
-
chunk_eval
----------
@@ -311,6 +348,18 @@ sequence_pool
.. autofunction:: paddle.fluid.layers.sequence_pool
:noindex:
+sequence_softmax
+----------------
+
+.. autofunction:: paddle.fluid.layers.sequence_softmax
+ :noindex:
+
+softmax
+-------
+
+.. autofunction:: paddle.fluid.layers.softmax
+ :noindex:
+
pool2d
------
@@ -323,12 +372,6 @@ batch_norm
.. autofunction:: paddle.fluid.layers.batch_norm
:noindex:
-layer_norm
-----------
-
-.. autofunction:: paddle.fluid.layers.layer_norm
- :noindex:
-
beam_search_decode
------------------
@@ -377,6 +420,12 @@ reduce_min
.. autofunction:: paddle.fluid.layers.reduce_min
:noindex:
+reduce_prod
+-----------
+
+.. autofunction:: paddle.fluid.layers.reduce_prod
+ :noindex:
+
sequence_first_step
-------------------
@@ -425,6 +474,12 @@ matmul
.. autofunction:: paddle.fluid.layers.matmul
:noindex:
+topk
+----
+
+.. autofunction:: paddle.fluid.layers.topk
+ :noindex:
+
warpctc
-------
@@ -473,6 +528,60 @@ multiplex
.. autofunction:: paddle.fluid.layers.multiplex
:noindex:
+layer_norm
+----------
+
+.. autofunction:: paddle.fluid.layers.layer_norm
+ :noindex:
+
+softmax_with_cross_entropy
+--------------------------
+
+.. autofunction:: paddle.fluid.layers.softmax_with_cross_entropy
+ :noindex:
+
+smooth_l1
+---------
+
+.. autofunction:: paddle.fluid.layers.smooth_l1
+ :noindex:
+
+one_hot
+-------
+
+.. autofunction:: paddle.fluid.layers.one_hot
+ :noindex:
+
+autoincreased_step_counter
+--------------------------
+
+.. autofunction:: paddle.fluid.layers.autoincreased_step_counter
+ :noindex:
+
+reshape
+-------
+
+.. autofunction:: paddle.fluid.layers.reshape
+ :noindex:
+
+lod_reset
+---------
+
+.. autofunction:: paddle.fluid.layers.lod_reset
+ :noindex:
+
+lrn
+---
+
+.. autofunction:: paddle.fluid.layers.lrn
+ :noindex:
+
+pad
+---
+
+.. autofunction:: paddle.fluid.layers.pad
+ :noindex:
+
label_smooth
------------
@@ -480,7 +589,7 @@ label_smooth
:noindex:
roi_pool
----------
+--------
.. autofunction:: paddle.fluid.layers.roi_pool
:noindex:
@@ -501,18 +610,6 @@ mul
.. autofunction:: paddle.fluid.layers.mul
:noindex:
-reshape
--------
-
-.. autofunction:: paddle.fluid.layers.reshape
- :noindex:
-
-pad
----
-
-.. autofunction:: paddle.fluid.layers.pad
- :noindex:
-
scale
-----
@@ -579,10 +676,70 @@ clip_by_norm
.. autofunction:: paddle.fluid.layers.clip_by_norm
:noindex:
-sequence_softmax
-----------------
+logical_and
+-----------
-.. autofunction:: paddle.fluid.layers.sequence_softmax
+.. autofunction:: paddle.fluid.layers.logical_and
+ :noindex:
+
+logical_or
+----------
+
+.. autofunction:: paddle.fluid.layers.logical_or
+ :noindex:
+
+logical_xor
+-----------
+
+.. autofunction:: paddle.fluid.layers.logical_xor
+ :noindex:
+
+logical_not
+-----------
+
+.. autofunction:: paddle.fluid.layers.logical_not
+ :noindex:
+
+uniform_random
+--------------
+
+.. autofunction:: paddle.fluid.layers.uniform_random
+ :noindex:
+
+uniform_random_batch_size_like
+------------------------------
+
+.. autofunction:: paddle.fluid.layers.uniform_random_batch_size_like
+ :noindex:
+
+gaussian_random
+---------------
+
+.. autofunction:: paddle.fluid.layers.gaussian_random
+ :noindex:
+
+gaussian_random_batch_size_like
+-------------------------------
+
+.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
+ :noindex:
+
+cumsum
+------
+
+.. autofunction:: paddle.fluid.layers.cumsum
+ :noindex:
+
+scatter
+-------
+
+.. autofunction:: paddle.fluid.layers.scatter
+ :noindex:
+
+sum
+---
+
+.. autofunction:: paddle.fluid.layers.sum
:noindex:
sigmoid
@@ -651,6 +808,18 @@ floor
.. autofunction:: paddle.fluid.layers.floor
:noindex:
+cos
+---
+
+.. autofunction:: paddle.fluid.layers.cos
+ :noindex:
+
+sin
+---
+
+.. autofunction:: paddle.fluid.layers.sin
+ :noindex:
+
round
-----
@@ -834,4 +1003,9 @@ dice_loss
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
+upsampling_bilinear2d
+---------------------
+
+.. autofunction:: paddle.fluid.layers.upsampling_bilinear2d
+ :noindex:
diff --git a/doc/fluid/api/metrics.rst b/doc/fluid/api/metrics.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ddf07775d7ea293acd421b8549d03b277ff0611d
--- /dev/null
+++ b/doc/fluid/api/metrics.rst
@@ -0,0 +1,56 @@
+.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+ !DO NOT EDIT THIS FILE MANUALLY!
+
+=======
+metrics
+=======
+
+MetricBase
+----------
+
+.. autoclass:: paddle.fluid.metrics.MetricBase
+ :members:
+ :noindex:
+
+CompositeMetric
+---------------
+
+.. autoclass:: paddle.fluid.metrics.CompositeMetric
+ :members:
+ :noindex:
+
+Accuracy
+--------
+
+.. autoclass:: paddle.fluid.metrics.Accuracy
+ :members:
+ :noindex:
+
+ChunkEvaluator
+--------------
+
+.. autoclass:: paddle.fluid.metrics.ChunkEvaluator
+ :members:
+ :noindex:
+
+EditDistance
+------------
+
+.. autoclass:: paddle.fluid.metrics.EditDistance
+ :members:
+ :noindex:
+
+DetectionMAP
+------------
+
+.. autoclass:: paddle.fluid.metrics.DetectionMAP
+ :members:
+ :noindex:
+
+Auc
+---
+
+.. autoclass:: paddle.fluid.metrics.Auc
+ :members:
+ :noindex:
+
diff --git a/doc/fluid/api/optimizer.rst b/doc/fluid/api/optimizer.rst
index b90d481d9d91519d302ada7b3d22671382d71105..df2bd2eace52e78805433bea320f5de95d45bfc7 100644
--- a/doc/fluid/api/optimizer.rst
+++ b/doc/fluid/api/optimizer.rst
@@ -111,6 +111,7 @@ DecayedAdagradOptimizer
:members:
:noindex:
+
AdadeltaOptimizer
-----------------
@@ -118,9 +119,17 @@ AdadeltaOptimizer
:members:
:noindex:
+
RMSPropOptimizer
-----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
+
+Optimizer
+---------
+
+.. autoclass:: paddle.fluid.optimizer.Optimizer
+ :members:
+ :noindex:
diff --git a/doc/fluid/api/regularizer.rst b/doc/fluid/api/regularizer.rst
index 837c67111c6e98e6a3859be802addc20a1c64f2b..756bc53baa0625aef48dad0c35e7ae57421a70d0 100644
--- a/doc/fluid/api/regularizer.rst
+++ b/doc/fluid/api/regularizer.rst
@@ -11,6 +11,13 @@ append_regularization_ops
.. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex:
+WeightDecayRegularizer
+----------------------
+
+.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer
+ :members:
+ :noindex:
+
L1Decay
-------
@@ -26,15 +33,16 @@ L2Decay
:noindex:
L1DecayRegularizer
----------------------
+------------------
.. autoclass:: paddle.fluid.regularizer.L1DecayRegularizer
:members:
:noindex:
L2DecayRegularizer
----------------------
+------------------
.. autoclass:: paddle.fluid.regularizer.L2DecayRegularizer
:members:
:noindex:
+
diff --git a/doc/mobile/CMakeLists.txt b/doc/mobile/CMakeLists.txt
index b104a6318d474d6531670b8ac3569448774850c7..7b34ba8d0768427802b11614c6962f3c3f6ef4e3 100644
--- a/doc/mobile/CMakeLists.txt
+++ b/doc/mobile/CMakeLists.txt
@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
+set(IMPORT_PADDLE_STRING "")
+set(IMPORT_PADDLEV2_STRING "")
+
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
@@ -27,8 +30,6 @@ sphinx_add_target(paddle_mobile_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN})
-add_dependencies(paddle_mobile_docs gen_proto_py paddle_python)
-
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
@@ -49,5 +50,3 @@ sphinx_add_target(paddle_mobile_docs_cn
${SPHINX_CACHE_DIR_CN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
-
-add_dependencies(paddle_mobile_docs_cn gen_proto_py paddle_python)
diff --git a/doc/mobile/index_cn.rst b/doc/mobile/index_cn.rst
index 8297316e8fbb2b8f41954030293feadbcd81295e..56d1515005f6e40b084c6b2184c6a0b3e3a00496 100644
--- a/doc/mobile/index_cn.rst
+++ b/doc/mobile/index_cn.rst
@@ -1,9 +1,9 @@
移动端
-=====
+======
.. toctree::
:maxdepth: 1
cross_compiling_for_android_cn.md
cross_compiling_for_ios_cn.md
- cross_compiling_for_raspberry_cn.md
\ No newline at end of file
+ cross_compiling_for_raspberry_cn.md
diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in
index 76b82fd97f1ed642696c4414676b694ebda9ad81..890f70615538af23cd05b9ffd685e870a5644cdb 100644
--- a/doc/templates/conf.py.cn.in
+++ b/doc/templates/conf.py.cn.in
@@ -16,8 +16,8 @@ import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python'))
import shlex
from recommonmark import parser, transform
-import paddle
-import paddle.v2
+@IMPORT_PADDLE_STRING@
+@IMPORT_PADDLEV2_STRING@
MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify
diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in
index 5aa5c1381fa3fad4ebc181c7868da03ae0138016..5b09464cb991f96127edec40f7dbbc97a8d82582 100644
--- a/doc/templates/conf.py.en.in
+++ b/doc/templates/conf.py.en.in
@@ -16,8 +16,8 @@ import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python'))
import shlex
from recommonmark import parser, transform
-import paddle
-import paddle.v2
+@IMPORT_PADDLE_STRING@
+@IMPORT_PADDLEV2_STRING@
MarkdownParser = parser.CommonMarkParser
diff --git a/doc/v2/CMakeLists.txt b/doc/v2/CMakeLists.txt
index be957d37b14c618e9346251b3bd3dbaf1541773f..d230a1b9217eea6740419822f350096e361a4435 100644
--- a/doc/v2/CMakeLists.txt
+++ b/doc/v2/CMakeLists.txt
@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
+set(IMPORT_PADDLE_STRING "")
+set(IMPORT_PADDLEV2_STRING "")
+
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
@@ -27,8 +30,6 @@ sphinx_add_target(paddle_v2_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN})
-add_dependencies(paddle_v2_docs gen_proto_py paddle_python)
-
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
@@ -50,6 +51,4 @@ sphinx_add_target(paddle_v2_docs_cn
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
-add_dependencies(paddle_v2_docs_cn gen_proto_py paddle_python)
-
add_subdirectory(api)
diff --git a/doc/v2/api/CMakeLists.txt b/doc/v2/api/CMakeLists.txt
index 2670a21a227546ffcee4f10f395feef3c58df9b4..0c74522cb089b17c8419e9058f76631b0fe0df93 100644
--- a/doc/v2/api/CMakeLists.txt
+++ b/doc/v2/api/CMakeLists.txt
@@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
+set(IMPORT_PADDLE_STRING "import paddle")
+set(IMPORT_PADDLEV2_STRING "import paddle.v2")
+
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst
index 330e84346e28db30d16d4a95490ddcab431228a0..741c01ce5428c0046daa5a784da70d4bb492438c 100644
--- a/doc/v2/build_and_install/build_from_source_cn.rst
+++ b/doc/v2/build_and_install/build_from_source_cn.rst
@@ -19,8 +19,8 @@
----------------
PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像
-可以在 `这里 `_ 找到,您也可以
-在 `这里 `_ 找到 paddle_manylinux_devel
+可以在 `这里 `__ 找到,您也可以
+在 `这里 `__ 找到 paddle_manylinux_devel
镜像的编译以及使用方法。或者参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像。
如果您选择不使用Docker镜像,则需要在本机安装下面章节列出的 `编译依赖`_ 之后才能开始编译的步骤。
@@ -35,13 +35,11 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
# 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像
docker build -t paddle:dev .
# 3. 执行下面的命令编译CPU-Only的二进制
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步)
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
-注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。如果使用自行
-构建的镜像(上述第4步)会执行 :code:`Dockerfile` 描述的默认入口程序 :code:`build.sh` 可以省略步骤3中
-最后的执行脚本的命令。
+注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。
编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装:
@@ -72,15 +70,15 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test
如果期望执行其中一个单元测试,(比如 :code:`test_sum_op` ):
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
- bash /paddle/paddle/scripts/docker/build.sh
- cd /paddle/build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
+ ./paddle/scripts/paddle_build.sh build
+ cd build
ctest -R test_sum_op -V
.. _faq_docker:
@@ -116,11 +114,10 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行
- ```emacs
- (global-set-key "\C-cc" 'compile)
- (setq compile-command
- "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
- ```
+ .. code-block:: emacs
+
+ (global-set-key "\C-cc" 'compile)
+ (setq compile-command "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
就可以按 `Ctrl-C` 和 `c` 键来启动编译了。
diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst
index 0a6c33985ed65e24e507744c49cf929c9481195c..b06c43e19dcfc52ad0f074a85517a16744895a3a 100644
--- a/doc/v2/build_and_install/build_from_source_en.rst
+++ b/doc/v2/build_and_install/build_from_source_en.rst
@@ -23,7 +23,7 @@ You need to use Docker to build PaddlePaddle
to avoid installing dependencies by yourself. We have several pre-built
Docker images `here `_ ,
you can also find how to build and use paddle_manylinux_devel Docker image from
-`here `_
+`here `__
Or you can build your own image from source as the optional step below:
.. code-block:: bash
@@ -34,14 +34,12 @@ Or you can build your own image from source as the optional step below:
# 2. Optional: build development docker image from source
docker build -t paddle:dev .
# 3. Run the following command to build a CPU-Only binaries
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. Or, use your built Docker image to build PaddlePaddle (must run step 2)
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
NOTE: The above command try to mount the current working directory (root directory of source code)
-into :code:`/paddle` directory inside docker container. If you are using your own image
-(Step 4) it will run default entry-point :code:`build.sh` , so you could omit the last
-command in step 3.
+into :code:`/paddle` directory inside docker container.
When the compile finishes, you can get the output whl package under
build/python/dist, then you can choose to install the whl on local
@@ -74,21 +72,21 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU.
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test
If you wish to run only one unit test, like :code:`test_sum_op`:
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
- bash /paddle/paddle/scripts/docker/build.sh
- cd /paddle/build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
+ ./paddle/scripts/paddle_build.sh build
+ cd build
ctest -R test_sum_op -V
.. _faq_docker:
Frequently Asked Questions
-----------------
+---------------------------
- What is Docker?
@@ -118,11 +116,10 @@ Frequently Asked Questions
Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file:
- ```emacs
- (global-set-key "\C-cc" 'compile)
- (setq compile-command
- "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
- ```
+ .. code-block:: emacs
+
+ (global-set-key "\C-cc" 'compile)
+ (setq compile-command "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
so they could type `Ctrl-C` and `c` to build PaddlePaddle from source.
@@ -145,7 +142,7 @@ Frequently Asked Questions
.. _compile_deps:
Appendix: Compile Dependencies
-----------------
+-------------------------------
PaddlePaddle need the following dependencies when compiling, other dependencies
will be downloaded automatically.
@@ -166,11 +163,11 @@ will be downloaded automatically.
.. _build_options:
Appendix: Build Options
-----------------
+-------------------------
Build options include whether build binaries for CPU or GPU, which BLAS
library to use etc. You may pass these settings when running cmake.
-For detailed cmake tutorial please refer to `here `_ 。
+For detailed cmake tutorial please refer to `here `__ .
You can add :code:`-D` argument to pass such options, like:
@@ -219,7 +216,7 @@ keep on with latest cuDNN versions. Be sure to run with the same version of cuDN
you built.
Pass Compile Options
-++++++++++++++
+++++++++++++++++++++++
You can pass compile options to use intended BLAS/CUDA/Cudnn libraries.
When running cmake command, it will search system paths like
diff --git a/doc/v2/build_and_install/docker_install_cn.rst b/doc/v2/build_and_install/docker_install_cn.rst
index 79d214635a069a739060e0b79424729f6ff90387..106c86bace075764c84bc2a7f7cb09d466fa8794 100644
--- a/doc/v2/build_and_install/docker_install_cn.rst
+++ b/doc/v2/build_and_install/docker_install_cn.rst
@@ -73,6 +73,7 @@
当然,您也可以进入到Docker容器中,以交互式的方式执行或调试您的代码:
.. code-block:: bash
+
docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash
cd /work
python train.py
@@ -97,7 +98,7 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note
国内用户可以使用下面的镜像源来加速访问:
- .. code-block: bash
+ .. code-block:: bash
docker run -p 8888:8888 docker.paddlepaddlehub.com/book
diff --git a/doc/v2/build_and_install/docker_install_en.rst b/doc/v2/build_and_install/docker_install_en.rst
index e0e0559fb858a093db96a9b4ec1c5a45d6c71a38..25aecb8d0da9feb00006da6259b529b7011d91cb 100644
--- a/doc/v2/build_and_install/docker_install_en.rst
+++ b/doc/v2/build_and_install/docker_install_en.rst
@@ -80,6 +80,7 @@ Also, you can go into the container shell, run or debug your code
interactively:
.. code-block:: bash
+
docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash
cd /work
python train.py
@@ -104,7 +105,7 @@ We provide a packaged book image, simply issue the command:
For users in China, we provide a faster mirror:
- .. code-block: bash
+ .. code-block:: bash
docker run -p 8888:8888 docker.paddlepaddlehub.com/book
diff --git a/doc/v2/build_and_install/index_cn.rst b/doc/v2/build_and_install/index_cn.rst
index e079bb661f3a5141a09dfbc6893d1bf945697bc9..1a9305ac4b6578c14a962f223c647a71e3b8a72b 100644
--- a/doc/v2/build_and_install/index_cn.rst
+++ b/doc/v2/build_and_install/index_cn.rst
@@ -6,7 +6,7 @@
PaddlePaddle针对不同的用户群体提供了多种安装方式。
专注深度学习模型开发
------------------
+--------------------
PaddlePaddle提供了多种python wheel包,可通过pip一键安装:
@@ -18,7 +18,7 @@ PaddlePaddle提供了多种python wheel包,可通过pip一键安装:
这是最便捷的安装方式,请根据机器配置和系统选择对应的安装包。
关注底层框架
-----------
+-------------
PaddlePaddle提供了基于Docker的安装方式,请参照以下教程:
@@ -45,7 +45,7 @@ PaddlePaddle提供了基于Docker的安装方式,请参照以下教程:
常见问题汇总
------------
+--------------
如果在安装过程中遇到了问题,请先尝试在下面的页面寻找答案:
diff --git a/doc/v2/build_and_install/index_en.rst b/doc/v2/build_and_install/index_en.rst
index 5b3de0f8c3e5496060646b5ddb080d0d338a8bfa..7990bacbd6966e88e8763e9c5709e410f7e9fed4 100644
--- a/doc/v2/build_and_install/index_en.rst
+++ b/doc/v2/build_and_install/index_en.rst
@@ -1,12 +1,12 @@
install and Compile
-==========
+======================
.. _install_steps:
PaddlePaddle provides various methods of installation for many different users
Focus on Deep Learning Model Development
------------------
+----------------------------------------
PaddlePaddle provides lots of packages of python wheel , that pip can install:
@@ -18,7 +18,7 @@ PaddlePaddle provides lots of packages of python wheel , that pip can install:
This is the most convenient way of installation. Please choose the right installation package with machine configure and system.
Follow the Bottom Frame
-----------
+------------------------
PaddlePaddle also supports installation using Docker. Please refer to the tutorial below:
diff --git a/doc/v2/build_and_install/pip_install_cn.rst b/doc/v2/build_and_install/pip_install_cn.rst
index 9b84bb6425af1eeb94a4f2f5d6c2b1e28c62e3c8..853bdb21bbcf07ae1742d2196dbcfe4668828b7b 100644
--- a/doc/v2/build_and_install/pip_install_cn.rst
+++ b/doc/v2/build_and_install/pip_install_cn.rst
@@ -55,11 +55,11 @@ paddlepaddle-gpu==0.11.0 使用CUDA 7.5和cuDNN 5编译的0.11.0版
:header: "版本说明", "cp27-cp27mu", "cp27-cp27m"
:widths: 1, 3, 3
- "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_"
+ "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__"
.. _pip_dependency:
diff --git a/doc/v2/build_and_install/pip_install_en.rst b/doc/v2/build_and_install/pip_install_en.rst
index fcac76d6a24eb4905a20f797d614db8f743342d7..fecf6d3712feac3265100a6121901ba784f7d5cc 100644
--- a/doc/v2/build_and_install/pip_install_en.rst
+++ b/doc/v2/build_and_install/pip_install_en.rst
@@ -58,11 +58,11 @@ If the links below shows up the login form, just click "Log in as guest" to star
:header: "version", "cp27-cp27mu", "cp27-cp27m"
:widths: 1, 3, 3
- "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_"
- "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_"
+ "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__"
+ "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__"
.. _pip_dependency:
diff --git a/doc/v2/howto/capi/workflow_of_capi_cn.md b/doc/v2/howto/capi/workflow_of_capi_cn.md
index 1968c1099ac5734cd68b437f2f7aa428d7b5265e..3acdbae28e9b35f8a9104a89c9a5799f8c892334 100644
--- a/doc/v2/howto/capi/workflow_of_capi_cn.md
+++ b/doc/v2/howto/capi/workflow_of_capi_cn.md
@@ -59,7 +59,7 @@
代码示例如下:
```python
- from paddle.utils.merge_model import merge_v2_modelss
+ from paddle.utils.merge_model import merge_v2_model
from mnist_v2 import network
net = network(is_infer=True)
diff --git a/go/pserver/client/c/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt
index 411dc50332672143d7a1f7bd0556ae86dc37f6f3..4500b1f288372ed0e2d9d383234df97ae976c60b 100644
--- a/go/pserver/client/c/test/CMakeLists.txt
+++ b/go/pserver/client/c/test/CMakeLists.txt
@@ -13,4 +13,3 @@
# limitations under the License.
#
cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer)
-add_style_check_target(test_cclient test_cclient.c)
diff --git a/paddle/.gitignore b/paddle/.gitignore
index 1c1c0c2c829f088d7e3f52ca007fcb8f33a16a36..01904aa6ef2057afee95ddd6e30cde064b06c52e 100644
--- a/paddle/.gitignore
+++ b/paddle/.gitignore
@@ -11,7 +11,6 @@ GTAGS
*.pb.cc
*.pb.h
*_pb2.py
-paddle_*
output/
google/
Makefile
diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt
index e06e9a2b363d1ffc6876b98bcb7304b0a54dbcaa..957b1a3e6b07b058a76605992da387b43657146a 100644
--- a/paddle/capi/CMakeLists.txt
+++ b/paddle/capi/CMakeLists.txt
@@ -33,9 +33,6 @@ add_library(paddle_capi STATIC ${CAPI_HEADERS} ${CAPI_PRIVATE_HEADER}
target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
-add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER}
- ${CAPI_PRIVATE_HEADER})
-
add_dependencies(paddle_capi paddle_proto paddle_gserver)
# TODO: paddle_capi_whole will be removed.
diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b19256ef4533a09162edf907f6cd51146517e46
--- /dev/null
+++ b/paddle/contrib/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(inference)
diff --git a/paddle/contrib/float16/README.md b/paddle/contrib/float16/README.md
index ded959c47cb81b9384abbb9815773e25969344ec..58b4a50666bfb622af8acbce29355f2a4a870a82 100644
--- a/paddle/contrib/float16/README.md
+++ b/paddle/contrib/float16/README.md
@@ -89,7 +89,7 @@ cd Paddle
# to `FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04` and similarly for other configurations
nvidia-docker build -t paddle:float16 .
# After running this, different results will be written to different log files in Paddle/contrib/float16/
-nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/contrib/float16/run_float16_demo.sh
+nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/paddle/contrib/float16/run_float16_demo.sh
```
#### Accuracy
diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh
index d8a34ee67b8fab214fa6e96104304689211f84da..031225a85dabb26e5d9ea06f58909c049e7f0c08 100755
--- a/paddle/contrib/float16/run_float16_demo.sh
+++ b/paddle/contrib/float16/run_float16_demo.sh
@@ -3,7 +3,7 @@
BUILD_PATH=/paddle/fp16_build
WHEEL_PATH=$BUILD_PATH/python/dist
INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book
-DEMO_PATH=/paddle/contrib/float16
+DEMO_PATH=/paddle/paddle/contrib/float16
# Use the single most powerful CUDA GPU on your machine
export CUDA_VISIBLE_DEVICES=0
@@ -50,7 +50,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
- --data_set=imagenet \
--dirname=$DEMO_PATH/image_classification_imagenet_vgg.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_vgg.inference.model \
--repeat=$REPEAT \
@@ -68,7 +67,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_resnet \
- --data_set=imagenet \
--dirname=$DEMO_PATH/image_classification_imagenet_resnet.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_resnet.inference.model \
--repeat=$REPEAT \
@@ -86,7 +84,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
- --data_set=cifar10 \
--dirname=$DEMO_PATH/image_classification_cifar10_vgg.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_vgg.inference.model \
--repeat=$REPEAT \
@@ -104,7 +101,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
- --data_set=cifar10 \
--dirname=$DEMO_PATH/image_classification_cifar10_resnet.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_resnet.inference.model \
--repeat=$REPEAT \
diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a4fe10f708e5bb8b28e34b2d91b2254c346c467f
--- /dev/null
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -0,0 +1,57 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# inference_api_test(<TARGET_NAME> <TEST_SRC> <DEP_TEST> [ARGS <suffix>...])
+# Calls cc_test on TEST_SRC with the inference API libraries as DEPS and
+# --dirname=${PYTHON_TESTS_DIR}/book/ as the test argument. Without ARGS one
+# test named TARGET_NAME is added; with ARGS one test per suffix is added,
+# named TARGET_NAME_<suffix>, so the generated target names never collide.
+function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
+  cmake_parse_arguments(inference_test "" "" "ARGS" ${ARGN})
+  set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
+  set(arg_list "")
+  if(inference_test_ARGS)
+    foreach(arg ${inference_test_ARGS})
+      list(APPEND arg_list "_${arg}")
+    endforeach()
+  else()
+    list(APPEND arg_list "_")  # lone "_" stands for "no suffix"
+  endif()
+  foreach(arg ${arg_list})
+    string(REGEX REPLACE "^_$" "" arg "${arg}")
+    cc_test(${TARGET_NAME}${arg}
+            SRCS ${TEST_SRC}
+            DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
+            ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
+    # TODO: DEP_TEST is unused; re-enable set_tests_properties(... DEPENDS ${DEP_TEST}).
+  endforeach()
+endfunction(inference_api_test)
+
+# Library for the public inference API, built from paddle_inference_api.cc.
+cc_library(paddle_inference_api
+ SRCS paddle_inference_api.cc
+ DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
+# Library for the API implementation; it depends on paddle_inference_api.
+cc_library(paddle_inference_api_impl
+ SRCS paddle_inference_api_impl.cc
+ DEPS paddle_inference_api paddle_fluid_api)
+# Test that depends on the API library only.
+cc_test(test_paddle_inference_api
+ SRCS test_paddle_inference_api.cc
+ DEPS paddle_inference_api)
+# Test of the implementation; test_word2vec is passed as the DEP_TEST argument.
+inference_api_test(test_paddle_inference_api_impl
+ test_paddle_inference_api_impl.cc
+ test_word2vec)
diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d67e1e7667800d6dd00cb8915b0d6dc7c664970b
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api.cc
@@ -0,0 +1,15 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h
index dbaa7c95b97e954537707566e5b7458e6afd14c8..9ac8ebdef8151f2a144b479fa258b8bc830fc2e9 100644
--- a/paddle/contrib/inference/paddle_inference_api.h
+++ b/paddle/contrib/inference/paddle_inference_api.h
@@ -12,49 +12,74 @@
See the License for the specific language governing permissions and
limitations under the License. */
+/*
+ * This file contains the definition of a simple Inference API for Paddle.
+ *
+ * ATTENTION: It requires some C++ features; for older C++ standards or for C,
+ * we might release another API.
+ */
+
#pragma once
+#include <memory>
#include <string>
#include <vector>
namespace paddle {
-class Predictor {
-public:
- struct Attr;
- Predictor() = default;
+enum PaddleDType {  // data-type tag held in PaddleTensor::dtype.
+ FLOAT32,
+ INT64,
+};
- // Build the network before inference.
- bool Init(const Attr& attr);
+struct PaddleBuf {  // a raw memory region: a pointer plus its size.
+ void* data; // pointer to the data memory.
+ size_t length; // size of the memory in bytes.
+};
+
+struct PaddleTensor {  // a named tensor: shape, data buffer and data type.
+ std::string name; // variable name.
+ std::vector shape;  // NOTE(review): template argument is missing here — confirm the element type.
+ PaddleBuf data; // blob of data.
+ PaddleDType dtype;  // data-type tag (a PaddleDType value).
+};
+
+/*
+* A simple Inference API for Paddle. Currently this API might just be used by
+* non-sequence scenarios.
+* TODO(Superjomn) Prepare another API for NLP-related usages.
+*/
+class PaddlePredictor {
+public:
+ struct Config;
+ PaddlePredictor() = default;
+ PaddlePredictor(const PaddlePredictor&) = delete;
// Predict an record.
- // Arguments:
- // inputs: the name of the input variables.
- // outputs: the name of the output varaibles.
- // input_shapes: the shape of the input variables.
- // output_shapes: the shape of the output variables.
- // input_data: the data of the input variables.
- // output_data: the data of the output variables.
- bool Run(const std::vector& inputs,
- const std::vector& outputs,
- const std::vector>& input_shapes,
- const std::vector>& output_shapes,
- const std::vector>& input_data,
- std::vector>* output_data);
-
- // Clone a predictor that share the model weights.
- Predictor* Clone();
+ // The caller should be responsible for allocating and releasing the memory of
+ // `inputs`. `inputs` should be alive until Run returns. caller should be
+ // responsible for releasing the memory of `output_data`.
+ virtual bool Run(const std::vector& inputs,
+ std::vector* output_data) = 0;
+
+ // Clone a predictor that shares the model weights; the cloned predictor
+ // should be thread-safe.
+ virtual std::unique_ptr Clone() = 0;
// Destroy the Predictor.
- ~Predictor();
+ virtual ~PaddlePredictor() {}
- struct Attr {
+ friend std::unique_ptr CreatePaddlePredictor(
+ const PaddlePredictor::Config& config);
+
+ // The common configs for all the predictors.
+ struct Config {
enum class EngineKind;
std::string model_dir; // path to the model directory.
bool enable_engine{false}; // Enable to execute (part of) the model on
- // third-party engines.
- EngineKind engine_kind{Attr::EngineKind::kNone};
+ // third-party engines.
+ EngineKind engine_kind{Config::EngineKind::kNone};
enum class EngineKind {
kNone = -1, // Use the native Fluid facility.
@@ -66,4 +91,8 @@ public:
};
};
+// A factory to help create different predictors, chosen by the config type.
+template <typename ConfigT>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
} // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ecca16d3f82bbeee6858883a0f9e577a479f9d06
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -0,0 +1,309 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include
+#include
+#include