diff --git a/CMakeLists.txt b/CMakeLists.txt
index 710b4774ca021c2e916460e7253d4fbf979a38cc..cfaab206e1f321a55119d4a8d65c4a99d3819fff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,7 +57,10 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
+option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
+option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
+option(WITH_CONTRIB "Compile the third-party contribution" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
@@ -202,7 +205,7 @@ endif(USE_NNPACK)
add_subdirectory(proto)
-if(NOT MOBILE_INFERENCE)
+if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY)
# "add_subdirectory(go)" should be placed after the following line,
# because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
@@ -230,3 +233,7 @@ if(WITH_DOC)
find_python_module(recommonmark REQUIRED)
add_subdirectory(doc)
endif()
+
+if (WITH_CONTRIB)
+ add_subdirectory(paddle/contrib)
+endif()
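
For reference, here is a minimal sketch of a configure step that turns on the options introduced above. The `-D` flag names come from this diff; the out-of-source build directory and the `make` invocation are conventional assumptions, not something this change prescribes.

```bash
# Sketch: configure a build with the newly added options enabled.
# Only the -D flag names are taken from this diff; paths are placeholders.
mkdir -p build && cd build
cmake .. \
    -DWITH_CONTRIB=ON \
    -DEIGEN_USE_THREADS=ON \
    -DWITH_FAST_BUNDLE_TEST=ON
make -j"$(nproc)"
```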
diff --git a/Dockerfile b/Dockerfile
index ea39efd00bb5c0a7deb3f6d57083d83a673b883c..e5508486d6df6a7465998b7e2926b21a1604dfb4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -101,6 +101,3 @@ RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
-
-# development image default do build work
-CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
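
With the default `CMD` removed, running the development image no longer starts a build by itself; the build script has to be invoked explicitly, which is what the documentation updated later in this diff does. A minimal sketch, mirroring those doc changes:

```bash
# Build CPU-only binaries by invoking the build script explicitly,
# since the image no longer carries a default build CMD.
docker run -it -v "$PWD":/paddle -w /paddle \
    -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" \
    paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 \
    ./paddle/scripts/paddle_build.sh build
```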
diff --git a/Dockerfile.android b/Dockerfile.android
index 848a7eba6f1421432addae8acff407b611adb4ae..48db2efea21a648657e3f490c95429b9a29ede52 100644
--- a/Dockerfile.android
+++ b/Dockerfile.android
@@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
unzip -q android-ndk-r14b-linux-x86_64.zip && \
mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
rm -rf /opt/android-ndk-tmp
-
-CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"]
diff --git a/benchmark/cluster/README.md b/benchmark/cluster/README.md
deleted file mode 100644
index 64816098a524f064ec12474a736cd4c721227a70..0000000000000000000000000000000000000000
--- a/benchmark/cluster/README.md
+++ /dev/null
@@ -1,196 +0,0 @@
-# Cluster Training Benchmark
-
-## Setup
-
-- Platform
- - Kubernetes: v1.6.2
- - Linux Kernel: v3.10.0
-
-- Resource
- - CPU: 10 Cores per Pod
- - Memory: 5GB per Pod
-
-- Docker Image
-
- We use different base Docker images to run the benchmark on Kubernetes:
- - PaddlePaddle v2: paddlepaddle/paddle:0.11.0
- - PaddlePaddle Fluid: paddlepaddle/paddle:[commit-id]
- - TensorFlow: tensorflow/tensorflow:1.5.0-rc0
-
-- Model
- vgg16 is used in this benchmark.
-
-## Cases
-
-- Variable
- - Batch Size of training data.
- - PServer count of the training job.
- - The number of trainers.
-
-- Invariant
- - The resource of trainer/pserver Pod.
-
-### Measure the Performance for Different Batch Size
-
-- PServer Count: 40
-- Trainer Count: 100
-- Metrics: mini-batch / sec
-
-| Batch Size         | 32 | 64 | 128 | 256 |
-|--------------------|----|----|-----|-----|
-| PaddlePaddle Fluid | -  | -  | -   | -   |
-| PaddlePaddle v2    | -  | -  | -   | -   |
-| TensorFlow         | -  | -  | -   | -   |
-
-### Measure the Performance for Different PServer Count
-
-- Trainer Count: 100
-- Batch Size: 64
-- Metrics: mini-batch / sec
-
-| PServer Count      | 10 | 20 | 40 | 60 |
-|--------------------|----|----|----|----|
-| PaddlePaddle Fluid | -  | -  | -  | -  |
-| PaddlePaddle v2    | -  | -  | -  | -  |
-| TensorFlow         | -  | -  | -  | -  |
-
-### Measure Parallel Efficiency By Increasing Trainer Count
-
-- PServer Count: 20
-- Batch Size: 64
-- Metrics:
-
-$S = T_1 / T_N$
-
-where S is the ratio of T_1, the training time with 1 trainer, to T_N, the training time with N trainers.
-The parallel efficiency is:
-
-$E = S / N$
-
-| Trainer Count      | 1 | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
-|--------------------|---|----|----|----|----|----|----|----|----|----|-----|
-| PaddlePaddle Fluid | - | -  | -  | -  | -  | -  | -  | -  | -  | -  | -   |
-| PaddlePaddle v2    | - | -  | -  | -  | -  | -  | -  | -  | -  | -  | -   |
-| TensorFlow         | - | -  | -  | -  | -  | -  | -  | -  | -  | -  | -   |
-
-## Reproduce the benchmark
-
-TODO
diff --git a/benchmark/cluster/vgg16/Dockerfile b/benchmark/cluster/vgg16/Dockerfile
deleted file mode 100644
index 13ad8e1b6237e6f41a076c4fb54311728832ae33..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/Dockerfile
+++ /dev/null
@@ -1,35 +0,0 @@
-FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04
-
-# you can get mirror list here:
-# https://launchpad.net/ubuntu/+archivemirrors
-ARG UBUNTU_MIRROR
-RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
-
-RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev
-RUN pip install -U kubernetes opencv-python
-
-RUN pip install paddlepaddle
-# if the network is slow, you may need to add a proxy here.
-# ENV https_proxy=
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python'
-RUN pip uninstall -y paddlepaddle
-# unset the proxy if it was set.
-# ENV https_proxy=""
-
-# NOTE: By default, CI-built wheel packages are built with WITH_DISTRIBUTE=OFF,
-# so we must build one with distribute support to install in this image.
-ADD *.whl /
-RUN pip install /*.whl && rm -f /*.whl
-ENV LD_LIBRARY_PATH=/usr/local/lib
-
-# tf k8s
-RUN pip install tensorflow==1.4.0
-ADD tf_k8s /usr/bin
-RUN chmod +x /usr/bin/tf_k8s
-ADD vgg16_tf.py /workspace/
-
-# below lines may change a lot for debugging
-ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
-ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
-RUN chmod +x /usr/bin/paddle_k8s
-ADD vgg16_fluid.py vgg16_v2.py /workspace/
diff --git a/benchmark/cluster/vgg16/README.md b/benchmark/cluster/vgg16/README.md
deleted file mode 100644
index d56a912b9b03986e32693363f82df05a34b779e9..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/README.md
+++ /dev/null
@@ -1,195 +0,0 @@
-# Performance for Distributed vgg16
-
-## Test Result
-
-### Hardware Information
-
-- CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
-- cpu MHz : 2101.000
-- cache size : 20480 KB
-
-### Blas settings
-
-Setting environment variable: `MKL_NUM_THREADS=1`.
-
-### Single Node Single Thread
-
-- Metrics: samples / sec
-
-| Batch Size         | 32    | 64    | 128   | 256   |
-|--------------------|-------|-------|-------|-------|
-| PaddlePaddle Fluid | 15.44 | 16.32 | 16.74 | 16.79 |
-| PaddlePaddle v2    | 15.97 | 17.04 | 17.60 | 17.83 |
-| TensorFlow         | 9.09  | 9.10  | 9.24  | 8.66  |
-
-### Different Batch Size
-
-- PServer Count: 10
-- Trainer Count: 20
-- Metrics: samples / sec
-
-| Batch Size         | 32     | 64     | 128    | 256    |
-|--------------------|--------|--------|--------|--------|
-| PaddlePaddle Fluid | 190.20 | 222.15 | 247.40 | 258.18 |
-| PaddlePaddle v2    | 170.96 | 233.71 | 256.14 | 329.23 |
-| TensorFlow         | -      | -      | -      | -      |
-
-### Accelerate Rate
-
-- Pserver Count: 20
-- Batch Size: 128
-- Metrics: samples / sec
-
-| Trainer Count                     | 20              | 40              | 80              | 100              |
-|-----------------------------------|-----------------|-----------------|-----------------|------------------|
-| PaddlePaddle Fluid                | 263.29 (78.64%) | 518.80 (77.47%) | 836.26 (62.44%) | 1019.29 (60.89%) |
-| PaddlePaddle v2 (need more tests) | 326.85 (92.85%) | 534.58 (75.93%) | 853.30 (60.60%) | 1041.99 (59.20%) |
-| TensorFlow                        | -               | -               | -               | -                |
-
-### Different Pserver Count
-
-- Trainer Count: 60
-- Batch Size: 128
-- Metrics: samples / sec
-
-| PServer Count                              | 3     | 6     | 10    | 20    |
-|--------------------------------------------|-------|-------|-------|-------|
-| PaddlePaddle Fluid (should fix in next PR) | 589.1 | 592.6 | 656.4 | 655.8 |
-| PaddlePaddle v2 (need more tests)          | 593.4 | 791.3 | 729.7 | 821.7 |
-| TensorFlow                                 | -     | -     | -     | -     |
-
-*The performance gap between Fluid and v2 comes from network interference.*
-
-
-## Steps to Run the Performance Test
-
-1. Re-compile PaddlePaddle with `-DWITH_DISTRIBUTE` enabled to build it with distributed support.
-1. When the build finishes, copy the output `whl` package located under `build/python/dist` to the current directory.
-1. Run `docker build -t [image:tag] .` to build the docker image and run `docker push [image:tag]` to push the image to a repository so Kubernetes can find it.
-1. Run `kubectl create -f pserver.yaml && kubectl create -f trainer.yaml` to start the job on your Kubernetes cluster (you must configure the `kubectl` client before this step).
-1. Run `kubectl get po` to list the running pods, and run `kubectl logs [podID]` to fetch the pod logs of the pservers and trainers.
-
-Check the logs for the distributed training progress and analyze the performance.
-
-## Enable Verbose Logs
-
-Edit `pserver.yaml` and `trainer.yaml` and add the environment variables `GLOG_v=3` and `GLOG_logtostderr=1` to see what happened in detail.
diff --git a/benchmark/cluster/vgg16/fluid_pserver.yaml b/benchmark/cluster/vgg16/fluid_pserver.yaml
deleted file mode 100644
index ee8b0763b62fc011f40f6197e929a68b48a93e47..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/fluid_pserver.yaml
+++ /dev/null
@@ -1,72 +0,0 @@
-apiVersion: extensions/v1beta1
-kind: ReplicaSet
-metadata:
- name: vgg16job-pserver
-spec:
- replicas: 10
- template:
- metadata:
- labels:
- paddle-job-pserver: vgg16job
- spec:
- hostNetwork: true
- imagePullSecrets:
- - name: job-registry-secret
- containers:
- - name: pserver
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16job
- - name: MKL_NUM_THREADS
- value: "1"
- - name: TRAINING_ROLE
- value: "PSERVER"
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "1"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- command: ["paddle_k8s", "start_fluid"]
- resources:
- requests:
- memory: 10Gi
- cpu: 4
- limits:
- memory: 10Gi
- cpu: 4
diff --git a/benchmark/cluster/vgg16/fluid_trainer.yaml b/benchmark/cluster/vgg16/fluid_trainer.yaml
deleted file mode 100644
index 3d56caac009464d1073423bb63abff1f8b0cf28f..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/fluid_trainer.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: vgg16job-trainer
-spec:
- parallelism: 20
- completions: 20
- template:
- metadata:
- labels:
- paddle-job: vgg16job
- spec:
- imagePullSecrets:
- - name: job-registry-secret
- hostNetwork: true
- containers:
- - name: trainer
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- command: ["paddle_k8s", "start_fluid"]
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16job
- - name: TRAINING_ROLE
- value: "TRAINER"
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 128"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "1"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- resources:
- requests:
- memory: 40Gi
- cpu: 2
- limits:
- memory: 40Gi
- cpu: 2
- restartPolicy: Never
diff --git a/benchmark/cluster/vgg16/run_vgg_dist.sh b/benchmark/cluster/vgg16/run_vgg_dist.sh
deleted file mode 100644
index 8c0501439e9d5fa175f5aa9b62d286e690a10904..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/run_vgg_dist.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-
-# Update to point to the source file.
-VGG_SRC="vgg16_fluid.py"
-
-export TRAINING_ROLE=PSERVER
-export TRAINERS=2
-export POD_IP=127.0.0.1
-export PADDLE_INIT_PORT=6174
-MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 &
-
-# Need to wait for the ps to start first.
-sleep 10
-echo "done start ps"
-
-export TRAINING_ROLE=TRAINER
-export TRAINERS=2
-export POD_IP=127.0.0.1
-export PADDLE_INIT_PORT=6174
-CUDA_VISIBLE_DEVICES=4 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=0 &
-CUDA_VISIBLE_DEVICES=5 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=1 &
diff --git a/benchmark/cluster/vgg16/tf_k8s b/benchmark/cluster/vgg16/tf_k8s
deleted file mode 100644
index 4fc263d5f681aeabfa71f1758714d269d987b272..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/tf_k8s
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/bin/bash
-check_trainer_ret() {
- ret=$1
- stdbuf -oL echo "job returned $ret...setting pod return message..."
- stdbuf -oL echo "==============================="
-
- if [ $ret -eq 136 ] ; then
- echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log
- elif [ $ret -eq 139 ] ; then
- echo "Segmentation Fault" > /dev/termination-log
- elif [ $ret -eq 1 ] ; then
- echo "General Error" > /dev/termination-log
- elif [ $ret -eq 134 ] ; then
- echo "Program Abort" > /dev/termination-log
- fi
- stdbuf -oL echo "termination log written..."
- exit $ret
-}
-
-g_pservers=""
-g_trainers=""
-
-wait_running_pods(){
- pserver_label="tf-job-pserver=${JOB_NAME}"
- trainer_label="tf-job-trainer=${JOB_NAME}"
-
- stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS_NUM}
- stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS_NUM}
-
- g_pservers=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PORT})
- g_trainers=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PORT})
-}
-
-start_tf_pserver(){
- wait_running_pods
-
- label="tf-job-pserver=${JOB_NAME}"
- pserver_id=$(python /root/k8s_tools.py fetch_id ${label})
-
- cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
- --job_name=${TF_JOB_NAME} --task_index=${pserver_id}"
-
- stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
-}
-
-start_tf_trainer(){
- wait_running_pods
-
- label="tf-job-trainer=${JOB_NAME}"
- trainer_id=$(python /root/k8s_tools.py fetch_id ${label})
-
- cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
- --job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}"
-
- stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
- check_trainer_ret $?
-}
-
-start_tf(){
- if [[ "${TF_JOB_NAME}" == "worker" ]]; then
- start_tf_trainer
- else
- start_tf_pserver
- fi
-}
-
-usage() {
- echo "usage: tf_k8s []:"
- echo " start_tf Start tensorflow jobs"
-}
-
-case "$1" in
- start_tf)
- start_tf
- ;;
- --help)
- usage
- ;;
- *)
- usage
- ;;
-esac
diff --git a/benchmark/cluster/vgg16/tf_pserver.yaml b/benchmark/cluster/vgg16/tf_pserver.yaml
deleted file mode 100644
index 5e37c700819119c8af05c40fe4b8d13911efc3e1..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/tf_pserver.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-apiVersion: extensions/v1beta1
-kind: ReplicaSet
-metadata:
- name: vgg16job-tf-pserver
-spec:
- replicas: 10
- template:
- metadata:
- labels:
- tf-job-pserver: vgg16job-tf
- spec:
- hostNetwork: true
- imagePullSecrets:
- - name: job-registry-secret
- containers:
- - name: pserver
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
- imagePullPolicy: Always
- command: ["tf_k8s", "start_tf"]
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PORT
- value: "32036"
- - name: ENTRY
- value: "python vgg16_tf.py"
- - name: JOB_NAME
- value: vgg16job-tf
- - name: PSERVERS_NUM
- value: "10"
- - name: TF_JOB_NAME
- value: "ps"
- - name: TRAINERS_NUM
- value: "20"
- - name: BATCH_SIZE
- value: "128"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: NUM_PASSES
- value: "1"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- resources:
- requests:
- memory: 10Gi
- cpu: 4
- limits:
- memory: 10Gi
- cpu: 4
diff --git a/benchmark/cluster/vgg16/tf_trainer.yaml b/benchmark/cluster/vgg16/tf_trainer.yaml
deleted file mode 100644
index 08795df3addfa7b618db24a65e57be190e268f06..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/tf_trainer.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: vgg16job-tf-trainer
-spec:
- parallelism: 20
- completions: 20
- template:
- metadata:
- labels:
- tf-job-trainer: vgg16job-tf
- spec:
- imagePullSecrets:
- - name: job-registry-secret
- hostNetwork: true
- containers:
- - name: trainer
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
- imagePullPolicy: Always
- command: ["tf_k8s", "start_tf"]
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PORT
- value: "32036"
- - name: JOB_NAME
- value: vgg16job-tf
- - name: TF_JOB_NAME
- value: "worker"
- - name: ENTRY
- value: "python vgg16_tf.py"
- - name: PSERVERS_NUM
- value: "10"
- - name: BATCH_SIZE
- value: "128"
- - name: TRAINERS_NUM
- value: "20"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: NUM_PASSES
- value: "1"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- - name: POD_IP
- valueFrom:
- fieldRef:
- fieldPath: "status.podIP"
- resources:
- requests:
- memory: 40Gi
- cpu: 2
- limits:
- memory: 40Gi
- cpu: 2
- restartPolicy: Never
diff --git a/benchmark/cluster/vgg16/v2_pserver.yaml b/benchmark/cluster/vgg16/v2_pserver.yaml
deleted file mode 100644
index dd1271e0cf399184134c06b3200ee1202c65cef0..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/v2_pserver.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-apiVersion: extensions/v1beta1
-kind: ReplicaSet
-metadata:
- name: vgg16v2job-pserver
-spec:
- replicas: 10
- template:
- metadata:
- labels:
- paddle-job-pserver: vgg16v2job
- spec:
- hostNetwork: true
- imagePullSecrets:
- - name: job-registry-secret
- containers:
- - name: pserver
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- ports:
- - name: jobport-30236
- containerPort: 30236
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16v2job
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "python train.py"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "1"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- command: ["paddle_k8s", "start_pserver"]
- resources:
- requests:
- memory: 10Gi
- cpu: 4
- limits:
- memory: 10Gi
- cpu: 4
diff --git a/benchmark/cluster/vgg16/v2_trainer.yaml b/benchmark/cluster/vgg16/v2_trainer.yaml
deleted file mode 100644
index 12c8964066cbcfe8d2a44de2f51a3d12ea422fe2..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/v2_trainer.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
- name: vgg16v2job-trainer
-spec:
- parallelism: 20
- completions: 20
- template:
- metadata:
- labels:
- paddle-job: vgg16v2job
- spec:
- imagePullSecrets:
- - name: job-registry-secret
- hostNetwork: true
- containers:
- - name: trainer
- image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
- imagePullPolicy: Always
- command: ["paddle_k8s", "start_trainer", "v2"]
- env:
- - name: PADDLE_JOB_NAME
- value: vgg16v2job
- - name: BATCH_SIZE
- value: "256"
- - name: TRAINERS
- value: "20"
- - name: PSERVERS
- value: "10"
- - name: TOPOLOGY
- value: ""
- - name: ENTRY
- value: "cd /workspace && MKL_NUM_THREADS=1 python /workspace/vgg16_v2.py"
- - name: TRAINER_PACKAGE
- value: "/workspace"
- - name: PADDLE_INIT_PORT
- value: "30236"
- - name: PADDLE_INIT_NICS
- value: "xgbe0"
- - name: PADDLE_INIT_TRAINER_COUNT
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM
- value: "1"
- - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
- value: "1"
- - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
- value: "20"
- - name: PADDLE_INIT_NUM_PASSES
- value: "2"
- - name: PADDLE_INIT_USE_GPU
- value: "0"
- - name: LD_LIBRARY_PATH
- value: "/usr/local/lib:/usr/local/nvidia/lib64"
- - name: NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: "metadata.namespace"
- resources:
- requests:
- memory: 40Gi
- cpu: 2
- limits:
- memory: 40Gi
- cpu: 2
- restartPolicy: Never
diff --git a/benchmark/cluster/vgg16/vgg16_fluid.py b/benchmark/cluster/vgg16/vgg16_fluid.py
deleted file mode 100644
index e9360ab4c79d23bdf9f84d0c0d407af6d39bde3e..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/vgg16_fluid.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""VGG16 benchmark in Fluid"""
-from __future__ import print_function
-
-import sys
-import time
-import numpy as np
-import paddle.v2 as paddle
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.profiler as profiler
-import argparse
-import functools
-import os
-from paddle.fluid import debuger
-
-
-def str2bool(v):
- if v.lower() in ('yes', 'true', 't', 'y', '1'):
- return True
- elif v.lower() in ('no', 'false', 'f', 'n', '0'):
- return False
- else:
- raise argparse.ArgumentTypeError('Boolean value expected.')
-
-
-parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument(
- '--batch_size', type=int, default=16, help="Batch size for training.")
-parser.add_argument(
- '--learning_rate',
- type=float,
- default=1e-3,
- help="Learning rate for training.")
-parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.")
-parser.add_argument(
- '--device',
- type=str,
- default='CPU',
- choices=['CPU', 'GPU'],
- help="The device type.")
-parser.add_argument('--device_id', type=int, default=0, help="The device id.")
-parser.add_argument(
- '--data_format',
- type=str,
- default='NCHW',
- choices=['NCHW', 'NHWC'],
- help='The data order, now only support NCHW.')
-parser.add_argument(
- '--data_set',
- type=str,
- default='flowers',
- choices=['cifar10', 'flowers'],
- help='Optional dataset for benchmark.')
-parser.add_argument(
- '--local',
- type=str2bool,
- default=True,
- help='Whether to run as local mode.')
-
-parser.add_argument(
- "--ps_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--trainer_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--profile", action='store_true', help="If set, profile a few steps.")
-
-# Flags for defining the tf.train.Server
-parser.add_argument(
- "--task_index", type=int, default=0, help="Index of task within the job")
-args = parser.parse_args()
-
-
-def vgg16_bn_drop(input):
- def conv_block(input, num_filter, groups, dropouts):
- return fluid.nets.img_conv_group(
- input=input,
- pool_size=2,
- pool_stride=2,
- conv_num_filter=[num_filter] * groups,
- conv_filter_size=3,
- conv_act='relu',
- conv_with_batchnorm=True,
- conv_batchnorm_drop_rate=dropouts,
- pool_type='max')
-
- conv1 = conv_block(input, 64, 2, [0.3, 0])
- conv2 = conv_block(conv1, 128, 2, [0.4, 0])
- conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
- conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
- conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
-
- drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
- fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
- bn = fluid.layers.batch_norm(input=fc1, act='relu')
- drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
- fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
- return fc2
-
-
-def main():
- if args.data_set == "cifar10":
- classdim = 10
- if args.data_format == 'NCHW':
- data_shape = [3, 32, 32]
- else:
- data_shape = [32, 32, 3]
- else:
- classdim = 102
- if args.data_format == 'NCHW':
- data_shape = [3, 224, 224]
- else:
- data_shape = [224, 224, 3]
-
- # Input data
- images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
- label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-
- # Train program
- net = vgg16_bn_drop(images)
- predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
- cost = fluid.layers.cross_entropy(input=predict, label=label)
- avg_cost = fluid.layers.mean(x=cost)
-
- # Evaluator
- batch_size = fluid.layers.create_tensor(dtype='int64')
- batch_acc = fluid.layers.accuracy(
- input=predict, label=label, total=batch_size)
-
- # inference program
- inference_program = fluid.default_main_program().clone()
- with fluid.program_guard(inference_program):
- inference_program = fluid.io.get_inference_program(batch_acc)
-
- # Optimization
- optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
- optimize_ops, params_grads = optimizer.minimize(avg_cost)
-
- # Initialize executor
- place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(
- args.device_id)
- exe = fluid.Executor(place)
-
- # test
- def test(exe):
- test_pass_acc = fluid.average.WeightedAverage()
- for batch_id, data in enumerate(test_reader()):
- img_data = np.array(map(lambda x: x[0].reshape(data_shape),
- data)).astype("float32")
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
- y_data = y_data.reshape([-1, 1])
-
- outs = exe.run(inference_program,
- feed={"pixel": img_data,
- "label": y_data},
- fetch_list=[batch_acc, batch_size])
- test_pass_acc.add(value=np.array(outs[0]), weight=np.array(outs[1]))
-
- return test_pass_acc.eval()
-
- def train_loop(exe, trainer_prog):
- iters = 0
- ts = time.time()
- train_pass_acc = fluid.average.WeightedAverage()
- for pass_id in range(args.num_passes):
- # train
- start_time = time.time()
- num_samples = 0
- train_pass_acc.reset()
-
- def run_step(batch_id, data):
- img_data = np.array(
- map(lambda x: x[0].reshape(data_shape), data)).astype(
- "float32")
- y_data = np.array(map(lambda x: x[1], data)).astype("int64")
- y_data = y_data.reshape([-1, 1])
-
- loss, acc, b_size = exe.run(
- trainer_prog,
- feed={"pixel": img_data,
- "label": y_data},
- fetch_list=[avg_cost, batch_acc, batch_size])
- return loss, acc, b_size
-
- if args.profile:
- with profiler.profiler('All', 'total',
- '/tmp/profile_vgg_%d' % args.task_index):
- for batch_id, data in enumerate(train_reader()):
- if batch_id > 5: break
- run_step(batch_id, data)
-
- total_time = 0.0
- count = 0
- for batch_id, data in enumerate(train_reader()):
- ts = time.time()
- loss, acc, b_size = run_step(batch_id, data)
- iters += 1
- num_samples += len(data)
- train_pass_acc.add(value=acc, weight=b_size)
-
- duration = time.time() - ts
- total_time += duration
- count += len(data)
- print(
- "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, "
- "Speed = %.2f (%.2f) img/s" % (pass_id, iters, loss, acc,
- len(data) / duration,
- count / total_time)
- ) # The accuracy is accumulated over batches, not just the current batch.
-
- pass_elapsed = time.time() - start_time
- pass_train_acc = train_pass_acc.eval()
- pass_test_acc = test(exe)
- print("Task:%d Pass = %d, Training performance = %f imgs/s, "
- "Train accuracy = %f, Test accuracy = %f\n" %
- (args.task_index, pass_id, num_samples / pass_elapsed,
- pass_train_acc, pass_test_acc))
-
- if args.local:
- # Parameter initialization
- exe.run(fluid.default_startup_program())
-
- # data reader
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.train10() if args.data_set == 'cifar10'
- else paddle.dataset.flowers.train(),
- buf_size=5120),
- batch_size=args.batch_size)
- test_reader = paddle.batch(
- paddle.dataset.cifar.test10()
- if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
- batch_size=args.batch_size)
- train_loop(exe, fluid.default_main_program())
- else:
- trainers = int(os.getenv("TRAINERS")) # total trainer count
- print("trainers total: ", trainers)
-
- training_role = os.getenv(
- "TRAINING_ROLE",
- "TRAINER") # get the training role: trainer/pserver
-
- t = fluid.DistributeTranspiler()
- t.transpile(
- trainer_id=args.task_index,
- pservers=args.ps_hosts,
- trainers=trainers)
-
- if training_role == "PSERVER":
- current_endpoint = os.getenv("POD_IP") + ":" + os.getenv(
- "PADDLE_INIT_PORT")
- if not current_endpoint:
- print("need env SERVER_ENDPOINT")
- exit(1)
- pserver_prog = t.get_pserver_program(current_endpoint)
- pserver_startup = t.get_startup_program(current_endpoint,
- pserver_prog)
- exe.run(pserver_startup)
- exe.run(pserver_prog)
- elif training_role == "TRAINER":
- # Parameter initialization
- exe.run(fluid.default_startup_program())
-
- # data reader
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.train10() if args.data_set == 'cifar10'
- else paddle.dataset.flowers.train(),
- buf_size=5120),
- batch_size=args.batch_size)
- test_reader = paddle.batch(
- paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else
- paddle.dataset.flowers.test(),
- batch_size=args.batch_size)
-
- trainer_prog = t.get_trainer_program()
- feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
- # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
- exe.run(fluid.default_startup_program())
- train_loop(exe, trainer_prog)
- else:
- print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
-
-
-def print_arguments():
- print('----------- Configuration Arguments -----------')
- for arg, value in sorted(vars(args).iteritems()):
- print('%s: %s' % (arg, value))
- print('------------------------------------------------')
-
-
-if __name__ == "__main__":
- print_arguments()
- main()
diff --git a/benchmark/cluster/vgg16/vgg16_tf.py b/benchmark/cluster/vgg16/vgg16_tf.py
deleted file mode 100644
index 2d220478acae46566760209dbc012cff316946aa..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/vgg16_tf.py
+++ /dev/null
@@ -1,366 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""VGG16 benchmark in TensorFlow
-You can get distribution example template structure here:
-https://medium.com/clusterone/how-to-write-distributed-tensorflow-code-with-an-example-on-tensorport-70bf3306adcb
-https://www.tensorflow.org/deploy/distributed
-"""
-
-import tensorflow as tf
-import paddle.v2 as paddle
-import numpy as np
-import argparse
-import time
-
-parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument(
- '--batch_size', type=int, default=128, help="Batch size for training.")
-parser.add_argument(
- '--learning_rate',
- type=float,
- default=1e-3,
- help="Learning rate for training.")
-parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.")
-parser.add_argument(
- '--device',
- type=str,
- default='CPU',
- choices=['CPU', 'GPU'],
- help="The device type.")
-parser.add_argument(
- '--data_format',
- type=str,
- default='NHWC',
- choices=['NCHW', 'NHWC'],
- help='The data order, NCHW=[batch, channels, height, width].'
- 'Only support NHWC right now.')
-parser.add_argument(
- '--data_set',
- type=str,
- default='cifar10',
- choices=['cifar10', 'flowers'],
- help='Optional dataset for benchmark.')
-
-parser.add_argument(
- "--ps_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--worker_hosts",
- type=str,
- default="",
- help="Comma-separated list of hostname:port pairs")
-parser.add_argument(
- "--job_name", type=str, default="", help="One of 'worker', 'ps'")
-# Flags for defining the tf.train.Server
-parser.add_argument(
- "--task_index", type=int, default=0, help="Index of task within the job")
-
-args = parser.parse_args()
-
-
-class VGG16Model(object):
- def __init__(self):
- self.parameters = []
-
- def batch_norm_relu(self, inputs, is_training):
- """Performs a batch normalization followed by a ReLU."""
- # We set fused=True for a significant speed boost. See
- # https://www.tensorflow.org/speed/speed_guide#common_fused_ops
- inputs = tf.layers.batch_normalization(
- inputs=inputs,
- axis=1 if args.data_format == 'NCHW' else -1,
- momentum=0.9,
- epsilon=1e-05,
- center=True,
- scale=True,
- training=is_training,
- fused=True)
- inputs = tf.nn.relu(inputs)
- return inputs
-
- def conv_bn_layer(self,
- name,
- images,
- kernel_shape,
- is_training,
- drop_rate=0.0):
- with tf.name_scope(name) as scope:
- kernel = tf.Variable(
- tf.truncated_normal(
- kernel_shape, dtype=tf.float32, stddev=1e-1),
- name='weights')
- conv = tf.nn.conv2d(
- images,
- kernel, [1, 1, 1, 1],
- data_format=args.data_format,
- padding='SAME')
- biases = tf.Variable(
- tf.constant(
- 0.0, shape=[kernel_shape[-1]], dtype=tf.float32),
- trainable=True,
- name='biases')
- out = tf.nn.bias_add(conv, biases)
- out = self.batch_norm_relu(out, is_training)
- out = tf.layers.dropout(out, rate=drop_rate, training=is_training)
- return out
-
- def fc_layer(self, name, inputs, shape):
- with tf.name_scope(name) as scope:
- fc_w = tf.Variable(
- tf.truncated_normal(
- shape, dtype=tf.float32, stddev=1e-1),
- name='weights')
- fc_b = tf.Variable(
- tf.constant(
- 0.0, shape=[shape[-1]], dtype=tf.float32),
- trainable=True,
- name='biases')
- out = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b)
- return out
-
- def network(self, images, class_dim, is_training):
- """ VGG16 model structure.
-
- TODO(kuke): enable this network to support the 'NCHW' data format
- """
-
- # conv1
- conv1_1 = self.conv_bn_layer(
- 'conv1_1', images, [3, 3, 3, 64], is_training, drop_rate=0.3)
- conv1_2 = self.conv_bn_layer(
- 'conv1_2', conv1_1, [3, 3, 64, 64], is_training, drop_rate=0.0)
- # pool1
- pool1 = tf.nn.max_pool(
- conv1_2,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool1')
- # conv2
- conv2_1 = self.conv_bn_layer(
- 'conv2_1', pool1, [3, 3, 64, 128], is_training, drop_rate=0.4)
- conv2_2 = self.conv_bn_layer(
- 'conv2_2', conv2_1, [3, 3, 128, 128], is_training, drop_rate=0.0)
- # pool2
- pool2 = tf.nn.max_pool(
- conv2_2,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool2')
- # conv3
- conv3_1 = self.conv_bn_layer(
- 'conv3_1', pool2, [3, 3, 128, 256], is_training, drop_rate=0.4)
- conv3_2 = self.conv_bn_layer(
- 'conv3_2', conv3_1, [3, 3, 256, 256], is_training, drop_rate=0.4)
- conv3_3 = self.conv_bn_layer(
- 'conv3_3', conv3_2, [3, 3, 256, 256], is_training, drop_rate=0.0)
- # pool3
- pool3 = tf.nn.max_pool(
- conv3_3,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool3')
- # conv4
- conv4_1 = self.conv_bn_layer(
- 'conv4_1', pool3, [3, 3, 256, 512], is_training, drop_rate=0.4)
- conv4_2 = self.conv_bn_layer(
- 'conv4_2', conv4_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
- conv4_3 = self.conv_bn_layer(
- 'conv4_3', conv4_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
- # pool4
- pool4 = tf.nn.max_pool(
- conv4_3,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool4')
- # conv5
- conv5_1 = self.conv_bn_layer(
- 'conv5_1', pool4, [3, 3, 512, 512], is_training, drop_rate=0.4)
- conv5_2 = self.conv_bn_layer(
- 'conv5_2', conv5_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
- conv5_3 = self.conv_bn_layer(
- 'conv5_3', conv5_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
- # pool5
- pool5 = tf.nn.max_pool(
- conv5_3,
- ksize=[1, 2, 2, 1],
- strides=[1, 2, 2, 1],
- padding='SAME',
- name='pool4')
- # flatten
- shape = int(np.prod(pool5.get_shape()[1:]))
- pool5_flat = tf.reshape(pool5, [-1, shape])
- # fc1
- drop = tf.layers.dropout(pool5_flat, rate=0.5, training=is_training)
- fc1 = self.fc_layer('fc1', drop, [shape, 512])
- # fc2
- bn = self.batch_norm_relu(fc1, is_training)
- drop = tf.layers.dropout(bn, rate=0.5, training=is_training)
- fc2 = self.fc_layer('fc2', drop, [512, 512])
-
- fc3 = self.fc_layer('fc3', fc2, [512, class_dim])
-
- return fc3
-
-
-def run_benchmark(cluster_spec, server):
- """Run benchmark on cifar10 or flowers."""
-
- if args.data_set == "cifar10":
- class_dim = 10
- raw_shape = (3, 32, 32)
- dat_shape = (None, 32, 32, 3) if args.data_format == 'NHWC' else (
- None, 3, 32, 32)
- else:
- class_dim = 102
- raw_shape = (3, 224, 224)
- dat_shape = (None, 224, 224, 3) if args.data_format == 'NHWC' else (
- None, 3, 224, 224)
-
- device = tf.train.replica_device_setter(
- worker_device="/job:worker/task:{}".format(args.task_index),
- cluster=cluster_spec)
-
- with tf.device(device):
- images = tf.placeholder(tf.float32, shape=dat_shape)
- labels = tf.placeholder(tf.int64, shape=(None, ))
- is_training = tf.placeholder('bool')
- onehot_labels = tf.one_hot(labels, depth=class_dim)
-
- vgg16 = VGG16Model()
- logits = vgg16.network(images, class_dim, is_training)
- loss = tf.losses.softmax_cross_entropy(
- onehot_labels=onehot_labels, logits=logits)
- avg_loss = tf.reduce_mean(loss)
-
- correct = tf.equal(tf.argmax(logits, 1), labels)
- accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
-
- optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- global_step = tf.Variable(0, name='global_step', trainable=False)
- with tf.control_dependencies(update_ops):
- train_op = optimizer.minimize(avg_loss, global_step=global_step)
-
- summary_op = tf.summary.merge_all()
- init_op = tf.global_variables_initializer()
-
- # data reader
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.train10()
- if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
- buf_size=5120),
- batch_size=args.batch_size)
- test_reader = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.cifar.test10()
- if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
- buf_size=5120),
- batch_size=args.batch_size)
-
- # test
- def test():
- test_accs = []
- for batch_id, data in enumerate(test_reader()):
- test_images = np.array(
- map(lambda x: np.transpose(x[0].reshape(raw_shape),
- axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32")
- test_labels = np.array(map(lambda x: x[1], data)).astype('int64')
- test_accs.append(
- accuracy.eval(feed_dict={
- images: test_images,
- labels: test_labels,
- is_training: False
- }))
- return np.mean(test_accs)
-
- config = tf.ConfigProto(
- intra_op_parallelism_threads=1,
- inter_op_parallelism_threads=1,
- log_device_placement=True)
- config.gpu_options.allow_growth = True
-
- hooks = [tf.train.StopAtStepHook(last_step=1000000)]
-
- with tf.train.MonitoredTrainingSession(
- master=server.target,
- is_chief=(args.task_index == 0),
- hooks=hooks,
- config=config) as sess:
- iters, num_samples, start_time = 0, 0, 0.0
- for pass_id in range(args.num_passes):
- # train
- num_samples = 0
- start_time = time.time()
- for batch_id, data in enumerate(train_reader()):
- train_images = np.array(
- map(lambda x: np.transpose(x[0].reshape(raw_shape),
- axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32")
- train_labels = np.array(map(lambda x: x[1], data)).astype(
- 'int64')
- iter_begin_time = time.time()
- _, loss, acc = sess.run([train_op, avg_loss, accuracy],
- feed_dict={
- images: train_images,
- labels: train_labels,
- is_training: True
- })
- iters += 1
- print(
- "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed=%.2f imgs/sec"
- % (pass_id, iters, loss, acc,
- len(data) / (time.time() - iter_begin_time)))
- num_samples += len(data)
- train_elapsed = time.time() - start_time
- # test
- pass_test_acc = test()
- print("Pass = %d, Train speed = %f imgs/s, Test accuracy = %f\n" %
- (pass_id, num_samples / train_elapsed, pass_test_acc))
-
-
-def print_arguments():
- print('----------- Configuration Arguments -----------')
- for arg, value in sorted(vars(args).iteritems()):
- print('%s: %s' % (arg, value))
- print('------------------------------------------------')
-
-
-if __name__ == '__main__':
- print_arguments()
-
- ps_hosts = args.ps_hosts.split(",")
- worker_hosts = args.worker_hosts.split(",")
-
- # Create a cluster from the parameter server and worker hosts.
- cluster_spec = tf.train.ClusterSpec({
- "ps": ps_hosts,
- "worker": worker_hosts
- })
-
- # Create and start a server for the local task.
- server = tf.train.Server(
- cluster_spec, job_name=args.job_name, task_index=args.task_index)
-
- if args.job_name == "ps":
- print("start pserver")
- server.join()
- elif args.job_name == "worker":
- print("start worker")
- run_benchmark(cluster_spec, server)
diff --git a/benchmark/cluster/vgg16/vgg16_v2.py b/benchmark/cluster/vgg16/vgg16_v2.py
deleted file mode 100644
index 1a66af32d7131997c63bd3c3042875f33a467084..0000000000000000000000000000000000000000
--- a/benchmark/cluster/vgg16/vgg16_v2.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
-import gzip
-
-import paddle.v2.dataset.cifar as cifar
-import paddle.v2 as paddle
-import time
-import os
-
-DATA_DIM = 3 * 32 * 32
-CLASS_DIM = 10
-BATCH_SIZE = os.getenv("BATCH_SIZE")
-if BATCH_SIZE:
- BATCH_SIZE = int(BATCH_SIZE)
-else:
- BATCH_SIZE = 128
-print "batch_size", BATCH_SIZE
-NODE_COUNT = int(os.getenv("TRAINERS"))
-ts = 0
-
-
-def vgg(input, nums, class_dim):
- def conv_block(input, num_filter, groups, num_channels=None):
- return paddle.networks.img_conv_group(
- input=input,
- num_channels=num_channels,
- pool_size=2,
- pool_stride=2,
- conv_num_filter=[num_filter] * groups,
- conv_filter_size=3,
- conv_act=paddle.activation.Relu(),
- pool_type=paddle.pooling.Max())
-
- assert len(nums) == 5
- # the channel of input feature is 3
- conv1 = conv_block(input, 64, nums[0], 3)
- conv2 = conv_block(conv1, 128, nums[1])
- conv3 = conv_block(conv2, 256, nums[2])
- conv4 = conv_block(conv3, 512, nums[3])
- conv5 = conv_block(conv4, 512, nums[4])
-
- fc_dim = 512
- fc1 = paddle.layer.fc(input=conv5,
- size=fc_dim,
- act=paddle.activation.Relu(),
- layer_attr=paddle.attr.Extra(drop_rate=0.5))
- fc2 = paddle.layer.fc(input=fc1,
- size=fc_dim,
- act=paddle.activation.Relu(),
- layer_attr=paddle.attr.Extra(drop_rate=0.5))
- out = paddle.layer.fc(input=fc2,
- size=class_dim,
- act=paddle.activation.Softmax())
- return out
-
-
-def vgg13(input, class_dim):
- nums = [2, 2, 2, 2, 2]
- return vgg(input, nums, class_dim)
-
-
-def vgg16(input, class_dim):
- nums = [2, 2, 3, 3, 3]
- return vgg(input, nums, class_dim)
-
-
-def vgg19(input, class_dim):
- nums = [2, 2, 4, 4, 4]
- return vgg(input, nums, class_dim)
-
-
-def main():
- global ts
- paddle.init(use_gpu=False)
- image = paddle.layer.data(
- name="image", type=paddle.data_type.dense_vector(DATA_DIM))
- lbl = paddle.layer.data(
- name="label", type=paddle.data_type.integer_value(CLASS_DIM))
-
- extra_layers = None
- # NOTE: for v2 distributed training, updates need to be averaged across trainers.
- learning_rate = 1e-3 / NODE_COUNT
- out = vgg16(image, class_dim=CLASS_DIM)
- cost = paddle.layer.classification_cost(input=out, label=lbl)
-
- # Create parameters
- parameters = paddle.parameters.create(cost)
-
- # Create optimizer
- optimizer = paddle.optimizer.Momentum(
- momentum=0.9,
- regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
- BATCH_SIZE),
- learning_rate=learning_rate / BATCH_SIZE,
- learning_rate_decay_a=0.1,
- learning_rate_decay_b=128000 * 35,
- learning_rate_schedule="discexp", )
-
- train_reader = paddle.batch(
- paddle.reader.shuffle(
- cifar.train10(),
- # To use other data, replace the above line with:
- # reader.train_reader('train.list'),
- buf_size=1000),
- batch_size=BATCH_SIZE)
- test_reader = paddle.batch(
- cifar.test10(),
- # To use other data, replace the above line with:
- # reader.test_reader('val.list'),
- batch_size=BATCH_SIZE)
-
- # Create trainer
- trainer = paddle.trainer.SGD(cost=cost,
- parameters=parameters,
- update_equation=optimizer,
- extra_layers=extra_layers,
- is_local=False)
-
- # End batch and end pass event handler
- def event_handler(event):
- global ts, ts_pass
- if isinstance(event, paddle.event.BeginPass):
- ts_pass = time.time()
- if isinstance(event, paddle.event.BeginIteration):
- ts = time.time()
- if isinstance(event, paddle.event.EndIteration):
- if event.batch_id % 1 == 0:
- print "\nPass %d, Batch %d, Cost %f, %s, spent: %f" % (
- event.pass_id, event.batch_id, event.cost, event.metrics,
- time.time() - ts)
- if isinstance(event, paddle.event.EndPass):
- print "Pass %d end, spent: %f" % (event.pass_id,
- time.time() - ts_pass)
- result = trainer.test(reader=test_reader)
- print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
-
- trainer.train(
- reader=train_reader, num_passes=200, event_handler=event_handler)
-
-
-if __name__ == '__main__':
- main()
diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py
index 1d8f27440d0f1438e0520684ee3e90e8a5891a17..30b070e4acac60caa97a4e8ffd07462cb347ee93 100644
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@@ -94,6 +94,10 @@ def parse_args():
'--memory_optimize',
action='store_true',
help='If set, optimize runtime memory before start.')
+ parser.add_argument(
+ '--use_fake_data',
+ action='store_true',
+ help='If set, omit the actual data read operators.')
parser.add_argument(
'--update_method',
type=str,
@@ -198,6 +202,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
exe.run(train_prog)
return
+ if args.use_fake_data:
+ raise Exception(
+ "fake data is not supported in single GPU test for now.")
+
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
@@ -244,7 +252,31 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
batch_acc, args, train_prog, startup_prog, nccl_id_var,
num_trainers, trainer_id):
+ feed_var_list = [
+ var for var in train_prog.global_block().vars.itervalues()
+ if var.is_data
+ ]
+ # generate fake data:
+ if args.use_fake_data:
+ for var in feed_var_list:
+ v = startup_prog.global_block().clone_variable(var)
+ var.persistable = True
+ v.persistable = True
+
+ real_shape = list(var.shape)
+ real_shape[0] = args.batch_size / args.gpus
+ startup_prog.global_block().append_op(
+ outputs={"Out": v},
+ type="fill_constant",
+ attrs={"shape": real_shape,
+ "value": 1.0,
+ "dtype": var.dtype})
+
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
+ if nccl_id_var and trainer_id == 0:
+ # FIXME(wuyi): wait for other trainers to start listening
+ time.sleep(30)
+
startup_exe = fluid.Executor(place)
startup_exe.run(startup_prog)
strategy = fluid.ExecutionStrategy()
@@ -256,10 +288,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
exec_strategy=strategy,
num_trainers=num_trainers,
trainer_id=trainer_id)
- feed_var_list = [
- var for var in train_prog.global_block().vars.itervalues()
- if var.is_data
- ]
+
feeder = fluid.DataFeeder(feed_var_list, place)
for pass_id in range(args.pass_num):
num_samples = 0
@@ -271,7 +300,10 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
num_samples = 0
if iters == args.iterations:
break
- loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
+ if args.use_fake_data:
+ loss, = exe.run([avg_loss.name])
+ else:
+ loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
if args.update_method == "pserver":
exe.bcast_params()
num_samples += len(data)
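
For context, a hedged sketch of how the new `--use_fake_data` switch might be exercised. The flag names are inferred from the `args.*` attributes referenced in this hunk; remaining benchmark options are left at their defaults. Note the guard added above: fake data raises an exception in the single-device `train()` path, so the flag is only meaningful for `train_parallel()`.

```bash
# Multi-device benchmark run that feeds constant fake inputs instead of reading
# real data (the fill_constant ops are appended to startup_prog in the hunk above).
python fluid_benchmark.py \
    --device GPU \
    --gpus 4 \
    --batch_size 128 \
    --use_fake_data
```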
diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py
index 3dbb4b8c5dd13657f8d1853003b321ad047e1349..39ba207fd96f71563504017e77dc0e87c249b3f8 100644
--- a/benchmark/fluid/kube_gen_job.py
+++ b/benchmark/fluid/kube_gen_job.py
@@ -112,6 +112,7 @@ def gen_job():
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
+ envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster.
envs.append({
diff --git a/benchmark/fluid/kube_templates/__init__.py b/benchmark/fluid/kube_templates/__init__.py
index b64a7f78ff10d03987ea4a8c13a0e34bb433f64c..2d09d940a5ee638e4b55405d05924e2d76006cfc 100644
--- a/benchmark/fluid/kube_templates/__init__.py
+++ b/benchmark/fluid/kube_templates/__init__.py
@@ -54,5 +54,13 @@ envs = [
"fieldPath": "status.podIP"
}
}
+ },
+ {
+ "name": "PADDLE_CURRENT_IP",
+ "valueFrom": {
+ "fieldRef": {
+ "fieldPath": "status.podIP"
+ }
+ }
}
]
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index e490397cc0624c310949a4b571bd00cac6e8953b..682614742cf1bd3130c638020a2545e16226d4d6 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS)
add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
endif(USE_EIGEN_FOR_BLAS)
+if(EIGEN_USE_THREADS)
+ add_definitions(-DEIGEN_USE_THREADS)
+endif(EIGEN_USE_THREADS)
+
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index 0fde4373a4be58e71ff1a305bd4991cc554d7a34..2665996432b1f6681927320a85d6835094abe4cd 100644
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -212,6 +212,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake
${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF
+ -DCMAKE_SKIP_RPATH=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst
index 91449042fcdfd48c95f3dd3babf958c5d572e747..f53da4d194f8d2428b4121fa1bb31f3fc95a9f64 100644
--- a/doc/fluid/api/layers.rst
+++ b/doc/fluid/api/layers.rst
@@ -1003,9 +1003,9 @@ dice_loss
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
-bilinear_interp
+upsampling_bilinear2d
____
-.. autofunction:: paddle.fluid.layers.bilinear_interp
+.. autofunction:: paddle.fluid.layers.upsampling_bilinear2d
:noindex:
diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst
index 077f5e9b189269f9f6c9cf68310e2bfd43d8cb67..741c01ce5428c0046daa5a784da70d4bb492438c 100644
--- a/doc/v2/build_and_install/build_from_source_cn.rst
+++ b/doc/v2/build_and_install/build_from_source_cn.rst
@@ -35,13 +35,11 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
# 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像
docker build -t paddle:dev .
# 3. 执行下面的命令编译CPU-Only的二进制
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步)
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
-注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。如果使用自行
-构建的镜像(上述第4步)会执行 :code:`Dockerfile` 描述的默认入口程序 :code:`build.sh` 可以省略步骤3中
-最后的执行脚本的命令。
+注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。
编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装:
@@ -72,15 +70,15 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test
如果期望执行其中一个单元测试,(比如 :code:`test_sum_op` ):
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
- bash /paddle/paddle/scripts/docker/build.sh
- cd /paddle/build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
+ ./paddle/scripts/paddle_build.sh build
+ cd build
ctest -R test_sum_op -V
.. _faq_docker:
diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst
index 545e61ce9602240807d515e9eae971dfca9ddd7f..b06c43e19dcfc52ad0f074a85517a16744895a3a 100644
--- a/doc/v2/build_and_install/build_from_source_en.rst
+++ b/doc/v2/build_and_install/build_from_source_en.rst
@@ -34,14 +34,12 @@ Or you can build your own image from source as the optional step below:
# 2. Optional: build development docker image from source
docker build -t paddle:dev .
# 3. Run the following command to build CPU-Only binaries
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. Or, use your built Docker image to build PaddlePaddle (must run step 2)
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
NOTE: The above command tries to mount the current working directory (the root directory of the source code)
-into :code:`/paddle` directory inside docker container. If you are using your own image
-(Step 4) it will run default entry-point :code:`build.sh` , so you could omit the last
-command in step 3.
+into the :code:`/paddle` directory inside the Docker container.
When the compile finishes, you can get the output whl package under
build/python/dist, then you can choose to install the whl on local
@@ -74,15 +72,15 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU.
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test
If you wish to run only one unit test, like :code:`test_sum_op`:
.. code-block:: bash
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
- bash /paddle/paddle/scripts/docker/build.sh
- cd /paddle/build
+ docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
+ ./paddle/scripts/paddle_build.sh build
+ cd build
ctest -R test_sum_op -V
.. _faq_docker:
diff --git a/paddle/.gitignore b/paddle/.gitignore
index 1c1c0c2c829f088d7e3f52ca007fcb8f33a16a36..01904aa6ef2057afee95ddd6e30cde064b06c52e 100644
--- a/paddle/.gitignore
+++ b/paddle/.gitignore
@@ -11,7 +11,6 @@ GTAGS
*.pb.cc
*.pb.h
*_pb2.py
-paddle_*
output/
google/
Makefile
diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b19256ef4533a09162edf907f6cd51146517e46
--- /dev/null
+++ b/paddle/contrib/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(inference)
diff --git a/paddle/contrib/float16/README.md b/paddle/contrib/float16/README.md
index ded959c47cb81b9384abbb9815773e25969344ec..58b4a50666bfb622af8acbce29355f2a4a870a82 100644
--- a/paddle/contrib/float16/README.md
+++ b/paddle/contrib/float16/README.md
@@ -89,7 +89,7 @@ cd Paddle
# to `FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04` and similarly for other configurations
nvidia-docker build -t paddle:float16 .
# After running this, different results will be written to different log files in Paddle/contrib/float16/
-nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/contrib/float16/run_float16_demo.sh
+nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/paddle/contrib/float16/run_float16_demo.sh
```
#### Accuracy
diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh
index d8a34ee67b8fab214fa6e96104304689211f84da..031225a85dabb26e5d9ea06f58909c049e7f0c08 100755
--- a/paddle/contrib/float16/run_float16_demo.sh
+++ b/paddle/contrib/float16/run_float16_demo.sh
@@ -3,7 +3,7 @@
BUILD_PATH=/paddle/fp16_build
WHEEL_PATH=$BUILD_PATH/python/dist
INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book
-DEMO_PATH=/paddle/contrib/float16
+DEMO_PATH=/paddle/paddle/contrib/float16
# Use the single most powerful CUDA GPU on your machine
export CUDA_VISIBLE_DEVICES=0
@@ -50,7 +50,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
- --data_set=imagenet \
--dirname=$DEMO_PATH/image_classification_imagenet_vgg.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_vgg.inference.model \
--repeat=$REPEAT \
@@ -68,7 +67,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_resnet \
- --data_set=imagenet \
--dirname=$DEMO_PATH/image_classification_imagenet_resnet.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_resnet.inference.model \
--repeat=$REPEAT \
@@ -86,7 +84,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
- --data_set=cifar10 \
--dirname=$DEMO_PATH/image_classification_cifar10_vgg.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_vgg.inference.model \
--repeat=$REPEAT \
@@ -104,7 +101,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
- --data_set=cifar10 \
--dirname=$DEMO_PATH/image_classification_cifar10_resnet.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_resnet.inference.model \
--repeat=$REPEAT \
diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..26b0cfa27af29699ebb9b525db4cfe753f7def2d
--- /dev/null
+++ b/paddle/contrib/inference/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+cc_library(paddle_inference_api
+ SRCS paddle_inference_api.cc
+ DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
+
+cc_test(test_paddle_inference_api
+ SRCS test_paddle_inference_api.cc
+ DEPS paddle_inference_api)
diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d67e1e7667800d6dd00cb8915b0d6dc7c664970b
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api.cc
@@ -0,0 +1,15 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h
index dbaa7c95b97e954537707566e5b7458e6afd14c8..db5092dc6e72c9964043d95a7915aafff4fc69fa 100644
--- a/paddle/contrib/inference/paddle_inference_api.h
+++ b/paddle/contrib/inference/paddle_inference_api.h
@@ -12,49 +12,65 @@
See the License for the specific language governing permissions and
limitations under the License. */
+/*
+ * This file contains the definition of a simple Inference API for Paddle.
+ *
+ * ATTENTION: It requires some C++ features, for lower version C++ or C, we
+ * might release another API.
+ */
+
#pragma once
+#include <memory>
#include <string>
#include <vector>
namespace paddle {
-class Predictor {
+struct PaddleTensor {
+ std::string name; // variable name.
+  std::vector<int> shape;
+  std::vector<unsigned char> data;  // bytes of data.
+ size_t type{typeid(float).hash_code()}; // hash of type
+};
+
+/*
+ * A simple Inference API for Paddle. Currently this API might just be used by
+ * non-sequence scenarios.
+ * TODO(Superjomn) Prepare another API for NLP-related usages.
+ */
+class PaddlePredictor {
public:
- struct Attr;
- Predictor() = default;
+ struct Config;
+ PaddlePredictor() = default;
+ PaddlePredictor(const PaddlePredictor&) = delete;
- // Build the network before inference.
- bool Init(const Attr& attr);
+  // A derived class should have such a constructor,
+ // PaddlePredictor(const XConfig& config);
+ // The XConfig is a derived class of Config.
// Predict a record.
- // Arguments:
- // inputs: the name of the input variables.
- // outputs: the name of the output varaibles.
- // input_shapes: the shape of the input variables.
- // output_shapes: the shape of the output variables.
- // input_data: the data of the input variables.
- // output_data: the data of the output variables.
-  bool Run(const std::vector<std::string>& inputs,
-           const std::vector<std::string>& outputs,
-           const std::vector<std::vector<int>>& input_shapes,
-           const std::vector<std::vector<int>>& output_shapes,
-           const std::vector<std::vector<float>>& input_data,
-           std::vector<std::vector<float>>* output_data);
-
- // Clone a predictor that share the model weights.
- Predictor* Clone();
+  virtual bool Run(const std::vector<PaddleTensor>& inputs,
+                   std::vector<PaddleTensor>* output_data) = 0;
+
+  // Clone a predictor that shares the model weights; the cloned predictor
+  // should be thread-safe.
+  virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
// Destroy the Predictor.
- ~Predictor();
+ virtual ~PaddlePredictor() {}
+
+  friend std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
+ const PaddlePredictor::Config& config);
- struct Attr {
+ // The common configs for all the predictors.
+ struct Config {
enum class EngineKind;
std::string model_dir; // path to the model directory.
bool enable_engine{false}; // Enable to execute (part of) the model on
- // third-party engines.
- EngineKind engine_kind{Attr::EngineKind::kNone};
+ // third-party engines.
+ EngineKind engine_kind{Config::EngineKind::kNone};
enum class EngineKind {
kNone = -1, // Use the native Fluid facility.
@@ -66,4 +82,8 @@ public:
};
};
+// A factory to help create different predictors.
+template <typename ConfigT>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
} // namespace paddle
diff --git a/paddle/contrib/inference/test_paddle_inference_api.cc b/paddle/contrib/inference/test_paddle_inference_api.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a19173087649e8493b8c72e758456cc5b8970e23
--- /dev/null
+++ b/paddle/contrib/inference/test_paddle_inference_api.cc
@@ -0,0 +1,64 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+namespace paddle {
+
+/*
+ * Do not use this, just a demo indicating how to customize a config for a
+ * specific predictor.
+ */
+struct DemoConfig : public PaddlePredictor::Config {
+ float other_config;
+};
+
+/*
+ * Do not use this, just a demo indicating how to customize a Predictor.
+ */
+class DemoPredictor : public PaddlePredictor {
+public:
+ explicit DemoPredictor(const DemoConfig &config) {
+ LOG(INFO) << "I get other_config " << config.other_config;
+ }
+  bool Run(const std::vector<PaddleTensor> &inputs,
+           std::vector<PaddleTensor> *output_data) override {
+ LOG(INFO) << "Run";
+ return false;
+ }
+
+  std::unique_ptr<PaddlePredictor> Clone() override { return nullptr; }
+
+ ~DemoPredictor() override {}
+};
+
+template <>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
+ const DemoConfig &config) {
+  std::unique_ptr<PaddlePredictor> x(new DemoPredictor(config));
+ return x;
+}
+
+TEST(paddle_inference_api, demo) {
+ DemoConfig config;
+ config.other_config = 1.7;
+ auto predictor = CreatePaddlePredictor(config);
+  std::vector<PaddleTensor> outputs;
+ predictor->Run({}, &outputs);
+}
+
+} // namespace paddle
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 1b9c685866763ed126a1bf5d7fdd851c38ac1c63..09b67e5a1741c68c5f5487340e8fc86ff31e00a4 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -243,13 +243,8 @@ const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
}
void OpDesc::Rename(const std::string &old_name, const std::string &new_name) {
- for (auto &input : inputs_) {
- std::replace(input.second.begin(), input.second.end(), old_name, new_name);
- }
- for (auto &output : outputs_) {
- std::replace(output.second.begin(), output.second.end(), old_name,
- new_name);
- }
+ RenameInput(old_name, new_name);
+ RenameOutput(old_name, new_name);
need_update_ = true;
}
@@ -274,6 +269,13 @@ void OpDesc::RenameInput(const std::string &old_name,
for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name);
}
+
+ auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName());
+ if (it != attrs_.end()) {
+    auto &op_vars = boost::get<std::vector<std::string>>(it->second);
+ std::replace(op_vars.begin(), op_vars.end(), old_name, new_name);
+ }
+
need_update_ = true;
}
diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h
index 46c8feec001584a872f7f62682080e0e72c06f50..5f497cafa0f75f7c23d550ef767d55274de7c900 100644
--- a/paddle/fluid/framework/shape_inference.h
+++ b/paddle/fluid/framework/shape_inference.h
@@ -63,6 +63,7 @@ class InferShapeContext {
std::vector<InferShapeVarPtr> GetInputVarPtrs(const std::string &name);
std::vector<InferShapeVarPtr> GetOutputVarPtrs(const std::string &name);
+ virtual InferShapeVarPtr GetVarPtr(const std::string &name) = 0;
// Note: In while op, we need this to be public
void SetDims(const std::vector<std::string> &names,
@@ -81,8 +82,6 @@ class InferShapeContext {
const std::vector<std::string> &names) const;
virtual proto::VarType::Type GetVarType(const std::string &name) const = 0;
-
- virtual InferShapeVarPtr GetVarPtr(const std::string &name) = 0;
};
} // namespace framework
diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc
index 3bae56532d655a1725e18276e09e0cade47b5c68..507b465435609a91ebca97dd70b176c3b79bee02 100644
--- a/paddle/fluid/operators/detail/sendrecvop_utils.cc
+++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc
@@ -149,12 +149,14 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
}
if (platform::is_gpu_place(ctx.GetPlace())) {
+#ifdef PADDLE_WITH_CUDA
// GPU data is copied to CPU buffer when sending,
// free the buffer when possible.
destroy_callback = [](void* backing) {
platform::CUDAPinnedPlace cuda_pinned;
memory::Free(cuda_pinned, backing);
};
+#endif
}
std::string header;
diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index a5bb58c2f4047a3bf2f8592b605772b4fa166c57..20d960f9fee1eae42b2241fb96c163e15db5e24d 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -24,6 +24,8 @@ detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc)
detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu)
detection_library(target_assign_op SRCS target_assign_op.cc
target_assign_op.cu)
+detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
+ polygon_box_transform_op.cu)
# Export local libraries to parent
set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..335e8dd470f851d8c5f6bdbc94cfc343da269034
--- /dev/null
+++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
@@ -0,0 +1,105 @@
+/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
+ public:
+ void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
+                   "It must use CPUPlace.");
+    auto* in = ctx.Input<Tensor>("Input");
+ auto in_dims = in->dims();
+    const T* in_data = in->data<T>();
+    auto* out = ctx.Output<Tensor>("Output");
+    T* out_data = out->mutable_data<T>(ctx.GetPlace());
+
+ int batch_size = in_dims[0];
+ int geo_channel = in_dims[1];
+ int height = in_dims[2];
+ int width = in_dims[3];
+ int id = 0;
+ for (int id_n = 0; id_n < batch_size * geo_channel; ++id_n) {
+ for (int id_h = 0; id_h < height; ++id_h) {
+ for (int id_w = 0; id_w < width; ++id_w) {
+ id = id_n * height * width + width * id_h + id_w;
+ if (id_n % 2 == 0) {
+ out_data[id] = id_w - in_data[id];
+ } else {
+ out_data[id] = id_h - in_data[id];
+ }
+ }
+ }
+ }
+ }
+};
+
+class PolygonBoxTransformOp : public framework::OperatorWithKernel {
+ public:
+ using framework::OperatorWithKernel::OperatorWithKernel;
+
+ void InferShape(framework::InferShapeContext* ctx) const override {
+ PADDLE_ENFORCE(
+ ctx->HasInput("Input"),
+ "Input (Input) of polygon_box transform op should not be null.");
+ PADDLE_ENFORCE(
+ ctx->HasOutput("Output"),
+ "Output (Output) of polygon_box transform op should not be null.");
+
+ auto in_dim = ctx->GetInputDim("Input");
+
+ PADDLE_ENFORCE_EQ(in_dim.size(), 4, "input's rank must be 4.");
+ PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0,
+ "input's second dimension must be even.");
+
+ ctx->SetOutputDim("Output", in_dim);
+ }
+};
+
+class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+ void Make() override {
+ AddInput(
+ "Input",
+ "The input with shape [batch_size, geometry_channels, height, width]");
+ AddOutput("Output", "The output with the same shape as input");
+
+ AddComment(R"DOC(
+PolygonBoxTransform Operator.
+The input is the final geometry output in detection network.
+We use 2*n numbers to denote the coordinate shift from n corner vertices of
+the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi),
+the geometry output contains 2*n channels.
+PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
+)DOC");
+ }
+};
+
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(polygon_box_transform, ops::PolygonBoxTransformOp,
+ ops::PolygonBoxTransformOpMaker,
+ paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+ polygon_box_transform,
+    ops::PolygonBoxTransformCPUKernel<float>,
+    ops::PolygonBoxTransformCPUKernel<double>);
diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6187ac6622c65d2bbc525c3fe2cb397cf74ac612
--- /dev/null
+++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu
@@ -0,0 +1,76 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/gpu_info.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using platform::PADDLE_CUDA_NUM_THREADS;
+#define CUDA_BLOCK_SIZE 16
+
+template <typename T>
+__global__ void PolygonBoxTransformKernel(const int n, const int h, const int w,
+ const T* input, T* output) {
+ int id_n = threadIdx.x + blockDim.x * blockIdx.x;
+ int id_h = threadIdx.y + blockDim.y * blockIdx.y;
+ int id_w = threadIdx.z + blockDim.z * blockIdx.z;
+ if (id_n < n && id_h < h && id_w < w) {
+ int id = id_n * h * w + w * id_h + id_w;
+ if (id_n % 2 == 0) {
+ output[id] = id_w - input[id];
+ } else {
+ output[id] = id_h - input[id];
+ }
+ }
+}
+
+template <typename T>
+class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
+ public:
+ void Compute(const framework::ExecutionContext& ctx) const override {
+ PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+ "It must use CUDAPlace.");
+ auto* in = ctx.Input("Input");
+ auto in_dims = in->dims();
+ const T* in_data = in->data();
+ auto* out = ctx.Output("Output");
+ T* out_data = out->mutable_data(ctx.GetPlace());
+
+ int batch_size = in_dims[0];
+ int geo_channels = in_dims[1];
+ int height = in_dims[2];
+ int width = in_dims[3];
+ dim3 threadsPerBlock(
+ PADDLE_CUDA_NUM_THREADS / (CUDA_BLOCK_SIZE * CUDA_BLOCK_SIZE),
+ CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE);
+ dim3 numBlocks((batch_size * geo_channels) / threadsPerBlock.x,
+ (height + threadsPerBlock.y - 1) / threadsPerBlock.y,
+ (width + threadsPerBlock.z - 1) / threadsPerBlock.z);
+ auto stream = ctx.cuda_device_context().stream();
+    PolygonBoxTransformKernel<T><<<numBlocks, threadsPerBlock, 0, stream>>>(
+ batch_size * geo_channels, height, width, in_data, out_data);
+ }
+};
+
+} // namespace operators
+} // namespace paddle
+
+REGISTER_OP_CUDA_KERNEL(
+ polygon_box_transform,
+    paddle::operators::PolygonBoxTransformOpCUDAKernel<float>,
+    paddle::operators::PolygonBoxTransformOpCUDAKernel<double>);
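For reference, the NumPy sketch below mirrors what the new `polygon_box_transform` kernels compute: for an NCHW geometry map with an even channel count, even channels hold x-offsets and odd channels y-offsets, and each offset is subtracted from the pixel's own column or row index. The helper name and the sample shape are illustrative only, not part of this patch.

```python
import numpy as np

def polygon_box_transform_ref(geo):
    """NumPy reference of the polygon_box_transform kernels above."""
    n, c, h, w = geo.shape
    assert c % 2 == 0, "geometry channels come in (x, y) pairs"
    cols = np.arange(w, dtype=geo.dtype).reshape(1, 1, 1, w)  # id_w
    rows = np.arange(h, dtype=geo.dtype).reshape(1, 1, h, 1)  # id_h
    out = np.empty_like(geo)
    out[:, 0::2] = cols - geo[:, 0::2]  # even channels: id_w - input
    out[:, 1::2] = rows - geo[:, 1::2]  # odd channels:  id_h - input
    return out

# A zero offset map simply yields the pixel coordinates themselves.
print(polygon_box_transform_ref(np.zeros((1, 8, 4, 4), dtype=np.float32))[0, :2])
```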
diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt
index 3106978eb0149b14849dfd1aaad8bbe76791f2f6..62532036f86bfb82465ccd9e0ec526299489932a 100644
--- a/paddle/fluid/operators/reader/CMakeLists.txt
+++ b/paddle/fluid/operators/reader/CMakeLists.txt
@@ -23,6 +23,7 @@ reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_o
reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc)
reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc)
reader_library(create_threaded_reader_op SRCS create_threaded_reader_op.cc)
+reader_library(create_custom_reader_op SRCS create_custom_reader_op.cc)
cc_test(reader_blocking_queue_test SRCS reader_blocking_queue_test.cc)
# Export local libraries to parent
diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4ecbf8ed4f0473a552b778fd6c64c92b946cd458
--- /dev/null
+++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc
@@ -0,0 +1,187 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/operators/detail/safe_ref.h"
+#include "paddle/fluid/operators/reader/reader_op_registry.h"
+
+namespace paddle {
+namespace operators {
+namespace reader {
+
+class CustomReader : public framework::DecoratedReader {
+ public:
+ CustomReader(ReaderBase* reader, const framework::BlockDesc& sub_block,
+ const platform::Place& dev_place,
+               const std::vector<std::string>& source_var_names,
+               const std::vector<std::string>& sink_var_names)
+ : DecoratedReader(reader),
+ program_(*sub_block.Program()),
+ sub_block_id_(sub_block.ID()),
+ exe_(framework::Executor(dev_place)),
+ source_var_names_(source_var_names),
+ sink_var_names_(sink_var_names) {}
+
+  void ReadNext(std::vector<framework::LoDTensor>* out) override;
+
+ private:
+ const framework::ProgramDesc program_;
+ int sub_block_id_;
+ framework::Executor exe_;
+
+  std::vector<std::string> source_var_names_;
+  std::vector<std::string> sink_var_names_;
+};
+
+class CreateCustomReaderOp : public framework::OperatorBase {
+ public:
+ using framework::OperatorBase::OperatorBase;
+
+ private:
+ void RunImpl(const framework::Scope& scope,
+ const platform::Place& dev_place) const override {
+ auto* out = scope.FindVar(Output("Out"))
+ ->template GetMutable();
+ auto* sub_block = Attr("sub_block");
+ if (out->Get() != nullptr) {
+ return;
+ }
+    const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader"))
+                                        ->Get<framework::ReaderHolder>();
+ out->Reset(
+ new CustomReader(underlying_reader.Get(), *sub_block, dev_place,
+                         Attr<std::vector<std::string>>("source_var_names"),
+                         Attr<std::vector<std::string>>("sink_var_names")));
+ }
+};
+
+class CreateCustomReaderOpMaker : public DecoratedReaderMakerBase {
+ protected:
+ void Apply() override {
+    AddAttr<framework::BlockDesc*>(
+        "sub_block", "The block to hold all preprocessing operators.");
+    AddAttr<std::vector<std::string>>(
+ "source_var_names",
+ "Source variables are starting points of data preprocessing. They hold "
+ "preprocessing's input tensors. Each source variable corresponds to "
+ "one of underlying reader's output datas.");
+ AddAttr>(
+ "sink_var_names",
+ "Sink variables are ending points of data preprocessing. They hold "
+ "preprocessing's output tensors. Each sink variable corresponds to "
+ "one of custom reader's output datas.");
+ AddComment(R"DOC(
+ CreateCustomReader Operator
+
+ A custom reader can be used for input data preprocessing.
+ A custom reader holds its own sub-block, which will be executed in its
+      'ReadNext()' function. Users can configure their own preprocessing
+ pipelines by inserting operators into custom reader's sub-block.
+ )DOC");
+ }
+};
+
+class CustomReaderInferShape : public framework::InferShapeBase {
+ public:
+ void operator()(framework::InferShapeContext* ctx) const override {
+ PADDLE_ENFORCE(!ctx->IsRuntime(),
+ "'CustomReaderInferShape' should only be invoked during "
+ "compile time.");
+ PADDLE_ENFORCE(ctx->HasOutput("Out"),
+ "The output decorated reader should not be null.");
+    const auto* sub_block =
+        ctx->Attrs().Get<framework::BlockDesc*>("sub_block");
+    const auto sink_var_names =
+        ctx->Attrs().Get<std::vector<std::string>>("sink_var_names");
+    std::vector<std::vector<int64_t>> res_dims;
+    std::vector<int32_t> res_lod_levels;
+ for (const std::string& var_name : sink_var_names) {
+ auto* sink_var = sub_block->FindVar(var_name);
+ PADDLE_ENFORCE_NOT_NULL(sink_var);
+ res_dims.emplace_back(sink_var->GetShape());
+ res_lod_levels.push_back(sink_var->GetLoDLevel());
+ }
+    auto* out_reader =
+        boost::get<framework::VarDesc*>(ctx->GetOutputVarPtrs("Out")[0]);
+ out_reader->SetShapes(res_dims);
+ out_reader->SetLoDLevels(res_lod_levels);
+ }
+};
+
+class CustomReaderInferVarType : public framework::VarTypeInference {
+ public:
+ void operator()(const framework::OpDesc& op_desc,
+ framework::BlockDesc* block) const override {
+ framework::VarDesc* out_reader = block->FindVar(op_desc.Output("Out")[0]);
+ PADDLE_ENFORCE_NOT_NULL(out_reader);
+ out_reader->SetType(framework::proto::VarType::READER);
+
+    auto sink_var_names =
+        boost::get<std::vector<std::string>>(op_desc.GetAttr("sink_var_names"));
+    const auto* sub_block =
+        boost::get<framework::BlockDesc*>(op_desc.GetAttr("sub_block"));
+    std::vector<framework::proto::VarType::Type> res_data_types;
+ for (const std::string& var_name : sink_var_names) {
+ framework::VarDesc* var = sub_block->FindVar(var_name);
+ PADDLE_ENFORCE_NOT_NULL(var);
+ res_data_types.emplace_back(var->GetDataType());
+ }
+ out_reader->SetDataTypes(res_data_types);
+ }
+};
+
+void CustomReader::ReadNext(std::vector<framework::LoDTensor>* out) {
+ out->clear();
+ std::vector underlying_outs;
+ reader_->ReadNext(&underlying_outs);
+ if (underlying_outs.empty()) {
+    // There is no next data.
+ return;
+ }
+ PADDLE_ENFORCE(source_var_names_.size() == underlying_outs.size(),
+ "The size of source_var_names(%d) and the size of "
+ "underlying_outs(%d) are not consistent. Each feeding element "
+ "must have its own source variable.",
+ source_var_names_.size(), underlying_outs.size());
+ // The scope for CustomReader's sub-block should be independent and shouldn't
+ // be any other computation scope's child. Otherwise, data preprocessing and
+  // computation cannot be concurrent.
+ framework::Scope scope;
+ // 1. Copy LoDTensors from underlying reader's output to source variables.
+ for (size_t i = 0; i < source_var_names_.size(); ++i) {
+ framework::Variable* var = scope.Var(source_var_names_[i]);
+    framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
+ tensor->ShareDataWith(underlying_outs[i]);
+ tensor->set_lod(underlying_outs[i].lod());
+ }
+ // 2. Run the sub-block.
+ exe_.Run(program_, &scope, sub_block_id_, false, true);
+ // 3. Copy LoDTensors from sink variables to out.
+ out->resize(sink_var_names_.size());
+ for (size_t i = 0; i < sink_var_names_.size(); ++i) {
+    const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i]))
+                             .Get<framework::LoDTensor>();
+ framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]);
+ }
+}
+
+} // namespace reader
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators::reader;
+REGISTER_OPERATOR(create_custom_reader, ops::CreateCustomReaderOp,
+ ops::CreateCustomReaderOpMaker, ops::CustomReaderInferShape,
+ ops::CustomReaderInferVarType,
+ paddle::framework::EmptyGradOpMaker)
diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc
index 11f1ddebc48134158315ea70a2d2b9e07f2e2469..612e1f5eca3a4836db1fd167fc6bb63400d20177 100644
--- a/paddle/fluid/operators/reader/reader_op_registry.cc
+++ b/paddle/fluid/operators/reader/reader_op_registry.cc
@@ -115,6 +115,7 @@ void DecoratedReaderInferShape::operator()(
boost::get<framework::VarDesc*>(ctx->GetOutputVarPtrs("Out")[0]);
out_reader->SetLoDLevels(in_reader->GetLoDLevels());
}
+
void DecoratedReaderInferVarType::operator()(
const framework::OpDesc& op_desc, framework::BlockDesc* block) const {
std::string in_reader_name = op_desc.Input("UnderlyingReader")[0];
diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp
index bac4659e62b107dd80ef95dd0907b3da4becffbc..8e9dbbd7a154095a7298bb2f59a82d13a60f9bd3 100644
--- a/paddle/function/EigenGemm.cpp
+++ b/paddle/function/EigenGemm.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
-#include "unsupported/Eigen/CXX11/Tensor"
+#include "paddle/function/EigenThreadDevice.h"
namespace paddle {
@@ -70,25 +70,26 @@ struct EigenBlasGemm {
dims[0].first = transA ? 0 : 1;
dims[0].second = transB ? 1 : 0;
- Eigen::DefaultDevice device;
+ auto* device = EigenDeviceWarpper::device();
if (N == ldc) {
if (alpha == T(1) && beta == T(0)) {
- c.device(device) = a.contract(b, dims);
+ c.device(*device) = a.contract(b, dims);
} else if (alpha == T(1) && beta == T(1)) {
- c.device(device) += a.contract(b, dims);
+ c.device(*device) += a.contract(b, dims);
} else {
- c.device(device) = alpha * a.contract(b, dims) + beta * c;
+ c.device(*device) = alpha * a.contract(b, dims) + beta * c;
}
} else {
if (alpha == T(1) && beta == T(0)) {
- c.slice(offsetC, extentC).device(device) = a.contract(b, dims);
+ c.slice(offsetC, extentC).device(*device) = a.contract(b, dims);
} else if (alpha == T(1) && beta == T(1)) {
- c.slice(offsetC, extentC).device(device) += a.contract(b, dims);
+ c.slice(offsetC, extentC).device(*device) += a.contract(b, dims);
} else {
- c.slice(offsetC, extentC).device(device) =
+ c.slice(offsetC, extentC).device(*device) =
alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC);
}
}
+ EigenDeviceWarpper::free_device(device);
}
};
diff --git a/paddle/function/EigenThreadDevice.h b/paddle/function/EigenThreadDevice.h
new file mode 100644
index 0000000000000000000000000000000000000000..74269aa664a711c905e12a61958c9ab01e2340c0
--- /dev/null
+++ b/paddle/function/EigenThreadDevice.h
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#pragma once
+
+#if defined(__OSX__) || defined(__APPLE__)
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#endif
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace paddle {
+
+#if defined(__ANDROID__)
+int GetCpuCount() {
+ FILE* fp = fopen("/sys/devices/system/cpu/possible", "r");
+ if (!fp) {
+ return 1;
+ }
+ int rank0, rank1;
+ int num = fscanf(fp, "%d-%d", &rank0, &rank1);
+ fclose(fp);
+ if (num < 2) return 1;
+ return rank1 + 1;
+}
+#elif defined(__OSX__) || defined(__APPLE__)
+int GetCpuCount() {
+ int count = 0;
+ size_t len = sizeof(int);
+ sysctlbyname("hw.ncpu", &count, &len, NULL, 0);
+ return count > 0 ? count : 1;
+}
+#else
+int GetCpuCount() { return 1; }
+#endif
+
+class EigenDeviceWarpper {
+public: // NOLINT
+#if EIGEN_USE_THREADS
+ static Eigen::ThreadPoolDevice* device() {
+ const int num_cpus = GetCpuCount();
+ const int num_threads = (num_cpus > 2) ? 2 : num_cpus;
+ static Eigen::ThreadPool tp(num_threads);
+ static Eigen::ThreadPoolDevice* device =
+ new Eigen::ThreadPoolDevice(&tp, num_threads);
+ return device;
+ }
+
+ static void free_device(Eigen::ThreadPoolDevice* device) {
+ // do nothing
+ }
+#else
+ static Eigen::DefaultDevice* device() {
+ Eigen::DefaultDevice* device = new Eigen::DefaultDevice;
+ return device;
+ }
+
+ static void free_device(Eigen::DefaultDevice* device) { delete device; }
+#endif
+};
+
+} // namespace paddle
diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt
index 25fc35311fc63988c64a445d72fc6255e49e8d4b..7c80faa48ce960a3a7eb7d88eda4f2b09756410e 100644
--- a/paddle/optimizer/CMakeLists.txt
+++ b/paddle/optimizer/CMakeLists.txt
@@ -7,6 +7,10 @@ set(OPITMIZER_SRCS
sgd_optimizer.cc
)
-cc_library(paddle_optimizer STATIC SRCS ${OPITMIZER_SRCS} DEPS paddle_proto glog)
-cc_test(serialization_test SRCS serialization_test.cc DEPS paddle_proto)
-cc_test(parameter_optimizer_test SRCS parameter_optimizer_test.cc DEPS paddle_optimizer)
+add_library(paddle_optimizer ${OPITMIZER_SRCS})
+target_link_libraries(paddle_optimizer paddle_proto glog)
+
+if (WITH_TESTING)
+ add_unittest(serialization_test serialization_test.cc)
+ add_unittest(parameter_optimizer_test parameter_optimizer_test.cc)
+endif()
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
deleted file mode 100755
index baff7628ea01caa0248af82c6eed2c3b546cdb35..0000000000000000000000000000000000000000
--- a/paddle/scripts/docker/build.sh
+++ /dev/null
@@ -1,256 +0,0 @@
-#!/bin/bash
-
-function cmake_gen() {
- mkdir -p /paddle/build
- cd /paddle/build
-
- # build script will not fail if *.deb does not exist
- rm *.deb 2>/dev/null || true
- # delete previous built whl packages
- rm -rf /paddle/paddle/dist 2>/dev/null || true
-
- # Support build for all python versions, currently
- # including cp27-cp27m and cp27-cp27mu.
- PYTHON_FLAGS=""
- if [ "$1" != "" ]; then
- echo "using python abi: $1"
- if [ "$1" == "cp27-cp27m" ]; then
- export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
- export PATH=/opt/python/cp27-cp27m/bin/:${PATH}
- PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
- -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
- -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
- elif [ "$1" == "cp27-cp27mu" ]; then
- export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
- export PATH=/opt/python/cp27-cp27mu/bin/:${PATH}
- PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
- -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
- -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
- fi
- fi
-
-    cat > /paddle/build/Dockerfile <<EOF
-    ENV HOME /root
-EOF
-
- if [[ ${WITH_GPU} == "ON" ]]; then
- NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&"
- else
- NCCL_DEPS=""
- fi
-
- if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then
- PADDLE_VERSION="paddle version"
- CMD='"paddle", "version"'
- else
- PADDLE_VERSION="true"
- CMD='"true"'
- fi
-
-    cat >> /paddle/build/Dockerfile <<EOF
-        ANDROID_API=21
-    fi
-else # armeabi, armeabi-v7a
-    ANDROID_ARCH=arm
-fi
-
-ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API
-
-cat <<EOF
- echo "Please use pre-commit to check what is wrong." 1>&2
- exit 1
-}
-
-trap 'abort' 0
-set -e
-
-# install glide
-curl https://glide.sh/get | bash
-eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
-
-# set up go environment for running gometalinter
-mkdir -p $GOPATH/src/github.com/PaddlePaddle/
-ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
-cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
-
-go get github.com/alecthomas/gometalinter
-gometalinter --install
-
-cd $TRAVIS_BUILD_DIR
-export PATH=/usr/bin:$PATH
-pre-commit install
-clang-format --version
-
-
-
-if ! pre-commit run -a ; then
- git diff
- exit 1
-fi
-
-trap : 0
diff --git a/paddle/scripts/travis/deploy_key.enc b/paddle/scripts/travis/deploy_key.enc
deleted file mode 100644
index b0aa45c5ac626c735735fd8541a43bf8b099d0a0..0000000000000000000000000000000000000000
Binary files a/paddle/scripts/travis/deploy_key.enc and /dev/null differ
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 1470f8c2e50004abb08e75980decd9485c22dece..03d4602f7a99dc335260cffdcdc30a839f3988cd 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import contextlib
from .. import core
from ..framework import convert_np_dtype_to_dtype_, default_main_program, default_startup_program, Program
@@ -21,7 +22,8 @@ from ..executor import global_scope
__all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
- 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer'
+ 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer',
+ 'random_data_generator', 'Preprocessor'
]
@@ -535,8 +537,6 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
inputs={'UnderlyingReader': reader},
outputs={'Out': [new_reader]},
attrs=attrs)
- new_reader.persistable = True
- new_reader.stop_gradient = True
return monkey_patch_reader_methods(new_reader)
@@ -581,3 +581,82 @@ def read_file(file_obj):
return out[0]
else:
return out
+
+
+class Preprocessor(object):
+ BEFORE_SUB_BLOCK = 0
+ IN_SUB_BLOCK = 1
+ AFTER_SUB_BLOCK = 2
+
+ def __init__(self, reader, name=None):
+ self.underlying_reader = reader
+ new_reader_name = name if name is not None else unique_name(
+ "create_custom_reader")
+ self.main_prog = default_main_program()
+ self.reader = self.main_prog.current_block().create_var(
+ name=new_reader_name)
+ self.sub_block = None
+ self.source_var_names = None
+ self.sink_var_names = None
+ self.status = Preprocessor.BEFORE_SUB_BLOCK
+
+ def is_completed(self):
+ return self.sub_block and self.source_var_names and self.sink_var_names
+
+ @contextlib.contextmanager
+ def block(self):
+ self.status = Preprocessor.IN_SUB_BLOCK
+ self.sub_block = self.main_prog.create_block()
+ yield
+ self.main_prog.rollback()
+ self.status = Preprocessor.AFTER_SUB_BLOCK
+ if not self.is_completed():
+ raise RuntimeError(
+ "The definition of preprocessor is incompleted! "
+ "Please make sure that you have set input and output "
+ "variables by invoking 'inputs' and 'outputs' in "
+ "Preprocessor's sub-block.")
+
+ def inputs(self):
+ if self.status != Preprocessor.IN_SUB_BLOCK:
+ raise RuntimeError(
+ "Preprocessor.inputs() can only be invoked inside the sub-block."
+ )
+
+ source_shapes = self.underlying_reader.desc.shapes()
+ source_dtypes = self.underlying_reader.desc.dtypes()
+ source_lod_levels = self.underlying_reader.desc.lod_levels()
+ self.source_var_names = [
+ unique_name("preprocessor_source")
+ for _ in xrange(len(source_shapes))
+ ]
+ source_vars = []
+ for var_name, shape, dtype, lod_level in zip(
+ self.source_var_names, source_shapes, source_dtypes,
+ source_lod_levels):
+ source_vars.append(self.main_prog.current_block().create_var(
+ name=var_name, shape=shape, dtype=dtype, lod_level=lod_level))
+ return source_vars
+
+ def outputs(self, *outs):
+ if self.status != Preprocessor.IN_SUB_BLOCK:
+ raise RuntimeError(
+ "Preprocessor.outputs() can only be invoked inside the sub-block."
+ )
+ self.sink_var_names = [var.name for var in outs]
+
+ def __call__(self, *args, **kwargs):
+ if self.status != Preprocessor.AFTER_SUB_BLOCK:
+ raise RuntimeError(
+ "Preprocessor output can only be retrieved after rnn block.")
+
+ self.main_prog.current_block().append_op(
+ type="create_custom_reader",
+ inputs={'UnderlyingReader': self.underlying_reader},
+ outputs={'Out': [self.reader]},
+ attrs={
+ "sub_block": self.sub_block,
+ "source_var_names": self.source_var_names,
+ "sink_var_names": self.sink_var_names
+ })
+ return monkey_patch_reader_methods(self.reader)
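To make the intent of the new `Preprocessor` class concrete, here is a hedged usage sketch built only from its `block()`, `inputs()` and `outputs()` methods; the recordio file name, shapes and dtypes are placeholders, not values taken from this patch.

```python
import paddle.fluid as fluid

# Any decorated-reader source works; open_recordio_file is used here only
# as a placeholder data source.
reader = fluid.layers.open_recordio_file(
    filename='./mnist.recordio',
    shapes=[[-1, 784], [-1, 1]],
    lod_levels=[0, 0],
    dtypes=['float32', 'int64'])

preprocessor = fluid.layers.Preprocessor(reader=reader)
with preprocessor.block():
    img, lbl = preprocessor.inputs()        # source variables
    img_out = img / 2                       # arbitrary per-batch preprocessing
    lbl_out = lbl + 1
    preprocessor.outputs(img_out, lbl_out)  # sink variables

img, lbl = fluid.layers.read_file(preprocessor())
```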
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 04ee8ac9aee92a0e161e83bf1bb34d3ce727a0fb..b6c47aa9a65b9145983513715233784d77e3d904 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -81,7 +81,7 @@ __all__ = [
'label_smooth',
'roi_pool',
'dice_loss',
- 'bilinear_interp',
+ 'upsampling_bilinear2d',
]
@@ -3917,8 +3917,10 @@ def dice_loss(input, label, epsilon=0.00001):
return reduce_mean(dice_score)
-def bilinear_interp(input, out_h, out_w, name=None):
+def upsampling_bilinear2d(input, out_shape=None, scale=None, name=None):
"""
+    upsampling_bilinear2d resizes the input by bilinear interpolation
+    (the operation is also known as bilinear upsampling).
Bilinear interpolation is an extension of linear interpolation for
interpolating functions of two variables (e.g. H-direction and
W-direction in this layer) on a rectilinear 2D grid.
@@ -3930,8 +3932,13 @@ def bilinear_interp(input, out_h, out_w, name=None):
input (Variable): The input tensor of bilinear interpolation,
This is a 4-D tensor of the shape
(num_batches, channels, in_h, in_w).
- out_h (int): output height of bilinear interpolation layer.
- out_w (int): output width of bilinear interpolation layer.
+ out_shape(list|tuple|None): Output shape of bilinear interpolation
+ layer, the shape is (out_h, out_w).
+ Default: None
+ scale(int|None): The multiplier for the input height or width.
+ At least one of out_shape or scale must be set.
+ And out_shape has a higher priority than scale.
+ Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
@@ -3942,10 +3949,27 @@ def bilinear_interp(input, out_h, out_w, name=None):
Examples:
.. code-block:: python
- out = fluid.layers.bilinear_interp(input, out_h=12, out_w=12)
+          out = fluid.layers.upsampling_bilinear2d(input, out_shape=[12, 12])
"""
+ if out_shape is None and scale is None:
+ raise ValueError("One of out_shape and scale must not be None")
helper = LayerHelper('bilinear_interp', **locals())
dtype = helper.input_dtype()
+
+ def _is_list_or_turple_(data):
+ return (isinstance(data, list) or isinstance(data, tuple))
+
+ if out_shape is not None:
+ if not (_is_list_or_turple_(out_shape) and len(out_shape) == 2):
+            raise ValueError('out_shape should be a list or tuple '
+                             'with length 2, (out_h, out_w).')
+ out_shape = list(map(int, out_shape))
+ out_h = out_shape[0]
+ out_w = out_shape[1]
+ else:
+ out_h = int(input.shape[2] * scale)
+ out_w = int(input.shape[3] * scale)
+
out = helper.create_tmp_variable(dtype)
helper.append_op(
type="bilinear_interp",
diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py
index 555e371952d0f902063133c2a227eb78f082726c..9946d0a4ff33b2f5040f6d2e31aa20fcf9c609a7 100644
--- a/python/paddle/fluid/lod_tensor.py
+++ b/python/paddle/fluid/lod_tensor.py
@@ -93,12 +93,12 @@ def _convert_lod(lod):
def create_lod_tensor(data, lod, place):
- """Create a lod tensor from a numpy array or an existing lod tensor.
+ """Create a lod tensor from a numpy array, a list, or an existing lod tensor.
Create a lod tensor by doing the following:
1. Check that the length-based input lod is valid.
2. Convert the length-based lod to a offset-based LoD.
- 3. Copy the data from a numpy array or a existing lod tensor to
+    3. Copy the data from a numpy array, a list or an existing lod tensor to
CPU or GPU device (based on input place).
4. Set the level of detail (LoD) using the offset-based LoD.
@@ -117,7 +117,7 @@ def create_lod_tensor(data, lod, place):
for more details regarding LoD.
Args:
- data: a numpy array or a LoDTensor holding the data to be copied.
+        data: a numpy array, a LoDTensor or a list holding the data to be copied.
lod: a list of lists indicating the length-based LoD info specified by the user.
place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
@@ -126,6 +126,18 @@ def create_lod_tensor(data, lod, place):
"""
if isinstance(data, core.LoDTensor):
return create_lod_tensor(np.array(data), lod, place)
+ elif isinstance(data, list):
+        # When input data is a list, it only deals with the case where the base element
+ # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated
+ # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number
+ # of words or other indexes in the sequence.
+ new_lod = []
+ for seq in data:
+ new_lod.append(len(seq))
+ assert [new_lod] == lod, "data and lod do not match"
+ flattened_data = np.concatenate(data, axis=0).astype("int64")
+ flattened_data = flattened_data.reshape([len(flattened_data), 1])
+ return create_lod_tensor(flattened_data, lod, place)
elif isinstance(data, np.ndarray):
assert _validate_lod(lod,
data.shape[0]), "the provided lod info is invalid"
@@ -134,9 +146,8 @@ def create_lod_tensor(data, lod, place):
tensor.set_lod(_convert_lod(lod))
return tensor
else:
- raise Exception(
- "data should be either a LoDTensor or a Numpy array, but you pass type %s instead"
- % (type(data)))
+ raise TypeError(
+ "data should be either a LoDTensor, a Numpy array or a list")
def create_random_int_lodtensor(lod, base_shape, place, low, high):
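A small sketch of the new list input path, assuming `create_lod_tensor` is re-exported at the `paddle.fluid` package level (otherwise import it from `paddle.fluid.lod_tensor`); the word ids are made up.

```python
import paddle.fluid as fluid

# Three sequences of word ids with lengths 3, 1 and 2; the length-based
# lod must match those lengths exactly.
data = [[1, 2, 3], [4], [5, 6]]
tensor = fluid.create_lod_tensor(data, lod=[[3, 1, 2]], place=fluid.CPUPlace())
# The resulting LoDTensor has shape [6, 1] and dtype int64.
```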
diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
index 5fba561e024b0690f10939267146f2622c567fa5..de3906fc6a005181b0ab04a846eb2e7ce14004c2 100644
--- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
@@ -48,7 +48,7 @@ def linear():
return avg_loss
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
@@ -68,8 +68,8 @@ def train(use_cuda, train_program, save_dirname):
['15.343549569447836']
...
'''
- if save_dirname is not None:
- trainer.save_params(save_dirname)
+ if params_dirname is not None:
+ trainer.save_params(params_dirname)
trainer.stop()
trainer.train(
@@ -80,13 +80,13 @@ def train(use_cuda, train_program, save_dirname):
# infer
-def infer(use_cuda, inference_program, save_dirname=None):
- if save_dirname is None:
+def infer(use_cuda, inference_program, params_dirname=None):
+ if params_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- infer_func=inference_program, param_path=save_dirname, place=place)
+ infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 10
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
@@ -100,10 +100,10 @@ def main(use_cuda):
return
# Directory for saving the trained model
- save_dirname = "fit_a_line.inference.model"
+ params_dirname = "fit_a_line.inference.model"
- train(use_cuda, linear, save_dirname)
- infer(use_cuda, inference_program, save_dirname)
+ train(use_cuda, linear, params_dirname)
+ infer(use_cuda, inference_program, params_dirname)
class TestFitALine(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
index 1160e500dbd6db784eeb81b72968386347fec59a..63dc1b6ce30974ede22a3f7772b76bf207bbae39 100644
--- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
+++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
@@ -85,7 +85,7 @@ def train_network():
return [avg_cost, accuracy]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
BATCH_SIZE = 128
EPOCH_NUM = 1
@@ -105,8 +105,8 @@ def train(use_cuda, train_program, save_dirname):
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
- if save_dirname is not None:
- trainer.save_params(save_dirname)
+ if params_dirname is not None:
+ trainer.save_params(params_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
@@ -122,10 +122,10 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['pixel', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- infer_func=inference_program, param_path=save_dirname, place=place)
+ infer_func=inference_program, param_path=params_dirname, place=place)
# The input's dimension of conv should be 4-D or 5-D.
# Use normalized image pixels as input data, which should be in the range
@@ -142,12 +142,14 @@ def main(use_cuda):
save_path = "image_classification_resnet.inference.model"
train(
- use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
+ use_cuda=use_cuda,
+ train_program=train_network,
+ params_dirname=save_path)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
- save_dirname=save_path)
+ params_dirname=save_path)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py
index 1e3e955ba0299f2cc0fcc02d79ae6fd8ff4c1171..0bf8f265a1c1b11364ecfa11061af183ce20d51e 100644
--- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py
+++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py
@@ -64,7 +64,7 @@ def train_network():
return [avg_cost, accuracy]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
BATCH_SIZE = 128
train_reader = paddle.batch(
paddle.reader.shuffle(
@@ -82,8 +82,8 @@ def train(use_cuda, train_program, save_dirname):
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
- if save_dirname is not None:
- trainer.save_params(save_dirname)
+ if params_dirname is not None:
+ trainer.save_params(params_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
@@ -99,10 +99,10 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['pixel', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- infer_func=inference_program, param_path=save_dirname, place=place)
+ infer_func=inference_program, param_path=params_dirname, place=place)
# The input's dimension of conv should be 4-D or 5-D.
# Use normalized image pixels as input data, which should be in the range
@@ -119,12 +119,14 @@ def main(use_cuda):
save_path = "image_classification_vgg.inference.model"
train(
- use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
+ use_cuda=use_cuda,
+ train_program=train_network,
+ params_dirname=save_path)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
- save_dirname=save_path)
+ params_dirname=save_path)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py
index f4344988141af44af83fda24d73da25f597796ef..9464df59797c0b8c35611ee56de6bf362ac7a4a5 100755
--- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py
+++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py
@@ -141,7 +141,7 @@ def train_program():
return [avg_cost]
-def train(use_cuda, train_program, save_path):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
@@ -172,7 +172,7 @@ def train(use_cuda, train_program, save_path):
print("avg_cost: %s" % avg_cost)
if float(avg_cost) < 100.0: # Large value to increase CI speed
- trainer.save_params(save_path)
+ trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
float(avg_cost)))
@@ -183,7 +183,7 @@ def train(use_cuda, train_program, save_path):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
- trainer.save_params(save_path)
+ trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
@@ -197,10 +197,10 @@ def train(use_cuda, train_program, save_path):
feed_order=feed_order)
-def infer(use_cuda, inference_program, save_path):
+def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- inference_program, param_path=save_path, place=place)
+ inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating LoDTensors to represent sequences of words.
# Here each word is the basic element of these LoDTensors and the shape of
@@ -251,9 +251,9 @@ def infer(use_cuda, inference_program, save_path):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
- save_path = "label_semantic_roles.inference.model"
- train(use_cuda, train_program, save_path)
- infer(use_cuda, inference_program, save_path)
+ params_dirname = "label_semantic_roles.inference.model"
+ train(use_cuda, train_program, params_dirname)
+ infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
index 2aac70463c64019ec97b0c3893b4b52f77967797..03439cbd37671b4727879bf3d0793f016f55247a 100644
--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py
@@ -57,7 +57,7 @@ def train_program():
return [avg_cost, acc]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
@@ -78,7 +78,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
@@ -100,11 +100,11 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['img', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- infer_func=inference_program, param_path=save_dirname, place=place)
+ infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
@@ -116,17 +116,17 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
- save_dirname = "recognize_digits_conv.inference.model"
+ params_dirname = "recognize_digits_conv.inference.model"
    # Call train() with the is_local argument to run distributed training.
train(
use_cuda=use_cuda,
train_program=train_program,
- save_dirname=save_dirname)
+ params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
- save_dirname=save_dirname)
+ params_dirname=params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
index 32653157994f81c46f420c1b55ceddbbbf06f2fe..89bbd21bea7d64a8dd6fc32829b6addb680da62e 100644
--- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py
@@ -44,7 +44,7 @@ def train_program():
return [avg_cost, acc]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
@@ -62,7 +62,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
@@ -81,11 +81,11 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['img', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- infer_func=inference_program, param_path=save_dirname, place=place)
+ infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
@@ -97,17 +97,17 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
- save_dirname = "recognize_digits_mlp.inference.model"
+ params_dirname = "recognize_digits_mlp.inference.model"
    # Call train() with the is_local argument to run distributed training.
train(
use_cuda=use_cuda,
train_program=train_program,
- save_dirname=save_dirname)
+ params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
- save_dirname=save_dirname)
+ params_dirname=params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
index 259680cb097a12a4fc92107f6fd8595393f88bd5..dfc7325acf23176c05fe42761b9997b98d23372a 100644
--- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
+++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py
@@ -155,7 +155,7 @@ def train_program():
return [avg_cost, scale_infer]
-def train(use_cuda, train_program, save_path):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.SGD(learning_rate=0.2)
@@ -180,7 +180,7 @@ def train(use_cuda, train_program, save_path):
print("avg_cost: %s" % avg_cost)
if float(avg_cost) < 4: # Smaller value to increase CI speed
- trainer.save_params(save_path)
+ trainer.save_params(params_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
@@ -197,43 +197,30 @@ def train(use_cuda, train_program, save_path):
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
- feed_order=[
- 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id',
- 'category_id', 'movie_title', 'score'
- ])
+ feed_order=feed_order)
-def infer(use_cuda, inference_program, save_path):
+def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- inference_program, param_path=save_path, place=place)
-
- def create_lod_tensor(data, lod=None):
- tensor = fluid.LoDTensor()
- if lod is None:
- # Tensor, the shape is [batch_size, 1]
- index = 0
- lod_0 = [index]
- for l in range(len(data)):
- index += 1
- lod_0.append(index)
- lod = [lod_0]
- tensor.set_lod(lod)
-
- flattened_data = np.concatenate(data, axis=0).astype("int64")
- flattened_data = flattened_data.reshape([len(flattened_data), 1])
- tensor.set(flattened_data, place)
- return tensor
-
- # Generate a random input for inference
- user_id = create_lod_tensor([[1]])
- gender_id = create_lod_tensor([[1]])
- age_id = create_lod_tensor([[0]])
- job_id = create_lod_tensor([[10]])
- movie_id = create_lod_tensor([[783]])
- category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
- movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
- [[0, 5]])
+ inference_program, param_path=params_dirname, place=place)
+
+ # Use the first data from paddle.dataset.movielens.test() as input.
+ # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor,
+ # where `data` is a list of sequences of index numbers, `lod` is
+ # the level of detail (lod) info associated with `data`.
+ # For example, data = [[10, 2, 3], [2, 3]] means that it contains
+ # two sequences of indexes, of length 3 and 2, respectively.
+ # Correspondingly, lod = [[3, 2]] contains one level of detail info,
+ # indicating that `data` consists of two sequences of length 3 and 2.
+ user_id = fluid.create_lod_tensor([[1]], [[1]], place)
+ gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
+ age_id = fluid.create_lod_tensor([[0]], [[1]], place)
+ job_id = fluid.create_lod_tensor([[10]], [[1]], place)
+ movie_id = fluid.create_lod_tensor([[783]], [[1]], place)
+ category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place)
+ movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]],
+ place)
results = inferencer.infer(
{
@@ -253,12 +240,15 @@ def infer(use_cuda, inference_program, save_path):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
- save_path = "recommender_system.inference.model"
- train(use_cuda=use_cuda, train_program=train_program, save_path=save_path)
+ params_dirname = "recommender_system.inference.model"
+ train(
+ use_cuda=use_cuda,
+ train_program=train_program,
+ params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
- save_path=save_path)
+ params_dirname=params_dirname)
if __name__ == '__main__':
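
Note on the hunk above: it drops the hand-rolled create_lod_tensor helper in favor of the built-in fluid.create_lod_tensor(data, lod, place), which takes length-based lod info. A minimal sketch of that call pattern, outside this diff, reusing the illustrative values from the comment above (variable names words/title are only for illustration):

    import paddle.fluid as fluid

    place = fluid.CPUPlace()
    # Two sequences of word indexes, of length 3 and 2; the length-based
    # lod [[3, 2]] records how the flat data splits into sequences.
    data = [[10, 2, 3], [2, 3]]
    words = fluid.create_lod_tensor(data, [[3, 2]], place)
    # A single sequence of five word ids, as used for movie_title above:
    title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]], place)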
diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt
index 673c965b662a022739f8d489c331f4de9455a926..d71147a85e77ea6dc5b6391aa169abd9b02a0aa1 100644
--- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt
+++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt
@@ -1,6 +1,11 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+# This test is buggy
+# py_test(test_understand_sentiment_dynamic_rnn SRCS
+# test_understand_sentiment_dynamic_rnn.py SERIAL)
+LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn)
+
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py
index 7e32696f9909a0a440f6bdc401ac9f9594c4dec7..11e9fd1bec1450f6753dbe38c7014090d6e136b6 100644
--- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py
+++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py
@@ -64,7 +64,7 @@ def train_program(word_dict):
return [avg_cost, accuracy]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
@@ -85,7 +85,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
else:
@@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
@@ -112,13 +112,13 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['words', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
- param_path=save_dirname,
+ param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
@@ -143,9 +143,9 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
- save_path = "understand_sentiment_conv.inference.model"
- train(use_cuda, train_program, save_path)
- infer(use_cuda, inference_program, save_path)
+ params_dirname = "understand_sentiment_conv.inference.model"
+ train(use_cuda, train_program, params_dirname)
+ infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py
index e50b7920b17f86eada3abc700c5403053fca8771..90757d54f99715163518ce5a094e6ba3a67efed3 100644
--- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py
+++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py
@@ -79,7 +79,7 @@ def train_program(word_dict):
return [avg_cost, accuracy]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
@@ -100,7 +100,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
else:
@@ -112,7 +112,7 @@ def train(use_cuda, train_program, save_dirname):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
@@ -127,13 +127,13 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['words', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
- param_path=save_dirname,
+ param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
@@ -158,9 +158,9 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
- save_path = "understand_sentiment_conv.inference.model"
- train(use_cuda, train_program, save_path)
- infer(use_cuda, inference_program, save_path)
+ params_dirname = "understand_sentiment_conv.inference.model"
+ train(use_cuda, train_program, params_dirname)
+ infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py
index d4fb80168814359827708ad921bd3f53b14bb2ee..52b7d4a83779d01936afb3d9d1e4864b05d55b5a 100644
--- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py
+++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py
@@ -71,7 +71,7 @@ def train_program(word_dict):
return [avg_cost, accuracy]
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
@@ -92,7 +92,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
else:
@@ -104,7 +104,7 @@ def train(use_cuda, train_program, save_dirname):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
@@ -119,13 +119,13 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['words', 'label'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
- param_path=save_dirname,
+ param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
@@ -150,9 +150,9 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
- save_path = "understand_sentiment_stacked_lstm.inference.model"
- train(use_cuda, train_program, save_path)
- infer(use_cuda, inference_program, save_path)
+ params_dirname = "understand_sentiment_stacked_lstm.inference.model"
+ train(use_cuda, train_program, params_dirname)
+ infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py
index 16d73d4aff4ba31327e6d8f5ac04a36387f59daa..eeb8e67087334ea96aab9cdb6272e34e2eb99939 100644
--- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py
+++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py
@@ -80,7 +80,7 @@ def train_program(is_sparse):
return avg_cost
-def train(use_cuda, train_program, save_dirname):
+def train(use_cuda, train_program, params_dirname):
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
test_reader = paddle.batch(
@@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname):
print("loss= ", avg_cost)
if avg_cost < 10.0:
- trainer.save_params(save_dirname)
+ trainer.save_params(params_dirname)
trainer.stop()
if math.isnan(avg_cost):
@@ -115,10 +115,10 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
-def infer(use_cuda, inference_program, save_dirname=None):
+def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
- infer_func=inference_program, param_path=save_dirname, place=place)
+ infer_func=inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating 4 LoDTensors representing 4 words. Here each word
    # is simply an index to look up the corresponding word vector and hence
@@ -153,17 +153,17 @@ def main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
- save_path = "word2vec.inference.model"
+ params_dirname = "word2vec.inference.model"
train(
use_cuda=use_cuda,
train_program=partial(train_program, is_sparse),
- save_dirname=save_path)
+ params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=partial(inference_program, is_sparse),
- save_dirname=save_path)
+ params_dirname=params_dirname)
if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py
index 7be924f762ddeb045dda890dbfdcd96a65449553..65d6552acc9b3d31a97a45290e4613a633fffa3c 100644
--- a/python/paddle/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/fluid/tests/book/test_recommender_system.py
@@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True):
test_reader = paddle.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
- feeding = {
- 'user_id': 0,
- 'gender_id': 1,
- 'age_id': 2,
- 'job_id': 3,
- 'movie_id': 4,
- 'category_id': 5,
- 'movie_title': 6,
- 'score': 7
- }
-
- def func_feed(feeding, data):
- feed_tensors = {}
- for (key, idx) in feeding.iteritems():
- tensor = fluid.LoDTensor()
- if key != "category_id" and key != "movie_title":
- if key == "score":
- numpy_data = np.array(map(lambda x: x[idx], data)).astype(
- "float32")
- else:
- numpy_data = np.array(map(lambda x: x[idx], data)).astype(
- "int64")
- else:
- numpy_data = map(lambda x: np.array(x[idx]).astype("int64"),
- data)
- lod_info = [len(item) for item in numpy_data]
- offset = 0
- lod = [offset]
- for item in lod_info:
- offset += item
- lod.append(offset)
- numpy_data = np.concatenate(numpy_data, axis=0)
- tensor.set_lod([lod])
-
- numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
- tensor.set(numpy_data, place)
- feed_tensors[key] = tensor
- return feed_tensors
+ feed_order = [
+ 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
+ 'movie_title', 'score'
+ ]
def train_loop(main_program):
exe.run(framework.default_startup_program())
+ feed_list = [
+ main_program.global_block().var(var_name) for var_name in feed_order
+ ]
+ feeder = fluid.DataFeeder(feed_list, place)
+
PASS_NUM = 100
for pass_id in range(PASS_NUM):
for batch_id, data in enumerate(train_reader()):
# train a mini-batch
outs = exe.run(program=main_program,
- feed=func_feed(feeding, data),
+ feed=feeder.feed(data),
fetch_list=[avg_cost])
out = np.array(outs[0])
if (batch_id + 1) % 10 == 0:
avg_cost_set = []
for test_data in test_reader():
- avg_cost_np = exe.run(
- program=test_program,
- feed=func_feed(feeding, test_data),
- fetch_list=[avg_cost])
+ avg_cost_np = exe.run(program=test_program,
+ feed=feeder.feed(test_data),
+ fetch_list=[avg_cost])
avg_cost_set.append(avg_cost_np[0])
break # test only 1 segment for speeding up CI
@@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
- def create_lod_tensor(data, lod=None):
- tensor = fluid.LoDTensor()
- if lod is None:
- # Tensor, the shape is [batch_size, 1]
- index = 0
- lod_0 = [index]
- for l in range(len(data)):
- index += 1
- lod_0.append(index)
- lod = [lod_0]
- tensor.set_lod(lod)
-
- flattened_data = np.concatenate(data, axis=0).astype("int64")
- flattened_data = flattened_data.reshape([len(flattened_data), 1])
- tensor.set(flattened_data, place)
- return tensor
-
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
@@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None):
# Use the first data from paddle.dataset.movielens.test() as input
assert feed_target_names[0] == "user_id"
- user_id = create_lod_tensor([[1]])
+ # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor
+ # where `data` is a list of sequences of index numbers, `lod` is
+ # the level of detail (lod) info associated with `data`.
+ # For example, data = [[10, 2, 3], [2, 3]] means that it contains
+ # two sequences of indexes, of length 3 and 2, respectively.
+ # Correspondingly, lod = [[3, 2]] contains one level of detail info,
+ # indicating that `data` consists of two sequences of length 3 and 2.
+ user_id = fluid.create_lod_tensor([[1]], [[1]], place)
assert feed_target_names[1] == "gender_id"
- gender_id = create_lod_tensor([[1]])
+ gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
assert feed_target_names[2] == "age_id"
- age_id = create_lod_tensor([[0]])
+ age_id = fluid.create_lod_tensor([[0]], [[1]], place)
assert feed_target_names[3] == "job_id"
- job_id = create_lod_tensor([[10]])
+ job_id = fluid.create_lod_tensor([[10]], [[1]], place)
assert feed_target_names[4] == "movie_id"
- movie_id = create_lod_tensor([[783]])
+ movie_id = fluid.create_lod_tensor([[783]], [[1]], place)
assert feed_target_names[5] == "category_id"
- category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
+ category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place)
assert feed_target_names[6] == "movie_title"
- movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
- [[0, 5]])
+ movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]],
+ [[5]], place)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
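
This file also swaps the manual func_feed helper for fluid.DataFeeder: feeder.feed(mini_batch) converts a list of samples, ordered according to feed_order, into the {var_name: LoDTensor} dict that exe.run expects. A minimal sketch of that pattern under the same API, with placeholder variable names ('x', 'y') chosen only for illustration:

    import paddle.fluid as fluid

    place = fluid.CPUPlace()
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder(feed_list=[x, y], place=place)
    # Each sample follows feed_list order; feed() returns a dict that can be
    # passed as exe.run(..., feed=feeder.feed(mini_batch)).
    feed_dict = feeder.feed([([0.5], [1]), ([1.5], [0])])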
diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py
index b11131456a1f87419407c4d8626ebcde26dd7640..013d72f418cf7ac11eb31fd221052039e896e203 100644
--- a/python/paddle/fluid/tests/test_lod_tensor.py
+++ b/python/paddle/fluid/tests/test_lod_tensor.py
@@ -53,11 +53,14 @@ class TestLoDTensor(unittest.TestCase):
self.assertEqual(_convert_lod(lod), converted_lod)
def test_create_lod_tensor(self):
- # Only numpy array or a fluid LoDTensor is valid input to
- # create_lod_tensor function, currently a list of lists is not.
- data = [[1, 2], [3, 4]]
- self.assertRaises(Exception, create_lod_tensor, data, [],
+ # Create LoDTensor from a list
+ data = [[1, 2, 3], [3, 4]]
+ wrong_lod = [[2, 2]]
+ correct_lod = [[3, 2]]
+ self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod,
fluid.CPUPlace())
+ tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace())
+ self.assertEqual(tensor.lod(), [[0, 3, 5]])
# Create LoDTensor from numpy array
data = numpy.random.random([10, 1])
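
For reference, the offset-based lod asserted above is just the running sum of the length-based lod passed in: lengths [3, 2] become offsets [0, 3, 5]. A tiny illustrative helper (hypothetical name) showing the conversion that _convert_lod is expected to perform:

    def lengths_to_offsets(lengths):
        # e.g. [3, 2] -> [0, 3, 5]: each offset is the cumulative element count.
        offsets = [0]
        for n in lengths:
            offsets.append(offsets[-1] + n)
        return offsets

    assert lengths_to_offsets([3, 2]) == [0, 3, 5]  # matches tensor.lod() == [[0, 3, 5]]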
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index 709b4bf2fcfb180c747ba3539711a58a57e3b77f..b611470fa1ff326df960c349b71006f52d586d8e 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -479,9 +479,9 @@ class OpTest(unittest.TestCase):
def np_dtype_to_fluid_dtype(input):
"""Change the dtype of float16 numpy array
- numpy float16 is binded to paddle::platform::float16
+ numpy float16 is binded to paddle::platform::float16
in tensor_py.h via the help of uint16 data type since
- the internal memory representation of float16 is
+ the internal memory representation of float16 is
uint16_t in paddle and np.uint16 in numpy, which are
themselves binded together by pybind.
@@ -489,9 +489,9 @@ class OpTest(unittest.TestCase):
input: input numpy array
Returns:
- input: The dtype of input will be changed to np.uint16 if
+ input: The dtype of input will be changed to np.uint16 if
it is originally np.float16, such that the internal memory
- of input will be reinterpreted as of dtype np.uint16.
+ of input will be reinterpreted as of dtype np.uint16.
"""
if input.dtype == np.float16:
input.dtype = np.uint16
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index c44ac59ccdb7fa212ab2a8ab83ee0c70fc498f9f..60dc1f83fc32e2551eb2a04ef35f1c8a0ffec769 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -369,11 +369,13 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(output)
print(str(program))
- def test_bilinear_interp(self):
+ def test_upsampling_bilinear2d(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[3, 9, 6], dtype="float32")
- output = layers.bilinear_interp(x, 12, 12)
+ output = layers.upsampling_bilinear2d(x, out_shape=[12, 12])
+ self.assertIsNotNone(output)
+ output = layers.upsampling_bilinear2d(x, scale=3)
self.assertIsNotNone(output)
print(str(program))
diff --git a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..2105d320665367e3ec1bfd7b3a353a144c91244f
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+def PolygonBoxRestore(input):
+ shape = input.shape
+ batch_size = shape[0]
+ geo_channels = shape[1]
+ h = shape[2]
+ w = shape[3]
+ h_indexes = np.array(range(h) * w).reshape(
+ [w, h]).transpose()[np.newaxis, :] # [1, h, w]
+ w_indexes = np.array(range(w) * h).reshape(
+ [h, w])[np.newaxis, :] # [1, h, w]
+ indexes = np.concatenate(
+ (w_indexes, h_indexes))[np.newaxis, :] # [1, 2, h, w]
+ indexes = indexes.repeat(
+ [geo_channels / 2],
+ axis=0)[np.newaxis, :] # [1, geo_channels/2, 2, h, w]
+ indexes = indexes.repeat(
+ [batch_size], axis=0) # [batch_size, geo_channels/2, 2, h, w]
+ return indexes.reshape(
+ input.shape) - input # [batch_size, geo_channels, h, w]
+
+
+class TestPolygonBoxRestoreOp(OpTest):
+ def config(self):
+ self.input_shape = (1, 8, 2, 2)
+
+ def setUp(self):
+ self.config()
+ self.op_type = "polygon_box_transform"
+ input = np.random.random(self.input_shape).astype("float32")
+ self.inputs = {'Input': input}
+ output = PolygonBoxRestore(input)
+ self.outputs = {'Output': output}
+
+ def test_check_output(self):
+ self.check_output()
+
+
+class TestCase1(TestPolygonBoxRestoreOp):
+ def config(self):
+ self.input_shape = (2, 10, 3, 2)
+
+
+class TestCase2(TestPolygonBoxRestoreOp):
+ def config(self):
+ self.input_shape = (3, 12, 4, 5)
+
+
+if __name__ == '__main__':
+ unittest.main()
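
The PolygonBoxRestore reference above builds a per-pixel coordinate grid, with even channels holding the column (x) index and odd channels the row (y) index, and subtracts the input geometry maps from it. A compact NumPy sketch of the same index bookkeeping, with an illustrative function name; for a random input of shape (1, 8, 2, 2) it should agree with PolygonBoxRestore above, up to dtype:

    import numpy as np

    def polygon_box_restore_simple(geo):
        # geo: [N, C, H, W] with C even; channel pairs hold (x, y) offsets.
        n, c, h, w = geo.shape
        cols, rows = np.meshgrid(np.arange(w), np.arange(h))  # each [H, W]
        grid = np.stack([cols, rows])                         # [2, H, W]
        grid = np.tile(grid, (n, c // 2, 1, 1, 1)).reshape(geo.shape)
        return grid - geo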
diff --git a/python/paddle/fluid/tests/unittests/test_preprocessor.py b/python/paddle/fluid/tests/unittests/test_preprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..cbf1a7e0c50a87cd43507ffdb94109873cf4e5d9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_preprocessor.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+import paddle.v2.dataset.mnist as mnist
+
+
+class TestPreprocessor(unittest.TestCase):
+ def setUp(self):
+ with fluid.program_guard(fluid.Program(), fluid.Program()):
+ reader = paddle.batch(mnist.train(), batch_size=32)
+ feeder = fluid.DataFeeder(
+ feed_list=[ # order is image and label
+ fluid.layers.data(
+ name='image', shape=[784]),
+ fluid.layers.data(
+ name='label', shape=[1], dtype='int64'),
+ ],
+ place=fluid.CPUPlace())
+ self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file(
+ './mnist_for_preprocessor_test.recordio', reader, feeder)
+
+ def test_main(self):
+ N = 10
+
+ img_expected_res = []
+ lbl_expected_res = []
+ with fluid.program_guard(fluid.Program(), fluid.Program()):
+ data_file = fluid.layers.io.open_recordio_file(
+ './mnist_for_preprocessor_test.recordio',
+ shapes=[[-1, 784], [-1, 1]],
+ lod_levels=[0, 0],
+ dtypes=['float32', 'int64'])
+ img, lbl = fluid.layers.io.read_file(data_file)
+
+ if fluid.core.is_compiled_with_cuda():
+ place = fluid.CUDAPlace(0)
+ else:
+ place = fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+ for _ in range(N):
+ img_v, lbl_v = exe.run(fetch_list=[img, lbl])
+ img_expected_res.append(img_v / 2)
+ lbl_expected_res.append(lbl_v + 1)
+
+ img_actual_res = []
+ lbl_actual_res = []
+ with fluid.program_guard(fluid.Program(), fluid.Program()):
+ data_file = fluid.layers.io.open_recordio_file(
+ './mnist_for_preprocessor_test.recordio',
+ shapes=[[-1, 784], [-1, 1]],
+ lod_levels=[0, 0],
+ dtypes=['float32', 'int64'])
+ preprocessor = fluid.layers.io.Preprocessor(reader=data_file)
+ with preprocessor.block():
+ img, lbl = preprocessor.inputs()
+ img_out = img / 2
+ lbl_out = lbl + 1
+ preprocessor.outputs(img_out, lbl_out)
+
+ data_file = fluid.layers.io.double_buffer(preprocessor())
+ img, lbl = fluid.layers.io.read_file(data_file)
+
+ if fluid.core.is_compiled_with_cuda():
+ place = fluid.CUDAPlace(0)
+ else:
+ place = fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+ for _ in range(N):
+ img_v, lbl_v = exe.run(fetch_list=[img, lbl])
+ img_actual_res.append(img_v)
+ lbl_actual_res.append(lbl_v)
+
+        for idx in range(N):
+            self.assertTrue(np.allclose(img_expected_res[idx], img_actual_res[idx]))
+            self.assertTrue(np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]))