diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6140340890c0e5025eb08209e8ea78df918b4dc0..eeda759ff18ccb86ce6a585fe41cb972ea3ae295 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,14 @@ repos: entry: bash ./tools/codestyle/cpplint_pre_commit.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$ +- repo: local + hooks: + - id: pylint-doc-string + name: pylint + description: Check python docstring style using docstring_checker. + entry: bash ./tools/codestyle/pylint_pre_commit.hook + language: system + files: \.(py)$ - repo: https://github.com/PaddlePaddle/pre-commit-golang sha: 8337620115c25ff8333f1b1a493bd031049bd7c0 hooks: diff --git a/.travis.yml b/.travis.yml index 3391e2c3cab9938c9dc5705b51367c707d3bbe9d..8c772030925dcad3909f142b08e4d8057a3f89b7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,8 @@ env: addons: ssh_known_hosts: 13.229.163.131 before_install: + # For pylint docstring checker + - sudo pip install pylint pytest astroid isort - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: diff --git a/CMakeLists.txt b/CMakeLists.txt index 710b4774ca021c2e916460e7253d4fbf979a38cc..cfaab206e1f321a55119d4a8d65c4a99d3819fff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,10 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) +option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) +option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) +option(WITH_CONTRIB "Compile the third-party contribution" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -202,7 +205,7 @@ endif(USE_NNPACK) add_subdirectory(proto) -if(NOT MOBILE_INFERENCE) +if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY) # "add_subdirectory(go)" should be placed after the following line, # because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer) @@ -230,3 +233,7 @@ if(WITH_DOC) find_python_module(recommonmark REQUIRED) add_subdirectory(doc) endif() + +if (WITH_CONTRIB) + add_subdirectory(paddle/contrib) +endif() diff --git a/Dockerfile b/Dockerfile index ea39efd00bb5c0a7deb3f6d57083d83a673b883c..80a96983ec1ca6b9ec440f7e95de6c328eb1ed40 100644 --- a/Dockerfile +++ b/Dockerfile @@ -79,6 +79,9 @@ RUN pip install pre-commit 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ pip install opencv-python +#For docstring checker +RUN pip install pylint pytest astroid isort + COPY ./python/requirements.txt /root/ RUN pip install -r /root/requirements.txt @@ -101,6 +104,3 @@ RUN echo 'root:root' | chpasswd RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config EXPOSE 22 - -# development image default do build work -CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"] diff --git a/Dockerfile.android b/Dockerfile.android index 848a7eba6f1421432addae8acff407b611adb4ae..48db2efea21a648657e3f490c95429b9a29ede52 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ rm -rf /opt/android-ndk-tmp - -CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/benchmark/cluster/README.md b/benchmark/cluster/README.md deleted file mode 100644 index 64816098a524f064ec12474a736cd4c721227a70..0000000000000000000000000000000000000000 --- a/benchmark/cluster/README.md +++ /dev/null @@ -1,196 +0,0 @@ -# Cluster Training Benchmark - -## Setup - -- Platform - - Kubernetes: v1.6.2 - - Linux Kernel: v3.10.0 - -- Resource - - CPU: 10 Cores per Pod - - Memory: 5GB per Pod - -- Docker Image - - We use different base Docker Image to run the benchmark on Kubernetes: - - PaddlePaddle v2: paddlepaddle/paddle:0.11.0 - - PaddlePaddle Fluid: paddlepaddle/paddle:[commit-id] - - TensorFlow: tensorflow/tensorflow:1.5.0-rc0 - -- Model - vgg16 is used in this benchmark. - -## Cases - -- Variable - - Batch Size of training data. - - PServer count of the training job. - - The number of trainers. - -- Invariant - - The resource of trainer/pserver Pod. - -### Measure the Performance for Different Batch Size - -- PServer Count: 40 -- Trainer Count: 100 -- Metrics: mini-batch / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Batch Size | 32 | 64 | 128 | 256 |
-| ------------------ | -- | -- | --- | --- |
-| PaddlePaddle Fluid | - | - | - | - |
-| PaddlePaddle v2 | - | - | - | - |
-| TensorFlow | - | - | - | - |
-
-### Measure the Performance for Different PServer Count
-
-- Trainer Count: 100
-- Batch Size: 64
-- Metrics: mini-batch / sec
-
-| PServer Count | 10 | 20 | 40 | 60 |
-| ------------------ | -- | -- | -- | -- |
-| PaddlePaddle Fluid | - | - | - | - |
-| PaddlePaddle v2 | - | - | - | - |
-| TensorFlow | - | - | - | - |
-
-### Measure Parallel Efficiency By Increasing Trainer Count
-
-- PServer Count: 20
-- Batch Size: 64
-- Metrics:
-
-$S = T_1 / T_N$
-
-where $S$ is the speedup: the ratio of $T_1$, the training time with 1 trainer, to $T_N$, the training time with $N$ trainers. The parallel efficiency (illustrated in the sketch after the table below) is then:
-
-$E = S / N$
-
-| Trainer Count | 1 | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
-| ------------------ | - | -- | -- | -- | -- | -- | -- | -- | -- | -- | --- |
-| PaddlePaddle Fluid | - | - | - | - | - | - | - | - | - | - | - |
-| PaddlePaddle v2 | - | - | - | - | - | - | - | - | - | - | - |
-| TensorFlow | - | - | - | - | - | - | - | - | - | - | - |
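To make the $S$ and $E$ metrics above concrete, here is a small sketch of the speedup and parallel-efficiency computation; the timings below are hypothetical placeholders, not benchmark results.

```python
def parallel_metrics(t1, tn, n):
    """Speedup S = T1 / TN and parallel efficiency E = S / N."""
    speedup = t1 / tn          # S: how much faster N trainers are than 1
    efficiency = speedup / n   # E: fraction of ideal linear scaling achieved
    return speedup, efficiency

# Hypothetical numbers: 1 trainer takes 100 min, 20 trainers take 10 min.
s, e = parallel_metrics(t1=100.0, tn=10.0, n=20)
print("S = %.2f, E = %.2f" % (s, e))  # S = 10.00, E = 0.50
```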
- - -## Reproduce the benchmark - -TODO diff --git a/benchmark/cluster/vgg16/Dockerfile b/benchmark/cluster/vgg16/Dockerfile deleted file mode 100644 index 13ad8e1b6237e6f41a076c4fb54311728832ae33..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04 - -# you can get mirror list here: -# https://launchpad.net/ubuntu/+archivemirrors -ARG UBUNTU_MIRROR -RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' - -RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev -RUN pip install -U kubernetes opencv-python - -RUN pip install paddlepaddle -# if network is slowly, you may need to add proxy here. -# ENV https_proxy= -RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python' -RUN pip uninstall -y paddlepaddle -# unset proxy if it is setted. -# ENV https_proxy="" - -# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF, -# so we must build one with distribute support to install in this image. -ADD *.whl / -RUN pip install /*.whl && rm -f /*.whl -ENV LD_LIBRARY_PATH=/usr/local/lib - -# tf k8s -RUN pip install tensorflow==1.4.0 -ADD tf_k8s /usr/bin -RUN chmod +x /usr/bin/tf_k8s -ADD vgg16_tf.py /workspace/ - -# below lines may change a lot for debugging -ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin -ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root -RUN chmod +x /usr/bin/paddle_k8s -ADD vgg16_fluid.py vgg16_v2.py /workspace/ diff --git a/benchmark/cluster/vgg16/README.md b/benchmark/cluster/vgg16/README.md deleted file mode 100644 index d56a912b9b03986e32693363f82df05a34b779e9..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/README.md +++ /dev/null @@ -1,195 +0,0 @@ -# Performance for Distributed vgg16 - -## Test Result - -### Hardware Infomation - -- CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz -- cpu MHz : 2101.000 -- cache size : 20480 KB - -### Blas settings - -Setting environment variable: `MKL_NUM_THREADS=1`. - -### Single Node Single Thread - -- Metrics: samples / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Batch Size | 32 | 64 | 128 | 256 |
-| ------------------ | ----- | ----- | ----- | ----- |
-| PaddlePaddle Fluid | 15.44 | 16.32 | 16.74 | 16.79 |
-| PaddlePaddle v2 | 15.97 | 17.04 | 17.60 | 17.83 |
-| TensorFlow | 9.09 | 9.10 | 9.24 | 8.66 |
-
-### Different Batch Size
-
-- PServer Count: 10
-- Trainer Count: 20
-- Metrics: samples / sec
-
-| Batch Size | 32 | 64 | 128 | 256 |
-| ------------------ | ------ | ------ | ------ | ------ |
-| PaddlePaddle Fluid | 190.20 | 222.15 | 247.40 | 258.18 |
-| PaddlePaddle v2 | 170.96 | 233.71 | 256.14 | 329.23 |
-| TensorFlow | - | - | - | - |
-
-### Accelerate Rate
-
-- Pserver Count: 20
-- Batch Size: 128
-- Metrics: samples / sec
-
-| Trainer Count | 20 | 40 | 80 | 100 |
-| ------------------ | --- | --- | --- | --- |
-| PaddlePaddle Fluid | 263.29 (78.64%) | 518.80 (77.47%) | 836.26 (62.44%) | 1019.29 (60.89%) |
-| PaddlePaddle v2 (need more tests) | 326.85 (92.85%) | 534.58 (75.93%) | 853.30 (60.60%) | 1041.99 (59.20%) |
-| TensorFlow | - | - | - | - |
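The parenthesized percentages are consistent with a scaling-efficiency reading: throughput divided by (trainer count × the 16.74 samples/sec single-node, single-thread Fluid result for batch size 128 reported above). A short sketch to check this; the interpretation is the editor's, not stated in the original:

```python
def scaling_efficiency(throughput, trainers, single_thread=16.74):
    """Measured throughput as a fraction of ideal linear scaling."""
    return throughput / (trainers * single_thread)

# Fluid rows from the table above.
for n, samples_per_sec in [(20, 263.29), (40, 518.80), (80, 836.26), (100, 1019.29)]:
    print("%3d trainers: %.2f%%" % (n, 100.0 * scaling_efficiency(samples_per_sec, n)))
# Prints 78.64%, 77.48%, 62.44%, 60.89%, matching the table up to rounding.
```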
-
-### Different Pserver Count
-
-- Trainer Count: 60
-- Batch Size: 128
-- Metrics: samples / sec
-
-| PServer Count | 3 | 6 | 10 | 20 |
-| ------------------ | ----- | ----- | ----- | ----- |
-| PaddlePaddle Fluid (should fix in next PR) | 589.1 | 592.6 | 656.4 | 655.8 |
-| PaddlePaddle v2 (need more tests) | 593.4 | 791.3 | 729.7 | 821.7 |
-| TensorFlow | - | - | - | - |
- - -*The performance gap between Fuild and v2 comes from the network interference.* - - -## Steps to Run the Performance Test - -1. You must re-compile PaddlePaddle and enable `-DWITH_DISTRIBUTE` to build PaddlePaddle with distributed support. -1. When the build finishes, copy the output `whl` package located under `build/python/dist` to current directory. -1. Run `docker build -t [image:tag] .` to build the docker image and run `docker push [image:tag]` to push the image to reponsitory so kubernetes can find it. -1. Run `kubectl create -f pserver.yaml && kubectl create -f trainer.yaml` to start the job on your kubernetes cluster (you must configure the `kubectl` client before this step). -1. Run `kubectl get po` to get running pods, and run `kubectl logs [podID]` to fetch the pod log of pservers and trainers. - -Check the logs for the distributed training progress and analyze the performance. - -## Enable Verbos Logs - -Edit `pserver.yaml` and `trainer.yaml` and add an environment variable `GLOG_v=3` and `GLOG_logtostderr=1` to see what happend in detail. diff --git a/benchmark/cluster/vgg16/fluid_pserver.yaml b/benchmark/cluster/vgg16/fluid_pserver.yaml deleted file mode 100644 index ee8b0763b62fc011f40f6197e929a68b48a93e47..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/fluid_pserver.yaml +++ /dev/null @@ -1,72 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: ReplicaSet -metadata: - name: vgg16job-pserver -spec: - replicas: 10 - template: - metadata: - labels: - paddle-job-pserver: vgg16job - spec: - hostNetwork: true - imagePullSecrets: - - name: job-registry-secret - containers: - - name: pserver - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PADDLE_JOB_NAME - value: vgg16job - - name: MKL_NUM_THREADS - value: "1" - - name: TRAINING_ROLE - value: "PSERVER" - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "1" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - command: ["paddle_k8s", "start_fluid"] - resources: - requests: - memory: 10Gi - cpu: 4 - limits: - memory: 10Gi - cpu: 4 diff --git a/benchmark/cluster/vgg16/fluid_trainer.yaml b/benchmark/cluster/vgg16/fluid_trainer.yaml deleted file mode 100644 index 3d56caac009464d1073423bb63abff1f8b0cf28f..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/fluid_trainer.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: vgg16job-trainer -spec: - parallelism: 20 - completions: 20 - template: - metadata: - labels: - paddle-job: vgg16job - spec: - imagePullSecrets: - - name: job-registry-secret - hostNetwork: true - containers: - - name: trainer - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - 
imagePullPolicy: Always - command: ["paddle_k8s", "start_fluid"] - env: - - name: PADDLE_JOB_NAME - value: vgg16job - - name: TRAINING_ROLE - value: "TRAINER" - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 128" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "1" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - resources: - requests: - memory: 40Gi - cpu: 2 - limits: - memory: 40Gi - cpu: 2 - restartPolicy: Never diff --git a/benchmark/cluster/vgg16/run_vgg_dist.sh b/benchmark/cluster/vgg16/run_vgg_dist.sh deleted file mode 100644 index 8c0501439e9d5fa175f5aa9b62d286e690a10904..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/run_vgg_dist.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Update to point to the source file. -VGG_SRC="vgg16_fluid.py" - -export TRAINING_ROLE=PSERVER -export TRAINERS=2 -export POD_IP=127.0.0.1 -export PADDLE_INIT_PORT=6174 -MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 & - -# Need to wait for the ps to start first. -sleep 10 -echo "done start ps" - -export TRAINING_ROLE=TRAINER -export TRAINERS=2 -export POD_IP=127.0.0.1 -export PADDLE_INIT_PORT=6174 -CUDA_VISIBLE_DEVICES=4 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=0 & -CUDA_VISIBLE_DEVICES=5 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=1 & diff --git a/benchmark/cluster/vgg16/tf_k8s b/benchmark/cluster/vgg16/tf_k8s deleted file mode 100644 index 4fc263d5f681aeabfa71f1758714d269d987b272..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/tf_k8s +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash -check_trainer_ret() { - ret=$1 - stdbuf -oL echo "job returned $ret...setting pod return message..." - stdbuf -oL echo "===============================" - - if [ $ret -eq 136 ] ; then - echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log - elif [ $ret -eq 139 ] ; then - echo "Segmentation Fault" > /dev/termination-log - elif [ $ret -eq 1 ] ; then - echo "General Error" > /dev/termination-log - elif [ $ret -eq 134 ] ; then - echo "Program Abort" > /dev/termination-log - fi - stdbuf -oL echo "termination log wroted..." 
- exit $ret -} - -g_pservers="" -g_trainers="" - -wait_running_pods(){ - pserver_label="tf-job-pserver=${JOB_NAME}" - trainer_label="tf-job-trainer=${JOB_NAME}" - - stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS_NUM} - stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS_NUM} - - g_pservers=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PORT}) - g_trainers=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PORT}) -} - -start_tf_pserver(){ - wait_running_pods - - label="tf-job-pserver=${JOB_NAME}" - pserver_id=$(python /root/k8s_tools.py fetch_id ${label}) - - cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \ - --job_name=${TF_JOB_NAME} --task_index=${pserver_id}" - - stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}" -} - -start_tf_trainer(){ - wait_running_pods - - label="tf-job-trainer=${JOB_NAME}" - trainer_id=$(python /root/k8s_tools.py fetch_id ${label}) - - cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \ - --job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}" - - stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}" - check_trainer_ret $? -} - -start_tf(){ - if [[ "${TF_JOB_NAME}" == "worker" ]]; then - start_tf_trainer - else - start_tf_pserver - fi -} - -usage() { - echo "usage: tf_k8s []:" - echo " start_tf Start tensorflow jobs" -} - -case "$1" in - start_tf) - start_tf - ;; - --help) - usage - ;; - *) - usage - ;; -esac diff --git a/benchmark/cluster/vgg16/tf_pserver.yaml b/benchmark/cluster/vgg16/tf_pserver.yaml deleted file mode 100644 index 5e37c700819119c8af05c40fe4b8d13911efc3e1..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/tf_pserver.yaml +++ /dev/null @@ -1,56 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: ReplicaSet -metadata: - name: vgg16job-tf-pserver -spec: - replicas: 10 - template: - metadata: - labels: - tf-job-pserver: vgg16job-tf - spec: - hostNetwork: true - imagePullSecrets: - - name: job-registry-secret - containers: - - name: pserver - image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16" - imagePullPolicy: Always - command: ["tf_k8s", "start_tf"] - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PORT - value: "32036" - - name: ENTRY - value: "python vgg16_tf.py" - - name: JOB_NAME - value: vgg16job-tf - - name: PSERVERS_NUM - value: "10" - - name: TF_JOB_NAME - value: "ps" - - name: TRAINERS_NUM - value: "20" - - name: BATCH_SIZE - value: "128" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: NUM_PASSES - value: "1" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - resources: - requests: - memory: 10Gi - cpu: 4 - limits: - memory: 10Gi - cpu: 4 diff --git a/benchmark/cluster/vgg16/tf_trainer.yaml b/benchmark/cluster/vgg16/tf_trainer.yaml deleted file mode 100644 index 08795df3addfa7b618db24a65e57be190e268f06..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/tf_trainer.yaml +++ /dev/null @@ -1,58 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: vgg16job-tf-trainer -spec: - parallelism: 20 - completions: 20 - template: - metadata: - labels: - tf-job-trainer: vgg16job-tf - spec: - imagePullSecrets: - - name: job-registry-secret - hostNetwork: true - containers: - - name: trainer - image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16" - imagePullPolicy: Always - command: ["tf_k8s", 
"start_tf"] - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PORT - value: "32036" - - name: JOB_NAME - value: vgg16job-tf - - name: TF_JOB_NAME - value: "worker" - - name: ENTRY - value: "python vgg16_tf.py" - - name: PSERVERS_NUM - value: "10" - - name: BATCH_SIZE - value: "128" - - name: TRAINERS_NUM - value: "20" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: NUM_PASSES - value: "1" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - resources: - requests: - memory: 40Gi - cpu: 2 - limits: - memory: 40Gi - cpu: 2 - restartPolicy: Never diff --git a/benchmark/cluster/vgg16/v2_pserver.yaml b/benchmark/cluster/vgg16/v2_pserver.yaml deleted file mode 100644 index dd1271e0cf399184134c06b3200ee1202c65cef0..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/v2_pserver.yaml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: ReplicaSet -metadata: - name: vgg16v2job-pserver -spec: - replicas: 10 - template: - metadata: - labels: - paddle-job-pserver: vgg16v2job - spec: - hostNetwork: true - imagePullSecrets: - - name: job-registry-secret - containers: - - name: pserver - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PADDLE_JOB_NAME - value: vgg16v2job - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "python train.py" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "1" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - command: ["paddle_k8s", "start_pserver"] - resources: - requests: - memory: 10Gi - cpu: 4 - limits: - memory: 10Gi - cpu: 4 diff --git a/benchmark/cluster/vgg16/v2_trainer.yaml b/benchmark/cluster/vgg16/v2_trainer.yaml deleted file mode 100644 index 12c8964066cbcfe8d2a44de2f51a3d12ea422fe2..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/v2_trainer.yaml +++ /dev/null @@ -1,65 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: vgg16v2job-trainer -spec: - parallelism: 20 - completions: 20 - template: - metadata: - labels: - paddle-job: vgg16v2job - spec: - imagePullSecrets: - - name: job-registry-secret - hostNetwork: true - containers: - - name: trainer - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - command: ["paddle_k8s", "start_trainer", "v2"] - env: - - name: PADDLE_JOB_NAME - value: vgg16v2job - - name: BATCH_SIZE - value: "256" - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "cd /workspace && MKL_NUM_THREADS=1 python /workspace/vgg16_v2.py" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: 
PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "2" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - resources: - requests: - memory: 40Gi - cpu: 2 - limits: - memory: 40Gi - cpu: 2 - restartPolicy: Never diff --git a/benchmark/cluster/vgg16/vgg16_fluid.py b/benchmark/cluster/vgg16/vgg16_fluid.py deleted file mode 100644 index e9360ab4c79d23bdf9f84d0c0d407af6d39bde3e..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/vgg16_fluid.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""VGG16 benchmark in Fluid""" -from __future__ import print_function - -import sys -import time -import numpy as np -import paddle.v2 as paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.profiler as profiler -import argparse -import functools -import os -from paddle.fluid import debuger - - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - '--batch_size', type=int, default=16, help="Batch size for training.") -parser.add_argument( - '--learning_rate', - type=float, - default=1e-3, - help="Learning rate for training.") -parser.add_argument('--num_passes', type=int, default=50, help="No. 
of passes.") -parser.add_argument( - '--device', - type=str, - default='CPU', - choices=['CPU', 'GPU'], - help="The device type.") -parser.add_argument('--device_id', type=int, default=0, help="The device id.") -parser.add_argument( - '--data_format', - type=str, - default='NCHW', - choices=['NCHW', 'NHWC'], - help='The data order, now only support NCHW.') -parser.add_argument( - '--data_set', - type=str, - default='flowers', - choices=['cifar10', 'flowers'], - help='Optional dataset for benchmark.') -parser.add_argument( - '--local', - type=str2bool, - default=True, - help='Whether to run as local mode.') - -parser.add_argument( - "--ps_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--trainer_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--profile", action='store_true', help="If set, profile a few steps.") - -# Flags for defining the tf.train.Server -parser.add_argument( - "--task_index", type=int, default=0, help="Index of task within the job") -args = parser.parse_args() - - -def vgg16_bn_drop(input): - def conv_block(input, num_filter, groups, dropouts): - return fluid.nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max') - - conv1 = conv_block(input, 64, 2, [0.3, 0]) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) - fc1 = fluid.layers.fc(input=drop, size=4096, act=None) - bn = fluid.layers.batch_norm(input=fc1, act='relu') - drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) - fc2 = fluid.layers.fc(input=drop2, size=4096, act=None) - return fc2 - - -def main(): - if args.data_set == "cifar10": - classdim = 10 - if args.data_format == 'NCHW': - data_shape = [3, 32, 32] - else: - data_shape = [32, 32, 3] - else: - classdim = 102 - if args.data_format == 'NCHW': - data_shape = [3, 224, 224] - else: - data_shape = [224, 224, 3] - - # Input data - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - - # Train program - net = vgg16_bn_drop(images) - predict = fluid.layers.fc(input=net, size=classdim, act='softmax') - cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) - - # Evaluator - batch_size = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size) - - # inference program - inference_program = fluid.default_main_program().clone() - with fluid.program_guard(inference_program): - inference_program = fluid.io.get_inference_program(batch_acc) - - # Optimization - optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) - optimize_ops, params_grads = optimizer.minimize(avg_cost) - - # Initialize executor - place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace( - args.device_id) - exe = fluid.Executor(place) - - # test - def test(exe): - test_pass_acc = fluid.average.WeightedAverage() - for batch_id, data in enumerate(test_reader()): - img_data = np.array(map(lambda x: x[0].reshape(data_shape), - data)).astype("float32") - y_data = 
np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - outs = exe.run(inference_program, - feed={"pixel": img_data, - "label": y_data}, - fetch_list=[batch_acc, batch_size]) - test_pass_acc.add(value=np.array(outs[0]), weight=np.array(outs[1])) - - return test_pass_acc.eval() - - def train_loop(exe, trainer_prog): - iters = 0 - ts = time.time() - train_pass_acc = fluid.average.WeightedAverage() - for pass_id in range(args.num_passes): - # train - start_time = time.time() - num_samples = 0 - train_pass_acc.reset() - - def run_step(batch_id, data): - img_data = np.array( - map(lambda x: x[0].reshape(data_shape), data)).astype( - "float32") - y_data = np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - loss, acc, b_size = exe.run( - trainer_prog, - feed={"pixel": img_data, - "label": y_data}, - fetch_list=[avg_cost, batch_acc, batch_size]) - return loss, acc, b_size - - if args.profile: - with profiler.profiler('All', 'total', - '/tmp/profile_vgg_%d' % args.task_index): - for batch_id, data in enumerate(train_reader()): - if batch_id > 5: break - run_step(batch_id, data) - - total_time = 0.0 - count = 0 - for batch_id, data in enumerate(train_reader()): - ts = time.time() - loss, acc, b_size = run_step(batch_id, data) - iters += 1 - num_samples += len(data) - train_pass_acc.add(value=acc, weight=b_size) - - duration = time.time() - ts - total_time += duration - count += len(data) - print( - "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, " - "Speed = %.2f (%.2f) img/s" % (pass_id, iters, loss, acc, - len(data) / duration, - count / total_time) - ) # The accuracy is the accumulation of batches, but not the current batch. - - pass_elapsed = time.time() - start_time - pass_train_acc = train_pass_acc.eval() - pass_test_acc = test(exe) - print("Task:%d Pass = %d, Training performance = %f imgs/s, " - "Train accuracy = %f, Test accuracy = %f\n" % - (args.task_index, pass_id, num_samples / pass_elapsed, - pass_train_acc, pass_test_acc)) - - if args.local: - # Parameter initialization - exe.run(fluid.default_startup_program()) - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() if args.data_set == 'cifar10' - else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.dataset.cifar.test10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), - batch_size=args.batch_size) - train_loop(exe, fluid.default_main_program()) - else: - trainers = int(os.getenv("TRAINERS")) # total trainer count - print("trainers total: ", trainers) - - training_role = os.getenv( - "TRAINING_ROLE", - "TRAINER") # get the training role: trainer/pserver - - t = fluid.DistributeTranspiler() - t.transpile( - trainer_id=args.task_index, - pservers=args.ps_hosts, - trainers=trainers) - - if training_role == "PSERVER": - current_endpoint = os.getenv("POD_IP") + ":" + os.getenv( - "PADDLE_INIT_PORT") - if not current_endpoint: - print("need env SERVER_ENDPOINT") - exit(1) - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program(current_endpoint, - pserver_prog) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - # Parameter initialization - exe.run(fluid.default_startup_program()) - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() if args.data_set == 'cifar10' - else 
paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else - paddle.dataset.flowers.test(), - batch_size=args.batch_size) - - trainer_prog = t.get_trainer_program() - feeder = fluid.DataFeeder(feed_list=[images, label], place=place) - # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver - exe.run(fluid.default_startup_program()) - train_loop(exe, trainer_prog) - else: - print("environment var TRAINER_ROLE should be TRAINER os PSERVER") - - -def print_arguments(): - print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -if __name__ == "__main__": - print_arguments() - main() diff --git a/benchmark/cluster/vgg16/vgg16_tf.py b/benchmark/cluster/vgg16/vgg16_tf.py deleted file mode 100644 index 2d220478acae46566760209dbc012cff316946aa..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/vgg16_tf.py +++ /dev/null @@ -1,366 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""VGG16 benchmark in TensorFlow -You can get distribution example template structure here: -https://medium.com/clusterone/how-to-write-distributed-tensorflow-code-with-an-example-on-tensorport-70bf3306adcb -https://www.tensorflow.org/deploy/distributed -""" - -import tensorflow as tf -import paddle.v2 as paddle -import numpy as np -import argparse -import time - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - '--batch_size', type=int, default=128, help="Batch size for training.") -parser.add_argument( - '--learning_rate', - type=float, - default=1e-3, - help="Learning rate for training.") -parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.") -parser.add_argument( - '--device', - type=str, - default='CPU', - choices=['CPU', 'GPU'], - help="The device type.") -parser.add_argument( - '--data_format', - type=str, - default='NHWC', - choices=['NCHW', 'NHWC'], - help='The data order, NCHW=[batch, channels, height, width].' 
- 'Only support NHWC right now.') -parser.add_argument( - '--data_set', - type=str, - default='cifar10', - choices=['cifar10', 'flowers'], - help='Optional dataset for benchmark.') - -parser.add_argument( - "--ps_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--worker_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--job_name", type=str, default="", help="One of 'worker', 'ps'") -# Flags for defining the tf.train.Server -parser.add_argument( - "--task_index", type=int, default=0, help="Index of task within the job") - -args = parser.parse_args() - - -class VGG16Model(object): - def __init__(self): - self.parameters = [] - - def batch_norm_relu(self, inputs, is_training): - """Performs a batch normalization followed by a ReLU.""" - # We set fused=True for a significant speed boost. See - # https://www.tensorflow.org/speed/speed_guide#common_fused_ops - inputs = tf.layers.batch_normalization( - inputs=inputs, - axis=1 if args.data_format == 'NCHW' else -1, - momentum=0.9, - epsilon=1e-05, - center=True, - scale=True, - training=is_training, - fused=True) - inputs = tf.nn.relu(inputs) - return inputs - - def conv_bn_layer(self, - name, - images, - kernel_shape, - is_training, - drop_rate=0.0): - with tf.name_scope(name) as scope: - kernel = tf.Variable( - tf.truncated_normal( - kernel_shape, dtype=tf.float32, stddev=1e-1), - name='weights') - conv = tf.nn.conv2d( - images, - kernel, [1, 1, 1, 1], - data_format=args.data_format, - padding='SAME') - biases = tf.Variable( - tf.constant( - 0.0, shape=[kernel_shape[-1]], dtype=tf.float32), - trainable=True, - name='biases') - out = tf.nn.bias_add(conv, biases) - out = self.batch_norm_relu(out, is_training) - out = tf.layers.dropout(out, rate=drop_rate, training=is_training) - return out - - def fc_layer(self, name, inputs, shape): - with tf.name_scope(name) as scope: - fc_w = tf.Variable( - tf.truncated_normal( - shape, dtype=tf.float32, stddev=1e-1), - name='weights') - fc_b = tf.Variable( - tf.constant( - 0.0, shape=[shape[-1]], dtype=tf.float32), - trainable=True, - name='biases') - out = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b) - return out - - def network(self, images, class_dim, is_training): - """ VGG16 model structure. 
- - TODO(kuke): enable this network to support the 'NCHW' data format - """ - - # conv1 - conv1_1 = self.conv_bn_layer( - 'conv1_1', images, [3, 3, 3, 64], is_training, drop_rate=0.3) - conv1_2 = self.conv_bn_layer( - 'conv1_2', conv1_1, [3, 3, 64, 64], is_training, drop_rate=0.0) - # pool1 - pool1 = tf.nn.max_pool( - conv1_2, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool1') - # conv2 - conv2_1 = self.conv_bn_layer( - 'conv2_1', pool1, [3, 3, 64, 128], is_training, drop_rate=0.4) - conv2_2 = self.conv_bn_layer( - 'conv2_2', conv2_1, [3, 3, 128, 128], is_training, drop_rate=0.0) - # pool2 - pool2 = tf.nn.max_pool( - conv2_2, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool2') - # conv3 - conv3_1 = self.conv_bn_layer( - 'conv3_1', pool2, [3, 3, 128, 256], is_training, drop_rate=0.4) - conv3_2 = self.conv_bn_layer( - 'conv3_2', conv3_1, [3, 3, 256, 256], is_training, drop_rate=0.4) - conv3_3 = self.conv_bn_layer( - 'conv3_3', conv3_2, [3, 3, 256, 256], is_training, drop_rate=0.0) - # pool3 - pool3 = tf.nn.max_pool( - conv3_3, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool3') - # conv4 - conv4_1 = self.conv_bn_layer( - 'conv4_1', pool3, [3, 3, 256, 512], is_training, drop_rate=0.4) - conv4_2 = self.conv_bn_layer( - 'conv4_2', conv4_1, [3, 3, 512, 512], is_training, drop_rate=0.4) - conv4_3 = self.conv_bn_layer( - 'conv4_3', conv4_2, [3, 3, 512, 512], is_training, drop_rate=0.0) - # pool4 - pool4 = tf.nn.max_pool( - conv4_3, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool4') - # conv5 - conv5_1 = self.conv_bn_layer( - 'conv5_1', pool4, [3, 3, 512, 512], is_training, drop_rate=0.4) - conv5_2 = self.conv_bn_layer( - 'conv5_2', conv5_1, [3, 3, 512, 512], is_training, drop_rate=0.4) - conv5_3 = self.conv_bn_layer( - 'conv5_3', conv5_2, [3, 3, 512, 512], is_training, drop_rate=0.0) - # pool5 - pool5 = tf.nn.max_pool( - conv5_3, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool4') - # flatten - shape = int(np.prod(pool5.get_shape()[1:])) - pool5_flat = tf.reshape(pool5, [-1, shape]) - # fc1 - drop = tf.layers.dropout(pool5_flat, rate=0.5, training=is_training) - fc1 = self.fc_layer('fc1', drop, [shape, 512]) - # fc2 - bn = self.batch_norm_relu(fc1, is_training) - drop = tf.layers.dropout(bn, rate=0.5, training=is_training) - fc2 = self.fc_layer('fc2', drop, [512, 512]) - - fc3 = self.fc_layer('fc3', fc2, [512, class_dim]) - - return fc3 - - -def run_benchmark(cluster_spec, server): - """Run benchmark on cifar10 or flowers.""" - - if args.data_set == "cifar10": - class_dim = 10 - raw_shape = (3, 32, 32) - dat_shape = (None, 32, 32, 3) if args.data_format == 'NHWC' else ( - None, 3, 32, 32) - else: - class_dim = 102 - raw_shape = (3, 224, 224) - dat_shape = (None, 224, 224, 3) if args.data_format == 'NHWC' else ( - None, 3, 224, 224) - - device = tf.train.replica_device_setter( - worker_device="/job:worker/task:{}".format(args.task_index), - cluster=cluster_spec) - - with tf.device(device): - images = tf.placeholder(tf.float32, shape=dat_shape) - labels = tf.placeholder(tf.int64, shape=(None, )) - is_training = tf.placeholder('bool') - onehot_labels = tf.one_hot(labels, depth=class_dim) - - vgg16 = VGG16Model() - logits = vgg16.network(images, class_dim, is_training) - loss = tf.losses.softmax_cross_entropy( - onehot_labels=onehot_labels, logits=logits) - avg_loss = tf.reduce_mean(loss) - - correct = tf.equal(tf.argmax(logits, 1), labels) - accuracy = 
tf.reduce_mean(tf.cast(correct, tf.float32)) - - optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - global_step = tf.Variable(0, name='global_step', trainable=False) - with tf.control_dependencies(update_ops): - train_op = optimizer.minimize(avg_loss, global_step=global_step) - - summary_op = tf.summary.merge_all() - init_op = tf.global_variables_initializer() - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.test10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), - buf_size=5120), - batch_size=args.batch_size) - - # test - def test(): - test_accs = [] - for batch_id, data in enumerate(test_reader()): - test_images = np.array( - map(lambda x: np.transpose(x[0].reshape(raw_shape), - axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32") - test_labels = np.array(map(lambda x: x[1], data)).astype('int64') - test_accs.append( - accuracy.eval(feed_dict={ - images: test_images, - labels: test_labels, - is_training: False - })) - return np.mean(test_accs) - - config = tf.ConfigProto( - intra_op_parallelism_threads=1, - inter_op_parallelism_threads=1, - log_device_placement=True) - config.gpu_options.allow_growth = True - - hooks = [tf.train.StopAtStepHook(last_step=1000000)] - - with tf.train.MonitoredTrainingSession( - master=server.target, - is_chief=(args.task_index == 0), - hooks=hooks, - config=config) as sess: - iters, num_samples, start_time = 0, 0, 0.0 - for pass_id in range(args.num_passes): - # train - num_samples = 0 - start_time = time.time() - for batch_id, data in enumerate(train_reader()): - train_images = np.array( - map(lambda x: np.transpose(x[0].reshape(raw_shape), - axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32") - train_labels = np.array(map(lambda x: x[1], data)).astype( - 'int64') - iter_begin_time = time.time() - _, loss, acc = sess.run([train_op, avg_loss, accuracy], - feed_dict={ - images: train_images, - labels: train_labels, - is_training: True - }) - iters += 1 - print( - "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed=%.2f imgs/sec" - % (pass_id, iters, loss, acc, - len(data) / (time.time() - iter_begin_time))) - num_samples += len(data) - train_elapsed = time.time() - start_time - # test - pass_test_acc = test() - print("Pass = %d, Train speed = %f imgs/s, Test accuracy = %f\n" % - (pass_id, num_samples / train_elapsed, pass_test_acc)) - - -def print_arguments(): - print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -if __name__ == '__main__': - print_arguments() - - ps_hosts = args.ps_hosts.split(",") - worker_hosts = args.worker_hosts.split(",") - - # Create a cluster from the parameter server and worker hosts. - cluster_spec = tf.train.ClusterSpec({ - "ps": ps_hosts, - "worker": worker_hosts - }) - - # Create and start a server for the local task. 
- server = tf.train.Server( - cluster_spec, job_name=args.job_name, task_index=args.task_index) - - if args.job_name == "ps": - print("start pserver") - server.join() - elif args.job_name == "worker": - print("start worker") - run_benchmark(cluster_spec, server) diff --git a/benchmark/cluster/vgg16/vgg16_v2.py b/benchmark/cluster/vgg16/vgg16_v2.py deleted file mode 100644 index 1a66af32d7131997c63bd3c3042875f33a467084..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/vgg16_v2.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import gzip - -import paddle.v2.dataset.cifar as cifar -import paddle.v2 as paddle -import time -import os - -DATA_DIM = 3 * 32 * 32 -CLASS_DIM = 10 -BATCH_SIZE = os.getenv("BATCH_SIZE") -if BATCH_SIZE: - BATCH_SIZE = int(BATCH_SIZE) -else: - BATCH_SIZE = 128 -print "batch_size", BATCH_SIZE -NODE_COUNT = int(os.getenv("TRAINERS")) -ts = 0 - - -def vgg(input, nums, class_dim): - def conv_block(input, num_filter, groups, num_channels=None): - return paddle.networks.img_conv_group( - input=input, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - pool_type=paddle.pooling.Max()) - - assert len(nums) == 5 - # the channel of input feature is 3 - conv1 = conv_block(input, 64, nums[0], 3) - conv2 = conv_block(conv1, 128, nums[1]) - conv3 = conv_block(conv2, 256, nums[2]) - conv4 = conv_block(conv3, 512, nums[3]) - conv5 = conv_block(conv4, 512, nums[4]) - - fc_dim = 512 - fc1 = paddle.layer.fc(input=conv5, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=fc1, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - out = paddle.layer.fc(input=fc2, - size=class_dim, - act=paddle.activation.Softmax()) - return out - - -def vgg13(input, class_dim): - nums = [2, 2, 2, 2, 2] - return vgg(input, nums, class_dim) - - -def vgg16(input, class_dim): - nums = [2, 2, 3, 3, 3] - return vgg(input, nums, class_dim) - - -def vgg19(input, class_dim): - nums = [2, 2, 4, 4, 4] - return vgg(input, nums, class_dim) - - -def main(): - global ts - paddle.init(use_gpu=False) - image = paddle.layer.data( - name="image", type=paddle.data_type.dense_vector(DATA_DIM)) - lbl = paddle.layer.data( - name="label", type=paddle.data_type.integer_value(CLASS_DIM)) - - extra_layers = None - # NOTE: for v2 distributed training need averaging updates. 
- learning_rate = 1e-3 / NODE_COUNT - out = vgg16(image, class_dim=CLASS_DIM) - cost = paddle.layer.classification_cost(input=out, label=lbl) - - # Create parameters - parameters = paddle.parameters.create(cost) - - # Create optimizer - optimizer = paddle.optimizer.Momentum( - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * - BATCH_SIZE), - learning_rate=learning_rate / BATCH_SIZE, - learning_rate_decay_a=0.1, - learning_rate_decay_b=128000 * 35, - learning_rate_schedule="discexp", ) - - train_reader = paddle.batch( - paddle.reader.shuffle( - cifar.train10(), - # To use other data, replace the above line with: - # reader.train_reader('train.list'), - buf_size=1000), - batch_size=BATCH_SIZE) - test_reader = paddle.batch( - cifar.test10(), - # To use other data, replace the above line with: - # reader.test_reader('val.list'), - batch_size=BATCH_SIZE) - - # Create trainer - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer, - extra_layers=extra_layers, - is_local=False) - - # End batch and end pass event handler - def event_handler(event): - global ts, ts_pass - if isinstance(event, paddle.event.BeginPass): - ts_pass = time.time() - if isinstance(event, paddle.event.BeginIteration): - ts = time.time() - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1 == 0: - print "\nPass %d, Batch %d, Cost %f, %s, spent: %f" % ( - event.pass_id, event.batch_id, event.cost, event.metrics, - time.time() - ts) - if isinstance(event, paddle.event.EndPass): - print "Pass %d end, spent: %f" % (event.pass_id, - time.time() - ts_pass) - result = trainer.test(reader=test_reader) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - trainer.train( - reader=train_reader, num_passes=200, event_handler=event_handler) - - -if __name__ == '__main__': - main() diff --git a/benchmark/fluid/README.md b/benchmark/fluid/README.md index 0fc02b704362f79f2219252538b4b3195e665b2c..065df2edb8d3152ab0891798628653d3b283f1df 100644 --- a/benchmark/fluid/README.md +++ b/benchmark/fluid/README.md @@ -24,22 +24,22 @@ Currently supported `--model` argument include: * Run the following command to start a benchmark job locally: ```bash - python fluid_benchmark.py --model mnist --parallel 1 --device GPU --with_test + python fluid_benchmark.py --model mnist --device GPU ``` You can choose to use GPU/CPU training. With GPU training, you can specify - `--parallel 1` to run multi GPU training. + `--gpus ` to run multi GPU training. 
* Run distributed training with parameter servers: * start parameter servers: ```bash - PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver + PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver ``` * start trainers: ```bash - PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver + PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver ``` * Run distributed training using NCCL2 ```bash - PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method nccl2 + PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2 ``` ## Run Distributed Benchmark on Kubernetes Cluster @@ -48,7 +48,7 @@ We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit distributed benchmark jobs to your cluster. To generate a job yaml, just run: ```bash -python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver --with_test" --disttype pserver +python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver " --disttype pserver ``` Then the yaml files are generated under directory `myjob`; you can run: diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py index 1d8f27440d0f1438e0520684ee3e90e8a5891a17..30b070e4acac60caa97a4e8ffd07462cb347ee93 100644 --- a/benchmark/fluid/fluid_benchmark.py +++ b/benchmark/fluid/fluid_benchmark.py @@ -94,6 +94,10 @@ def parse_args(): '--memory_optimize', action='store_true', help='If set, optimize runtime memory before start.') + parser.add_argument( + '--use_fake_data', + action='store_true', + help='If set, omit the actual read data operators.') parser.add_argument( '--update_method', type=str, @@ -198,6 +202,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, exe.run(train_prog) return + if args.use_fake_data: + raise Exception( + "fake data is not supported in single GPU test for now.") + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) exe = fluid.Executor(place) exe.run(startup_prog) @@ -244,7 +252,31 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, args, train_prog, startup_prog, nccl_id_var, num_trainers, trainer_id): + feed_var_list = [ + var for var in
train_prog.global_block().vars.itervalues() + if var.is_data + ] + # generate fake: + if args.use_fake_data: + for var in feed_var_list: + v = startup_prog.global_block().clone_variable(var) + var.persistable = True + v.persistable = True + + real_shape = list(var.shape) + real_shape[0] = args.batch_size / args.gpus + startup_prog.global_block().append_op( + outputs={"Out": v}, + type="fill_constant", + attrs={"shape": real_shape, + "value": 1.0, + "dtype": var.dtype}) + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) + if nccl_id_var and trainer_id == 0: + #FIXME(wuyi): wait other trainer to start listening + time.sleep(30) + startup_exe = fluid.Executor(place) startup_exe.run(startup_prog) strategy = fluid.ExecutionStrategy() @@ -256,10 +288,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, exec_strategy=strategy, num_trainers=num_trainers, trainer_id=trainer_id) - feed_var_list = [ - var for var in train_prog.global_block().vars.itervalues() - if var.is_data - ] + feeder = fluid.DataFeeder(feed_var_list, place) for pass_id in range(args.pass_num): num_samples = 0 @@ -271,7 +300,10 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, num_samples = 0 if iters == args.iterations: break - loss, = exe.run([avg_loss.name], feed=feeder.feed(data)) + if args.use_fake_data: + loss, = exe.run([avg_loss.name]) + else: + loss, = exe.run([avg_loss.name], feed=feeder.feed(data)) if args.update_method == "pserver": exe.bcast_params() num_samples += len(data) diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py index 3dbb4b8c5dd13657f8d1853003b321ad047e1349..39ba207fd96f71563504017e77dc0e87c249b3f8 100644 --- a/benchmark/fluid/kube_gen_job.py +++ b/benchmark/fluid/kube_gen_job.py @@ -112,6 +112,7 @@ def gen_job(): envs.append({"name": "PSERVERS", "value": str(args.pservers)}) envs.append({"name": "ENTRY", "value": args.entry}) envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)}) + envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) # NOTE: these directories below are cluster specific, please modify # this settings before you run on your own cluster. 
envs.append({ diff --git a/benchmark/fluid/kube_templates/__init__.py b/benchmark/fluid/kube_templates/__init__.py index b64a7f78ff10d03987ea4a8c13a0e34bb433f64c..2d09d940a5ee638e4b55405d05924e2d76006cfc 100644 --- a/benchmark/fluid/kube_templates/__init__.py +++ b/benchmark/fluid/kube_templates/__init__.py @@ -54,5 +54,13 @@ envs = [ "fieldPath": "status.podIP" } } + }, + { + "name": "PADDLE_CURRENT_IP", + "valueFrom": { + "fieldRef": { + "fieldPath": "status.podIP" + } + } } ] diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e490397cc0624c310949a4b571bd00cac6e8953b..682614742cf1bd3130c638020a2545e16226d4d6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS) add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) endif(USE_EIGEN_FOR_BLAS) +if(EIGEN_USE_THREADS) + add_definitions(-DEIGEN_USE_THREADS) +endif(EIGEN_USE_THREADS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 0fde4373a4be58e71ff1a305bd4991cc554d7a34..2665996432b1f6681927320a85d6835094abe4cd 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -212,6 +212,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake ${OPTIONAL_ARGS} -Dprotobuf_BUILD_TESTS=OFF + -DCMAKE_SKIP_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index 91449042fcdfd48c95f3dd3babf958c5d572e747..f53da4d194f8d2428b4121fa1bb31f3fc95a9f64 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -1003,9 +1003,9 @@ dice_loss .. autofunction:: paddle.fluid.layers.dice_loss :noindex: -bilinear_interp +upsampling_bilinear2d ____ -.. autofunction:: paddle.fluid.layers.bilinear_interp +.. autofunction:: paddle.fluid.layers.upsampling_bilinear2d :noindex: diff --git a/doc/fluid/getstarted/Developer's_Guide_to_Paddle_Fluid.md b/doc/fluid/getstarted/Developer's_Guide_to_Paddle_Fluid.md new file mode 100644 index 0000000000000000000000000000000000000000..0c0156c8e46378e7bbeea8072938b8ccfb9ab6d7 --- /dev/null +++ b/doc/fluid/getstarted/Developer's_Guide_to_Paddle_Fluid.md @@ -0,0 +1,1819 @@
+
+# Paddle Fluid Developer's Guide
+
+---
+
+### ==1==. Why does PaddlePaddle need Fluid?
+
+---
+
+### Two fundamental questions
+
+1. How to describe a machine learning model and its optimization process?
+   - Complete and self-consistent, expressive enough to support whatever computation may arise
+1. How to make full use of resources for efficient computation?
+   - Support asynchronous devices, multiple cards, and distributed computation
+   - Reduce the development cost of computation and of computation optimization
+   - ……
+
+---
+
+### How to describe the model and the optimization process?
+<table>
+<thead>
+<tr><th></th><th>一组连续执行的layers</th><th>variable和operator构成的计算图</th><th>不再有模型的概念</th></tr>
+</thead>
+<tbody>
+<tr><td>2013</td><td>Caffe,Theano, Torch, PaddlePaddle</td><td></td><td></td></tr>
+<tr><td>2015</td><td></td><td>TensorFlow, MxNet, Caffe2, ONNX, n-graph</td><td></td></tr>
+<tr><td>2016</td><td></td><td></td><td>PyTorch, TensorFlow Eager Execution, **==PaddlePaddle Fluid==**</td></tr>
+</tbody>
+</table>
+
+
+---
+
+### 目标
+ + + +- 提高对各类机器学习任务的描述能力:能够描述潜在出现的任意机器学习模型。 +- 代码结构逻辑清晰,各模块充分解耦:内外部贡献者能够专注于自己所需的功能模块,基于框架进行再次开发。 +- 从设计上,留下技术优化的空间和潜力。 +- 代码解耦后降低多设备支持、计算优化等的开发成本。 +- 在统一的设计理念下,实现自动可伸缩,自动容错的分布式计算。 + + + +--- + +## ==2.== Design Overview + +--- + +# Fluid: 系统形态 + +- [编译器式的执行流程,区分编译时和运行时](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/motivation/fluid_compiler.md) +
+ +

+ +

+ +--- + +#### 让我们在Fluid程序实例中,区分编译时和运行时 + +--- +### Fluid 编译时 + + + +- ==**定义前向计算**== + + ```python + x = fluid.layers.data(name='x',shape=[13], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(x=cost) + ``` + +- ==**添加反向、正则、优化**== + ```python + learning_rate = 0.01 + sgd_optimizer = fluid.optimizer.SGD(learning_rate) + sgd_optimizer.minimize(avg_cost) + ``` + + +--- + +### `Program` vs. 计算图 + + + +- 在科学计算领域,计算图是一种描述计算的经典方式。下图展示了从前向计算图(蓝色)开始,通过添加反向(红色)和优化算法相关(绿色)操作,构建出整个计算图的过程: +- +

+
+
+
+- Fluid ==使用`Program`而不是计算图==来描述模型和优化过程。`Program`由`Block`、`Operator`和`Variable`构成,相关概念会在后文详细展开。
+- 编译时 Fluid 接受前向计算(这里可以先简单的理解为是一段有序的计算流)`Program`,为这段前向计算按照:前向 -> 反向 -> 梯度 clip -> 正则 -> 优化 的顺序,添加相关 `Operator`和`Variable`到`Program`,构成完整的计算。
+
+ +--- + +### Fluid 运行时 + + + +- ==**读入数据**== + + ```python + train_reader = paddle.batch( + paddle.reader.shuffle(paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=20) + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + ``` +- ==**定义执行程序的设备**== + ```python + place = fluid.CPUPlace() + feeder = fluid.DataFeeder(place=place,feed_list=[x, y]) + ``` + +- ==创建执行器(Executor),执行初始化 `Program`和训练`Program`== + + ```python + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + for data in train_reader(): + avg_loss_value, = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost]) + print(avg_loss_value) + ``` + + +--- + +### 总结:框架做什么?用户做什么? +
+<table>
+<thead>
+<tr><th>构建训练</th><th>执行训练</th></tr>
+</thead>
+<tbody>
+<tr>
+<td>
+用户:描述前向运算<br>
+框架:添加反向运算<br>
+框架:添加优化运算<br>
+框架:添加内存优化<br>
+框架:添加并行/多设备/分布式相关的计算单元
+</td>
+<td>
+框架:创建Operator(计算)+ Variable(数据)<br>
+框架:创建`Block`<br>
+框架:内存管理/设备管理<br>
+框架:执行计算
+</td>
+</tr>
+</tbody>
+</table>
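+上表的分工可以用一段极简的示意代码印证(补充示例,非本文档原文;基于公开 Python API,网络结构与参数均为假设):
+
+```python
+import paddle.fluid as fluid
+
+main, startup = fluid.Program(), fluid.Program()
+with fluid.program_guard(main, startup):
+    # 用户:只描述前向运算
+    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+    y_predict = fluid.layers.fc(input=x, size=1, act=None)
+    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    avg_cost = fluid.layers.mean(x=cost)
+    n_forward = len(main.block(0).ops)
+    # 框架:minimize 自动向 Program 追加反向、优化相关的 Operator
+    fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_cost)
+    print(len(main.block(0).ops) - n_forward)  # 新增的反向/优化 op 数量
+```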
+
+
+---
+
+### 总结:编译时
+
+
+**用户编写一段Python程序,描述模型的前向计算**
+1. 创建变量描述 `VarDesc`
+1. 创建operators的描述 `OpDesc`
+1. 创建operators的属性
+1. 推断变量的类型和形状,进行静态检查:`inferShape`
+1. 规划变量的内存复用
+1. 创建反向计算
+1. 添加优化相关的Operators
+1. (可选)添加多卡/多机相关的Operator,生成在多卡/多机上运行的程序
+
+
+
+---
+
+### 总结:运行时
+ + +**执行规划好的计算** +1. 创建`Executor` +1. 为将要执行的一段计算,在层级式的`Scope`空间中创建`Scope` +1. 创建`Block`,依次执行`Block` + +

+
+ Figure. 编译时运行时概览 +

+ +
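+作为运行时行为的一个补充示意(非原文;`fc_0.w_0` 是 fc 层参数的默认命名,属于假设,具体名字以实际 Program 为准):初始化 `Program` 执行后,参数会以 `Variable` 的形式存在于全局 `Scope` 中,可以直接取回。
+
+```python
+import numpy as np
+import paddle.fluid as fluid
+
+x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+y_predict = fluid.layers.fc(input=x, size=1, act=None)
+
+exe = fluid.Executor(fluid.CPUPlace())
+exe.run(fluid.default_startup_program())  # 创建并初始化所有参数 Variable
+param = fluid.global_scope().find_var('fc_0.w_0').get_tensor()
+print(np.array(param).shape)  # (13, 1)
+```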
+
+
+---
+
+## ==3==. 用户如何描述计算?
+---
+
+### Fluid:==像写程序一样==定义计算
+
+
+- 顺序执行
+  ```python
+  x = fluid.layers.data(name='x',shape=[13], dtype='float32')
+  y_predict = fluid.layers.fc(input=x, size=1, act=None)
+  y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+  cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+  ```
+
+- 条件分支: [switch](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/switch.md)、[ifelse](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/if_else_op.md)
+
+  ```python
+  a = fluid.Var(10)
+  b = fluid.Var(0)
+
+  switch = fluid.switch()
+  with switch.block():
+      with switch.case(fluid.less_equal(a, 10)):
+          fluid.print("Case 1")
+      with switch.case(fluid.larger(a, 0)):
+          fluid.print("Case 2")
+      with switch.default():
+          fluid.print("Case 3")
+  ```
+
+>[A Lisp cond form may be compared to a continued if-then-else as found in many algebraic programming languages](https://www.cs.cmu.edu/Groups/AI/html/cltl/clm/node84.html).
+
+
+
+---
+
+### Fluid: ==像写程序一样==定义计算
+
+
+
+- 循环:[while](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py#L105)
+
+  ```python
+  d0 = layers.data("d0", shape=[10], dtype='float32')
+  i = layers.zeros(shape=[1], dtype='int64')
+  data_array = layers.array_write(x=d0, i=i)
+  array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
+
+  cond = layers.less_than(x=i, y=array_len)
+  while_op = layers.While(cond=cond)
+  with while_op.block():
+      d = layers.array_read(array=data_array, i=i)
+      i = layers.increment(x=i, in_place=True)
+      layers.array_write(x=d, i=i, array=data_array)
+      layers.less_than(x=i, y=array_len, cond=cond)
+  ```
+
+- 完整实例请查看 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/test_while_op.py#L36-L44)
+- beam search [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py#L105)
+
+
+
+---
+
+#### 总结
+
+
+
+1. 用户层提供的描述语法具有完备性、自洽性,有能力支持对复杂计算过程的描述
+1. 使用方式和核心概念可以类比编程语言,认知能够直接迁移
+1. 能够支持:定义问题,逐步求解
+
+
+
+---
+
+## ==3.== 核心概念
+
+---
+### 编译时概念 :==变量和计算的描述==
+
+
+
+- `VarDesc` + `TensorDesc` + `OpDesc` -> `BlockDesc` -> `ProgramDesc`
+    - https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto
+
+- 什么是 Fluid Program
+
+    - 在Fluid中,一个神经网络任务(训练/预测)被描述为一段`Program`
+    - `Program`包含对`Variable`(数据)和 `Operator`(对数据的操作)的描述
+    - `Variable` 和 `Operator` 被组织为多个可以嵌套的`Block`,构成一段完整的`Fluid Program`
+
+
+>编译阶段最终,经过 Transpiler 的执行规划、变换处理,生成使用`protobuf`序列化后的`ProgramDesc`,可以发送给多卡或者网络中的其它计算节点执行
+
+
+
+---
+
+### 编译时概念 :==**[Transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/motivation/fluid_compiler.md)**==
+
+
+1. 接受一段`ProgramDesc`作为输入,生成一段新的`ProgramDesc`
+
+    - *Memory optimization transpiler*:向原始`ProgramDesc` 中插入 `FreeMemoryOps`,在一次迭代优化结束前提前释放内存,使得能够维持较小的 memory footprint
+
+    - *Distributed training transpiler*:将原始的`ProgramDesc`转化为对应的分布式版本,生成两段新的`ProgramDesc`:
+        1. trainer进程执行的`ProgramDesc`
+        1. parameter server执行的`ProgramDesc`
+
+1. ==**WIP**==: 接受一段`ProgramDesc`,生成可直接被`gcc`, `nvcc`, `icc`等编译的代码,编译后得到可执行文件
+
+
+
+---
+### Transpiler
+

+ +

+ +--- + +### 打印 `ProgramDesc` + +

+ +

+ + + +- `default_startup_program`:创建可学习参数,对参数进行初始化 +- `default_main_program`:由用户定义的模型,包括了前向、反向、优化及所有必要的计算 + +- 打印可读的 `Program` + ```python + from paddle.v2.fluid import debuger + print debuger.pprint_program_codes(framework.default_main_program().desc) + ``` + + +--- +### 输出效果 + + + + + + + + + + + + + + +
+<table>
+<thead>
+<tr><th>variable in block 0</th><th>variable in block 0</th></tr>
+</thead>
+<tbody>
+<tr><td>(图:打印输出)</td><td>(图:打印输出)</td></tr>
+</tbody>
+</table>
+<table>
+<thead>
+<tr><th></th><th>protobuf messages</th><th>C++ class objects</th></tr>
+</thead>
+<tbody>
+<tr><td>Data</td><td>[VarDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto#L107)</td><td>[Variable](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/variable.h#L24)</td></tr>
+<tr><td>Operation</td><td>[OpDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/framework.proto#L35)</td><td>[Operator](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/operator.h#L64)</td></tr>
+<tr><td>Block</td><td>BlockDesc</td><td>Block</td></tr>
+</tbody>
+</table>
+ +- 执行相关 :`Executor` + +
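+这一对应关系在 Python 端可以直接观察到(示意代码,非原文):
+
+```python
+import paddle.fluid as fluid
+
+prog = fluid.default_main_program()
+print(type(prog.desc))   # core.ProgramDesc:protobuf message 的 C++ 封装
+block = prog.block(0)
+print(type(block.desc))  # core.BlockDesc
+```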
+
+---
+#### Tensor 和 LoD(Level-of-Detail) Tensor
+
+
+- Tensor 是 $n$-dimensional array 的推广,LoDTensor 是在 Tensor 基础上附加了序列信息
+- Fluid 中输入、输出,网络中的可学习参数全部统一使用 LoDTensor(n-dimension array)表示
+- 一个 mini-batch 输入数据是一个 LoDTensor
+    - 在 Fluid 中,RNN 处理变长序列无需 padding,得益于 `LoDTensor` 表示
+    - 可以简单将 LoD 理解为:`std::vector<std::vector<size_t>>`
+    - 对非序列数据,LoD 信息为空
+
+
+
+<table>
+<thead>
+<tr><th></th><th>TensorFlow</th><th>PaddlePaddle</th></tr>
+</thead>
+<tbody>
+<tr><td>RNN</td><td>Support</td><td>Support</td></tr>
+<tr><td>recursive RNN</td><td>Support</td><td>Support</td></tr>
+<tr><td>padding zeros</td><td>Must</td><td>No need</td></tr>
+<tr><td>blob data type</td><td>Tensor</td><td>LoDTensor</td></tr>
+</tbody>
+</table>
+ +
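+在 Python 端可以直接构造带 LoD 信息的输入。下面是一段示意代码(非原文;采用上文的偏移式 LoD 表示,`fluid.core` 的接口细节可能随版本变化):
+
+```python
+import numpy as np
+import paddle.fluid as fluid
+
+place = fluid.CPUPlace()
+tensor = fluid.core.LoDTensor()
+# 14 行数据,对应 4 条长度分别为 5、3、2、4 的序列
+tensor.set(np.random.rand(14, 32).astype('float32'), place)
+tensor.set_lod([[0, 5, 8, 10, 14]])  # 与下文图(a)的 LoD 信息一致
+```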
+ +--- +#### LoD 信息实例 + + + +

+ +

+ +- 图(a)的LoD 信息 + ```cpp + [0, 5, 8, 10, 14] + ``` +- 图(b)的 LoD 信息 + ```cpp + [[0, 5, 8, 10, 14] /*level=1*/, [0, 2, 3, 5, 7, 8, 10, 13, 14] /*level=2*/] + ``` +
+ +--- +#### Tensor, Variable, Scope 之间的关系 + +

+ +

+ + +1. `Block` 是一个实现层的概念,不在应用层暴露给用户。目前用户无法自行创建并利用`Block`,用户能够感知的只有`Program`这个概念。 +1. 逻辑上,可以将 `Block` 类比为编程语言中的大括号:定义了一段作用域,其中运行一段代码 +1. `Executor`会为每一个`Block`创建一个`Scope`,`Block`是可嵌套的,因此`Scope`也是可嵌套的 + + + +--- +### Executor + + + + + + + + + + + + + + +
+<table>
+<thead>
+<tr><th>接口</th><th>说明</th></tr>
+</thead>
+<tbody>
+<tr>
+<td>(图:Executor 接口)</td>
+<td>
+输入<br>
+1. `ProgramDesc`<br>
+2. `Scope`<br>
+3. `block_id`<br>
+<br>
+解释执行步骤<br>
+1. 创建所有 Variables<br>
+2. 逐一创建 Operator 并运行
+</td>
+</tr>
+</tbody>
+</table>
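+与上表对应,Python 端的一段示意用法(非原文;`scope_guard`、`global_scope` 等为当时公开的接口,细节以实际版本为准):
+
+```python
+import paddle.fluid as fluid
+
+exe = fluid.Executor(fluid.CPUPlace())
+scope = fluid.core.Scope()
+# 传入 Program(底层即 ProgramDesc)与 Scope,默认解释执行其中的 block 0
+with fluid.scope_guard(scope):
+    exe.run(fluid.default_startup_program())
+```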
+ +--- +### Operator/OpWithKernel/Kernel + + +

+ +

+ +- operator 无状态,Operator的核心是==Run==方法 +- 一个operator可以注册多个kernel +- operator 可以无 kernel:while_op 、ifelse op + +
+ +--- +#### Fluid Operator vs. PaddlePaddle layers + + + + + + + + + + + + + + + + + + +
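+operator 的"无状态"也可以从 Python 端观察:layers 函数只是向 Program 追加 operator 与 variable,operator 本身仅记录输入、输出与属性(示意代码,非原文):
+
+```python
+import paddle.fluid as fluid
+
+x = fluid.layers.data(name='x', shape=[4], dtype='float32')
+y = fluid.layers.scale(x=x, scale=2.0)
+op = fluid.default_main_program().block(0).ops[-1]
+print(op.type)                                  # scale
+print(op.input_arg_names, op.output_arg_names)
+print(op.attr('scale'))                         # 2.0:属性保存在 OpDesc 中
+```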
+<table>
+<thead>
+<tr><th>Layer</th><th>Operator</th></tr>
+</thead>
+<tbody>
+<tr>
+<td>1. 内部维护状态<br>2. 包含forward和backward方法</td>
+<td>1. 内部无状态<br>2. 只有Run方法</td>
+</tr>
+</tbody>
+</table>
+ +
+
+
+---
+
+### ==4.== 内存管理
+
+---
+### 目标
+
+- 为异构设备提供统一的内存分配、回收接口
+- 最小化管理内存所需的时间,最小化管理开销
+- 减少内存碎片
+- 将内存管理与计算(Operators/Kernels)完全剥离
+- 统一内存管理是内存优化的基础
+
+---
+
+
+
+### Memory 接口
+
+- 内存管理模块向上层应用逻辑提供三个基础接口:
+  ```cpp
+  template <typename Place>
+  void* Alloc(Place place, size_t size);
+
+  template <typename Place>
+  void Free(Place place, void* ptr);
+
+  template <typename Place>
+  size_t Used(Place place);
+
+  struct Usage : public boost::static_visitor<size_t> {
+    size_t operator()(const platform::CPUPlace& cpu) const;
+    size_t operator()(const platform::CUDAPlace& gpu) const;
+  };
+  ```
+- 模板参数 `Place` 指示内存分配发生的设备
+- 实现时,需特化支持的 `Place`,提供以上三个接口的实现
+
+
+
+---
+### 代码结构
+
+
+
+内存管理模块可以理解为由以下两部分构成:
+
+1. SystemAllocator:实际从物理设备上分配、释放内存的接口
+1. BuddyAllocator:内存管理算法
+
+
+
+---
+### System Allocator
+
+
+
+- SystemAllocator 是实现物理内存分配、回收的基类
+    - 不同设备上的内存分配和回收终将转化为标准接口调用
+    - 为不同设备实现MemoryAllocator,继承自SystemAllocator
+
+  ```cpp
+  class SystemAllocator {
+   public:
+    virtual ~SystemAllocator() {}
+    virtual void* Alloc(size_t& index, size_t size) = 0;
+    virtual void Free(void* p, size_t size, size_t index) = 0;
+    virtual bool UseGpu() const = 0;
+  };
+  ```
+
+
+---
+
+### CPU/GPU Allocator
+
+
+
+```cpp
+class CPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t& index, size_t size);
+  virtual void Free(void* p, size_t size, size_t index);
+  virtual bool UseGpu() const;
+};
+
+#ifdef PADDLE_WITH_CUDA
+class GPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t& index, size_t size);
+  virtual void Free(void* p, size_t size, size_t index);
+  virtual bool UseGpu() const;
+
+ private:
+  size_t gpu_alloc_size_ = 0;
+  size_t fallback_alloc_size_ = 0;
+};
+#endif
+```
+- CPUAllocator和GPUAllocator分别继承自SystemAllocator,分别调用相应的标准库函数实现物理内存的分配和释放。
+- 一旦大块、连续的物理内存分配之后,将通过内存管理算法实现内存的按块分配、回收、重用等。
+
+
+
+---
+### CPU Allocator
+
+
+
+- CPU 内存的分配提供两种选项:
+    1. non-pinned memory:可分页内存
+    2. pinned memory:页锁定内存
+        - 分配过大的页锁定内存有可能因为系统可使用的分页内存减少,影响系统性能,默认CPU下分配的是可分页内存
+
+- 通过gflags设置一次性分配内存的大小以及是否使用页锁定内存。
+
+  ```cpp
+  DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
+  DEFINE_double(fraction_of_cpu_memory_to_use, 1,
+                "Default use 100% of CPU memory for PaddlePaddle,"
+                "reserve the rest for page tables, etc");
+  ```
+
+
+
+---
+### GPU Allocator
+
+
+
+- 通过 cudaMalloc 分配GPU显存
+- GPUAllocator::Alloc 首先会计算指定GPU device上的可用显存
+    - 如果可用显存大于请求分配大小,调用cudaMalloc进行分配
+    - 如果可用显存不足,目前会报错退出。
+- 通过gflags控制GPU下一次性分配显存的大小:
+
+  ```cpp
+  DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
+                "Default use 92% of GPU memory for PaddlePaddle,"
+                "reserve the rest for page tables, etc");
+  ```
+
+
+
+---
+#### 内存管理算法: [Buddy Memory Allocation](https://en.wikipedia.org/wiki/Buddy_memory_allocation)
+
+
+
+- Memory Arena:一次性分配大块连续内存,之后会基于这块内存进行内存管理:动态分配、释放、重用内存块。
+- 伙伴内存分配:
+    - 将内存划分为 2 的幂次方个分区,使用 best-fit 方法来分配内存请求。
+    - 当释放内存时,检查 buddy 块,查看相邻的内存块是否也已被释放。如果是,将内存块合并,以最小化内存碎片。
+    - 分配的内存在物理内存的自然边界对齐,提高内存访问效率。
+    - 算法的时间效率高,但由于使用 best-fit 方法的缘故,会产生一定的内存浪费
+
+
+
+---
+
+### Buddy Allocator
+
+
+
+- BuddyAllocator 是一个单例,每个设备(如: GPU/CPU(0)/GPU(1)) 拥有一个BuddyAllocator
+- BuddyAllocator 内部拥有一个私有成员变量 SystemAllocator
+- 当请求的内存超过BuddyAllocator管理的空余内存时,将会调用SystemAllocator去指定的设备上分配物理内存
+
+
+
+---
+### 实例:CPU 下内存管理接口的实现
+
+
+
+- 对上层应用,统一通过BuddyAllocator来实现内存的分配、释放以及用量查询
+  ```cpp
+  template <>
+  void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
+    VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
+    void* p = GetCPUBuddyAllocator()->Alloc(size);
+    VLOG(10) << "  pointer=" << p;
+    return p;
+  }
+
+  template <>
+  void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
+    VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
+    GetCPUBuddyAllocator()->Free(p);
+  }
+
+  template <>
+  size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
+    return GetCPUBuddyAllocator()->Used();
+  }
+  ```
+
+
+---
+### ==5.== 多设备支持
+
+---
+### 多设备支持(一)
+
+
+
+- step 1:添加Place类型,由用户实现添加到框架
+    - 可以将Place类型理解为一个整数加上一个枚举型,包括:设备号 + 设备类型
+

+ +

+- DeviceContext + - 不同的Place会对应一个相应的DeviceContext,用于组织管理与设备相关的信息 + - 例如,GpuDeviceContext中会管理Cuda stream + - 目前实现中一些特殊的库也会对应有自己的DeviceContext:例如: + ```cpp + class MKLDNNDeviceContext : public CPUDeviceContext {……} + ``` + - 每种设备对应的DeviceContext需要管理的内容不尽相同,视具体需求来实现 + +
+
+
+---
+
+### 多设备支持(二)
+
+
+
+- step 2: 增加KernelType,为相应的KernelType注册Kernel对象,由用户实现注册给框架 可以按照:
+    1. Place 执行设备
+    1. DataType 执行数据类型 FP32/FP64/INT32/INT64
+    1. Memory layout: 运行时 Tensor 在内存中的排布格式 NCHW、 NHWC
+    1. 使用的库
+
+  来区分Kernel,为同一个operator注册多个 Kernel。
+
+  ```cpp
+  struct OpKernelType {
+    proto::DataType data_type_;
+    DataLayout data_layout_;
+    platform::Place place_;
+    LibraryType library_type_;
+  };
+  ```
+
+
+
+---
+
+### 多设备支持(三)
+
+
+
+step 3: 运行时的 KernelType 推断和Kernel切换,按需要修改Kernel推断和Kernel切换规则
+- Expected Kernel:期待调用的Kernel:由(1)`Place`和计算精度决定;或(2)用户在配置中显式指定使用的计算库,如`cudnn`、`mkldnn`等。
+- Actual Kernel:运行时从`Operator`的输入(`Variable`)可以推断出实际需要的`KernelType`
+- 当Expected Kernel和Actual Kernel不一致的时候,框架会插入`data_transformer`或者`data_layout_transform`等,保证Expected Kernel可以执行,包括:
+    - CPUPlace -> GPUPlace :跨设备内存复制
+    - NCHW -> nChw8c :Layout转换
+    - FP32 -> FP16 :精度转换 _**尚未支持**_
+    - ……
+- 以上过程实现在OperatorWithKernel类的Run方法中 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/operator.cc#L497)
+
+
+
+---
+## ==6.== while_op
+
+---
+### while_op
+
+
+
+- 循环执行一段`Program`,直到条件operator判断循环条件不满足时终止循环
+- while_op 的特殊之处:
+    1. while_op 没有 kernel
+    1. while_op 拥有自己的`Block`,会形成一段嵌套的`Block`
+    1. ==while_op 内部创建了一个 Executor,来循环执行`Block`==
+
+- while_op 输入输出 : LoDTensorArray
+  ```cpp
+  namespace paddle {
+  namespace framework {
+  using LoDTensorArray = std::vector<LoDTensor>;
+  }
+  }
+  ```
+    - 每一次循环,从原始输入中“切出”一个片段
+    - LoDTensorArray 在Python端暴露,是Fluid支持的基础数据结构之一,用户可以直接创建并使用
+
+
+
+---
+### while_op [Run](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/while_op.cc#L42) 方法概览
+
+
+
+```cpp
+
+void Run(const framework::Scope &scope,
+         const platform::Place &dev_place) const override {
+  PADDLE_ENFORCE_NOT_NULL(scope.FindVar(Input(kCondition)));
+  auto &cond = scope.FindVar(Input(kCondition))->Get<framework::LoDTensor>();
+  PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1}));
+
+  framework::Executor executor(dev_place);
+  auto *block = Attr<framework::BlockDesc *>(kStepBlock);
+
+  auto *program = block->Program();
+  auto step_scopes =
+      scope.FindVar(Output(kStepScopes))->GetMutable<std::vector<framework::Scope *>>();
+
+  while (cond.data<bool>()[0]) {
+    auto &current_scope = scope.NewScope();
+    step_scopes->push_back(&current_scope);
+    executor.Run(*program, &current_scope, block->ID(),
+                 false /*create_local_scope*/);
+  }
+}
+
+```
+
+
+
+---
+### while_op 的重要应用:Dynamic RNN
+
+---
+
+### 什么是 `dynamicRNN` ?
+
+
+ +1. 用户可以自定义在一个时间步之内的计算, 框架接受序列输入数据,在其上循环调用用户定义的单步计算 +1. 可学习参数在多个时间步之间共享 +1. `dynamicRNN` 由 `while_op` 实现 +1. 如果`dynamicRNN`中定义了`memory`,将会构成一个循环神经网络,否则其行为就等于在输入序列上循环调用预定义的单步计算 + +
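+上面几点可以用一段示意代码直观体现(非原文;基于当时公开的 `fluid.layers.DynamicRNN` 接口,隐层大小等均为假设):
+
+```python
+import paddle.fluid as fluid
+
+# 带一层 LoD 的变长序列输入
+sentence = fluid.layers.data(name='sentence', shape=[32], dtype='float32', lod_level=1)
+drnn = fluid.layers.DynamicRNN()
+with drnn.block():                              # 用户自定义的单步计算
+    word = drnn.step_input(sentence)            # 每个时间步的输入
+    prev = drnn.memory(shape=[64], value=0.0)   # memory 形成循环连接
+    hidden = fluid.layers.fc(input=[word, prev], size=64, act='tanh')
+    drnn.update_memory(prev, hidden)            # 可学习参数在时间步之间共享
+    drnn.output(hidden)
+last = fluid.layers.sequence_last_step(drnn())
+```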
+ +--- + +#### `dynamic RNN` 用户接口 + + +

+ +

+ +- `dynamicRNN` 中的重要元素 + 1. **step input**: `dynamicRNN` 每个时间步的输入 + 1. **step function**: 用户定义的单步计算 + 1. **memory**: 用于形成循环连接 + 1. **external/static memory**:单步计算的每一步都可以全部读取到的外部输入 + +
+ +--- + +#### dynamicRNN 中的 Memory + + + +`dynamicRNN`中`memory`的行为非常类似于 C++ 中的引用变量 + - `memory` “指向” 一个operator的输出变量,记作: A + - `memory` 可以被 LoDTensor 初始化(当LoD信息为空时,为非序列,否则为序列),默认`memory`被初始化为零 + - `memory` 在 operator A 前向计算之后,进行前向计算 + - 当 `memory` 的前向计算会 "指向" A 的输出 LoDTensor + - `memory` 的输出可以是另一个 operator 的输入,于是形成了“循环”连接 + + + +--- + +### DynamicRNN 实现细节 + + + +- `while_op` 无法独立构成dynamicRNN,必须和一组相关的 operator 及数据结构配合 + - 依赖的 operators (这里仅列出最重要的,并非全部): + - `lod_rank_table` operator + - `lod_tensor_to_array` operator + - `array_to_lod_tensor` operator + - `shrink_memory` operator + - 依赖的数据结构 + - `TensorArray` + - `LoDRankTable` + +- 在Fluid中,RNN接受变长序列输入,无需填充,以上数据结构和相关的operator配合工作,实现了对变长输入以batch计算 + + + +--- + +### `dynamicRNN` 如何实现 batch 计算 ? + + + +- 问题: + - RNN 可以看作是一个展开的前向网络,前向网络的深度是最长序列的长度 + - 如果不对变长序列进行填充,将它们填充到一样长度,每个mini-batch输入将会不等长,每个样本展开长度不一致,导致前向和反向计算实现困难 + + + +---- +##### 实例 :RNN encoder-decoder with attention + + + +- 以机器翻译的RNN encoder-decoder 模型(涉及了`dynamicRNN`的所有设计要素)为例,下图是 RNN encoder-decoder 的原始输入: +

+
Figure. RNN encoder-decoder 原始batch 输入数据 +

+
+
+- source word sequences 是encoder RNN的输出,是一个LoDTensor
+- target word sequences 是lookup_table的输入,是一个LoDTensor
+- 上图中一个矩形方块是CPU/GPU内存中一片连续的内存空间,表示一个dense vector
+
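+其中 target word sequences 这类变长词序列,在用户侧通常这样声明并送入 lookup_table(示意代码,非原文;词表大小与维度均为假设):
+
+```python
+import paddle.fluid as fluid
+
+trg_word = fluid.layers.data(name='target_sequence', shape=[1],
+                             dtype='int64', lod_level=1)
+trg_emb = fluid.layers.embedding(input=trg_word, size=[10000, 512])  # lookup_table op
+```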
+
+
+---
+
+### `dynamicRNN` 如何实现 batch 计算 ?
+
+
+
+1. 对一个mini batch中不等长样本进行排序,最长样本变成batch中的第一个,最短样本是batch中最后一个
+    - `LoDTensor` -> `LoDRankTable` :heavy_plus_sign: `lod_rank_table operator`
+        - 可以将`LoDRankTable`理解为对LoDTensor中的多个序列按照长度排序,LoDRankTable 存储了排序之后的index
+
+2. 构建每个时间步的batch输入:随着时间步增加,每个时间步的batch输入可能会逐渐缩小
+    - `TensorArray` :heavy_plus_sign: `lod_tensor_to_array` -> `LoDTensor` (without LoD)
+3. 每个时间步输出写入一个输出 `LoDTensorArray`
+4. `dynamicRNN`循环结束后, 按照`LoDRankTable`中记录的信息对输出`LoDTensorArray`重排序,还原回原始输入顺序
+    - `TensorArray` :heavy_plus_sign: `array_to_lod_tensor` -> `LoDTensor`
+
+
+
+---
+
+### 运行实例
+

+ +

+ +--- +### 运行实例 + +

+ +

+ + + +- 执行到第5~7个batch时,batch size将会缩小 + + + +--- +### 运行实例 + +

+ +

+ + + +- 第5 ~ 7个batch时RNN的`memory`会发生什么? + - `memory` 指向某个operator的输出Tensor,在该operator前向计算之后,“取回”其计算结果 + - 5 ~ 7时,遇到了序列的结束,==下一个时间步计算不再需要在已经结束的序列上展开== + - 在`dynamicRNN`中`shrink_memory` operator 用来缩小`memory`的batch输入 + + + +--- +### 运行实例:batch 1 ~ 2 + +

+
Figure. 第1、2个batch输入dynamicRNN的batch输入 +

+ +--- +### 运行实例:batch 3 ~ 4 + +

+
Figure. 第3、4个batch输入dynamicRNN的batch输入 +

+ +--- + +### 运行实例:batch 5 ~ 7 + +

+
Figure. 第5、6、7个batch输入dynamicRNN的batch输入 +

+ +--- +### ==7.== Fluid 代码结构 + +--- +### Fluid 代码结构 + + + + + + + + + + + + + + + +
+<table>
+<thead>
+<tr><th>代码结构</th><th>模块结构</th></tr>
+</thead>
+<tbody>
+<tr>
+<td>(图:Fluid 代码目录结构)</td>
+<td>(图:Fluid 模块结构)</td>
+</tr>
+</tbody>
+</table>
+ +--- + +### ==8.== 文档总结 + +--- + + +- 设计概览 + - 重构概览 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/refactorization.md) + - fluid [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/fluid.md) + - fluid_compiler [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/motivation/fluid_compiler.md) +- 核心概念 + - variable 描述 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/var_desc.md) + - Tensor [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/tensor.md) + - LoDTensor [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md) + - TensorArray [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/tensor_array.md) + - Program [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md) + - Block [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.md) + - Scope [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md) + +--- + +- 重要功能模块 + - backward [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/backward.md) + - 内存优化 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/memory_optimization.md) + - evaluator [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/executor.md) + - python API [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md) + - regularization [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/regularization.md) + +- 开发指南 + - 支持新设硬件设备库 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/support_new_device.md) + - 添加新的Operator [->](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_cn.md) + - 添加新的Kernel [->]( +https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_kernel_en.md) + + + +--- + +### ==9.== 开发指南 + +--- + +#### 建议开发环境:使用 Docker 编译和测试 + + + +Docker编译PaddlePaddle源码: [->](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/build_and_install/docker_install_cn.html) + +PaddlePaddle 在 Dockerhub 地址:[->]( + https://hub.docker.com/r/paddlepaddle/paddle/tags/) + +1. 获取PaddlePaddle的Docker镜像 + ```bash + docker pull paddlepaddle/paddle:latest-dev + ``` + +1. 启动 docker container + + ```bash + docker run -it -v $PWD/Paddle:/paddle paddlepaddle/paddle:latest-dev /bin/bash + ``` + +1. 进入docker container后,从源码编译,请参考文档 [->]( http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/build_and_install/build_from_source_cn.html) + + + +--- + +### 一些说明 + + + +1. PaddlePaddle的Docker镜像为了减小体积,默认没有安装vim,可以在容器中执行`apt-get install -y vim`来安装vim。 +1. 开发推荐使用tag为`latest-dev`的镜像,其中打包了所有编译依赖。`latest`及`lastest-gpu`是production镜像,主要用于运行PaddlePaddle程序。 +2. 在Docker中运行GPU程序,推荐使用nvidia-docker,[否则需要将CUDA库和设备挂载到Docker容器内](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/build_and_install/docker_install_cn.html)。 + + + ```bash + nvidia-docker run -it -v $PWD/Paddle:/paddle paddlepaddle/paddle:latest-dev /bin/bash + ``` + + + + + +--- + +### [如何贡献](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/dev/contribute_to_paddle_cn.html) + + + +- ==提交PullRequest前请务必阅读==: [->](http://www.paddlepaddle.org/docs/develop/documentation/fluid/zh/dev/contribute_to_paddle_cn.html) +- 代码要求 + 1. 代码注释遵守 Doxygen 的样式 + 1. 确保编译器选项 WITH_STYLE_CHECK 已打开,并且编译能通过代码样式检查 + 1. 所有代码必须具有单元测试,且能够通过所有单元测试 +- 使用 `pre-commit` 钩子提交Pull Request + 1. 帮助格式化源代码(C++,Python) + 1. 在提交前自动检查一些基本事宜:如每个文件只有一个 EOL,Git 中不要添加大文件等 + 1. 
安装pre-commit,并在PaddlePaddle根目录运行: + ```bash + ➜ pip install pre-commit + ➜ pre-commit install + ``` + + +--- + +### 如何贡献 + + + +1. 开始开发之前请先建立issue。 + - 让其它同学知道某项工作已经有人在进行,以避免多人开发同一功能的情况。 +1. 提交PR必须关联相关的issue。做法请参考:[->](https://help.github.com/articles/closing-issues-using-keywords/) + - 目的:为了在提交的版本中留有记录描述这个PR是为了开发什么样的功能,为了解决什么样的问题。 + - 当PR被merge后,关联的issue会被自动关闭。 +1. PR review 中,reviewer的每条comment都必须回复。 + - 如修改完可直接回复:Done。 + - 目的:review comment 中可能会有(1)询问类型的问题;(2)可以在下一个PR修改的问题;(3)comment意见不合理等。需要明确回复,以便reviewer和其他人有历史可查,便于区分是否已经进行修改,或者准备下一个PR修改,或者意见不合理可以不用进行修改。 + + + +--- + +### ==10.== 添加新的 Operator + +--- + +### 概念简介 + + + +添加一个新的operator,会涉及实现以下C++类的派生类: + +1. `framework::OperatorBase`: Operator(简写,Op)基类。 +1. `framework::OpKernel`: Op计算函数的基类,称作Kernel。 +1. `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 +1. `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 + +依据是否包含kernel,可以将Op分为两种: +1. 包含Kernel的Op:继承自OperatorWithKernel,==绝大多数operator都属于这一类== +1. 不包含kernel的Op,继承自OperatorBase,只有少量Op属于这一类,例如while_op,ifelse_op + +这里主要介绍带Kernel的Op如何编写。 + + + +--- + +#### 添加新的Operator需要修改/添加哪些文件? + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+<table>
+<thead>
+<tr><th>内容</th><th>定义位置</th></tr>
+</thead>
+<tbody>
+<tr><td>OpProtoMaker定义</td><td>`.cc`文件,Backward Op不需要OpProtoMaker</td></tr>
+<tr><td>Op定义</td><td>`.cc`文件</td></tr>
+<tr><td>Kernel实现</td><td>CPU、CUDA共享Kernel实现在`.h`文件中,否则,CPU 实现在`.cc`文件中,CUDA 实现在`.cu`文件中。</td></tr>
+<tr><td>注册Op</td><td>Op注册实现在`.cc`文件;Kernel注册CPU实现在`.cc`文件中,CUDA实现在`.cu`文件中</td></tr>
+</tbody>
+</table>
+ +- 添加 Operator 之前请阅读:[Operator 命名规范](https://github.com/PaddlePaddle/Paddle/blob/63cca04cfd488a4dab6d6273fd04a8017ef45932/doc/fluid/dev/name_convention.md)及[Operator Markdown注释规范](https://github.com/PaddlePaddle/Paddle/blob/63cca04cfd488a4dab6d6273fd04a8017ef45932/doc/fluid/dev/op_markdown_format.md)。 +- 实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。 +- 根据文件名自动构建op和Python端绑定,请务必遵守以上命名,否则需要进一步修改PyBind相关文件及CMakeLists.txt。 +
+
+
+---
+
+###### 实现带Kernel的Operator step1: 定义ProtoMaker类
+
+
+
+下面均以[clip_op](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/clip_op.h)为例进行介绍
+
+- clip_op计算公式:$Out = \min(\max(X, min), max)$
+- 首先定义`ProtoMaker`来描述该Op的输入、输出,并添加注释(*下面代码段中的注释进行了简化,实现时需按照规范添加注释*):
+
+  ```cpp
+  template <typename AttrType>
+  class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
+   public:
+    ClipOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+        : OpProtoAndCheckerMaker(proto, op_checker) {
+      AddInput("X", "(Tensor)The input of clip op.");
+      AddOutput("Out", "(Tensor),The output of clip op.");
+      AddAttr<AttrType>(
+          "min", "(float),Minimum value.");
+      AddAttr<AttrType>(
+          "max", "(float),Maximum value.");
+      AddComment(R"DOC(
+  ……
+  )DOC");
+    }
+  };
+  ```
+
+
+
+---
+
+###### 实现带Kernel的Operator step2: 定义Operator类
+
+
+
+下面的代码段实现了`clip_op`的定义:
+
+```cpp
+class ClipOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of ClipOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of ClipOp should not be null.");
+    auto x_dims = ctx->GetInputDim("X");
+    auto max = ctx->Attrs().Get<float>("max");
+    auto min = ctx->Attrs().Get<float>("min");
+    PADDLE_ENFORCE_LT(min, max, "max should be greater than min.");
+    ctx->SetOutputDim("Out", x_dims);
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+};
+```
+
+---
+
+### Operator 类中需要完成的工作
+
+
+
+1. clip_op 继承自`OperatorWithKernel`,
+
+    ```cpp
+    using framework::OperatorWithKernel::OperatorWithKernel;
+    ```
+    表示使用基类`OperatorWithKernel`的构造函数。
+
+1. 重写`InferShape`接口。
+    - `InferShape` 为const函数,不能修改Op的成员变量
+    - `InferShape` 的参数为 `const framework::InferShapeContext &ctx`,从中可获取到输入输出以及属性
+    - `InferShape` 会被调用两次,一次是编译时(创建op),一次是运行时(调用op的`Run`方法时),需要完成以下功能:
+        1. 做检查, 尽早报错:检查输入数据维度、类型等是否合法
+        2. 设置输出Tensor的形状
+
+通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中。
+
+
+
+---
+
+### 补充说明
+
+
+
+1. `InferShape`目前支持两种实现方式,二者最后都会生成一个functor注册给OpInfo结构体。
+    1. 继承framework::InferShapeBase,实现为一个functor(参考 [mul_op](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/mul_op.cc#L22))
+    2. override InferShape函数(参考 [clip_op](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/clip_op.cc#L24))
+
+1. 什么是`functor` ?
+
+    - 类或结构体仅重载了`()`,一般是可被多个kernel复用的计算函数。
+
+      ```cpp
+      template <typename DeviceContext, typename T>
+      class CrossEntropyFunctor {
+       public:
+        void operator()(const platform::CPUDeviceContext& ctx,
+                        framework::Tensor* out,
+                        const framework::Tensor* prob,
+                        const framework::Tensor* labels, const bool softLabel) {
+          ……
+        }
+      };
+      ```
+
+    - 在 clip_op 内也会看到将一段计算函数抽象为functor的用法: [->](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/operators/clip_op.h#L27)。
+
+
+
+---
+
+###### 实现带Kernel的Operator step3: 定义OpKernel类
+
+
+
+- `ClipKernel`继承自`framework::OpKernel`,带有下面两个模板参数:
+    1. `typename DeviceContext`: 表示设备类型,不同设备共享同一个Kernel时,需添加该模板参数。不共享时,需要提供针对不同设备的特化实现。
+    1. `typename T` : 表示支持的数据类型,如`float`, `double`等
+
+- 在`ClipKernel`类中重写`Compute`方法
+    1. `Compute`接受输入参数:`const framework::ExecutionContext& context`
+        - `ExecutionContext` 是从 `Scope`中将运行时Op的输入、输出`Variable`组织在一起,使得Op在调用`Compute`方法时,能够简单地通过名字拿到需要的输入输出`Variable`
+        - 与`InferShapeContext`相比,`ExecutionContext` 中增加了设备类型
+    1. 在`Compute`函数里实现`OpKernel`的具体计算逻辑
+
+
+
+---
+#### ClipKernel 代码概览
+
+
+
+```cpp
+template <typename DeviceContext, typename T>
+class ClipKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto max = context.Attr<T>("max");
+    auto min = context.Attr<T>("min");
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+    T* out_data = out->mutable_data<T>(context.GetPlace());
+    const T* x_data = x->data<T>();
+    int64_t numel = x->numel();
+    Transform<DeviceContext> trans;
+    trans(context.template device_context<DeviceContext>(), x_data,
+          x_data + numel, out_data, ClipFunctor<T>(min, max));
+  }
+};
+```
+
+- 为了使`OpKernel`的计算过程书写更加简单,并且CPU、CUDA的代码可以复用, Fluid 使用 Eigen 作为基础的矩阵运算库
+- Fluid对Eigen unsupported Tensor提供了一些基本的封装,可以在`Compute`接口中直接调用
+    - 关于在PaddlePaddle中如何使用Eigen库,请参考[使用文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/use_eigen_cn.md)。
+
+
+
+---
+###### 实现带Kernel的Operator step4: 实现反向Op
+
+
+
+- ==**反向Op没有`ProtoMaker`**==,除此之外定义与实现方式与前向Op完全一致,不再赘述
+- 这里仅对反向Op的输入输出进行说明:
+    1. 反向Op的输入
+        - 前向Op的输出
+        - 反向传播过程中传递给当前Op的梯度
+            - 需要注意,Fluid中,不区分Cost Op和中间层Op,所有Op都必须正确处理接收到的梯度
+    2. 反向Op的输出
+        - 对可学习参数的求导结果
+        - 对所有输入的求导结果
+
+
+
+---
+
+###### 实现带Kernel的Operator step5: 注册Op及Kernel
+
+
+
+至此Op和Op kernel都已经实现完毕,接下来,需要在`.cc`和`.cu`文件中注册op和kernel
+
+1. 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。
+
+    ```cpp
+    namespace ops = paddle::operators;
+    REGISTER_OP(clip, ops::ClipOp, ops::ClipOpMaker<float>, clip_grad,
+                ops::ClipOpGrad);
+    REGISTER_OP_CPU_KERNEL(
+        clip, ops::ClipKernel<paddle::platform::CPUPlace, float>);
+    REGISTER_OP_CPU_KERNEL(
+        clip_grad, ops::ClipGradKernel<paddle::platform::CPUPlace, float>);
+    ```
+
+    - 在上面的代码片段中:
+
+        1. `REGISTER_OP` : 注册`ops::ClipOp`类,类型名为`clip`,该类的`ProtoMaker`为`ops::ClipOpMaker`,注册`ops::ClipOpGrad`,类型名为`clip_grad`
+        1. `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op,例如:优化算法相关的Op
+        1. `REGISTER_OP_CPU_KERNEL` :注册`ops::ClipKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::ClipGradKernel`类
+
+
+1. 按照同样方法,在`.cu`文件中注册GPU Kernel
+    - 如果CUDA Kernel的实现基于Eigen,需在 `.cu`的开始加上宏定义 `#define EIGEN_USE_GPU`
+
+
+
+---
+
+##### 编译和Python端绑定
+
+
+
+- 运行下面命令可以仅编译新添加的Op:
+
+  ```
+  make mul_op
+  ```
+    - 需注意,运行单元测试需要编译整个工程
+
+- 如果遵循前文的文件命名规则,构建过程中,会自动为新增的op添加Python端绑定,并链接到生成的lib库中
+
+
+
+---
+
+###### 实现带Kernel的Operator step6: 添加前向单测及梯度检测
+
+
+
+- 新增Op的单元测试统一添加至:[python/paddle/v2/fluid/tests/unittests](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/tests/unittests)目录
+- 前向Operator单测
+
+    1. Op单元测试继承自`OpTest`,各项具体的单元测试在`TestClipOp`里完成,所有单测case都以`TestXX`命名
+    1. 单元测试Operator,需要:
+        1. 在`setUp`函数定义输入、输出,以及相关的属性参数
+        1. 生成随机的输入数据
+        1. 在Python脚本中实现与前向operator相同的计算逻辑,得到输出值,与operator前向计算的输出进行对比
+        1.
反向梯度检测流程测试框架已经实现,直接调用相应接口`check_grad`即可 + +- `clip_op` 单测代码请参考 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/test_clip_op.py),这里不再展开 + + + +--- +#### 编译执行单测 + + + +- `python/paddle/v2/framework/tests` 目录下新增的 `test_*.py` 单元测试会被自动加入工程进行编译 + + - 运行单元测试测时需要编译整个工程,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON` +- 编译成功后,执行下面的命令来运行单元测试: + + ```bash + make test ARGS="-R test_mul_op -V" + ``` + + 或者: + + ``` + ctest -R test_mul_op + ``` + + +--- + +### 添加Op的一些注意事项 + + + +- 为每个Op创建单独的`*_op.h`(如有)、`*_op.cc`和`*_op.cu`(如有)。不允许一个文件中包含多个Op,将会导致编译出错。 +- 注册Op时的类型名,需要和该Op的名字一样。不允许在`A_op.cc`里面,注册`REGISTER_OP(B, ...)`,会导致单元测试出错。 +- 如果Op没有实现CUDA Kernel,不要创建空的`*_op.cu`,会导致单元测试出错。 +- 如果多个Op依赖一些共用的函数,可以创建非`*_op.*`格式的文件来存放,如`gather.h`文件。 + + + +--- + +### ==10.== 使用相关问题 + +--- + +### 定义前向计算 + + + +- 当在python端执行时: + ```python + import paddle.v2.fluid as fluid + ``` + [`framework.py`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/framework.py#L1040)定义了两个全局`Program`: + ```python + # program is a global instance. + _main_program_ = Program() + _startup_program_ = Program() + ``` + +- 前向定义的过程就是不断往`mian_program`中添加Op和Variable +- 如果需要执行一个新的`mian_program`时,可以调用调用: + ```python + def switch_main_program(program): + """ + Switch the main program to a new program. + This funtion returns the previous main program. + """ + …… + ``` + + +--- + +### 自定义参数的初始化 + + + +- 调用`fluid.ParamAttr(……)`接口,自定义参数的初始化 + + ```python + w_param_attrs = ParamAttr(name=None, + initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + learning_rate=1.0, + regularizer=L1Decay(1.0), + trainable=True, + clip=GradientClipByValue(-1.0, 1.0), + ) + y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) + ``` + +- 补充问题:如何创建 `Variable` + ```python + cur_program = Program() + cur_block = cur_program.current_block() + new_var = cur_block.create_var(name="X", shape=[-1, 16, 16], dtype="float32") + ``` + + + +--- + +### 添加反向Op + + + +- 调用`fluid.backward.append_backward(X)`(`X`是一个Variable),来为一段前向`ProgramDesc`添加反Op + + ```python + data = fluid.layers.data(name="data", shape=(2,3,4)) + out = fluid.layers.fc(input=data,size=128,act=None) + loss = fluid.layers.reduce_sum(out) + fluid.backward.append_backward(loss=loss) + ``` + +- 添加优化相关的Op + ```python + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(loss) + ``` + +- 可以随时调用`print(fluid.default_main_program())`来输出当前的`main_program` + +- 当构建完成整个`Program`后,调用下面的接口执行内存优化: + ```python + fluid.memory_optimize(fluid.default_main_program()) + ``` + - _注:内存优化目前仍在持续开发中,有可能不够稳定。_ + + + +--- + +### 总结:编译时执行流程 + + + +- 用户定义前向计算 +- 添加反向Op到`default_main_program` +- 添加 gradient clipping Op 到 +- 添加 regularization Op 到`default_main_program` +- 为指定的优化算法,添加相关的状态 variable of optimizer 到`default_startup_program` + - 状态相关 variable是指如学习率, 历史 momentum, 二阶momentum等 +- 添加初始化 variable 的Op 到 `default_startup_program` +- 为整个网络最后一个op,添加设置其接受到的梯度的Op到`default_main_program` +- 进行内存优化规划 + + + +--- + +### Feed 数据 (一):通过 feed 字典 + + + +- 执行executor的run方法时,指定feed字典,feed op 会将指定的数据放到`x`和`y`两个Variable中 + ```python + y_data = np.random.randint(0, 8, [1]).astype("int32") + y_tensor = core.Tensor() + y_tensor.set(y_data, place) + + x_data = np.random.uniform(0.1, 1, [11, 8]).astype("float32") + x_tensor = core.Tensor() + x_tensor.set(x_data, place) + …… + cost = exe.run( + fluid.default_main_program(), + feed={'x': x_tensor, + 'y': y_tensor}, + fetchlist=[avg_cost]) + ``` + +- 这种方法较为底层,一般用于单测中 + + + +--- + +### Feed 数据 
(二):使用 DataFeeder接口 + + + +- 编写一个data_reader函数,data_reader是一个Python generator + + ```python + def demo_reader(): + def random_generator(): + yield np.random.uniform(0.1, 1, [4]), np.random.randint(0, 1, [1]) + return random_generator + ``` +- 在训练任务中使用 DataFeeder 接口 + ```python + cost = exe.run( + fluid.default_main_program(), + feed={'x': x_tensor, + 'y': y_tensor}, + fetchlist=[avg_cost]) + + train_reader = paddle.batch( + paddle.reader.shuffle(demo_reader(), buf_size=500), batch_size=4) + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + for data in train_reader(): + cost = exe.run( + fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[cost]) + ``` + + + +--- + +### 常见问题 + + + +- 如何使用 evaluator ? [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_label_semantic_roles.py#L168) + + ```python + accuracy = fluid.evaluator.Accuracy(input=predict, label=label) + for pass_id in range(PASS_NUM): + accuracy.reset() + for data in train_reader(): + loss, acc = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost] + accuracy.metrics) + pass_acc = accuracy.eval(exe) + # acc 当前一个batch 的 accuracy + # pass_acc 当前batch 的 accuracy + pass_total_acc = accuracy.eval(exe) # 整个pass的accuracy + ``` + +- 如何在训练中测试?[->](https://github.com/dzhwinter/benchmark/blob/master/fluid/vgg16.py#L144) +- 如何保存训练好的模型?[->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py#L143) +- 如何加载训练好的模型进行预测?[->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py#L154) +- 如何在同一个训练任务中定义多个Program,并交替运行? [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/demo/fc_gan.py) +- 如何profile?Fluid 实现了profile 工具,可以直接调用。请参考示例 [->](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/unittests/test_profiler.py) + + + + +--- diff --git a/doc/fluid/images/1.png b/doc/fluid/images/1.png new file mode 100644 index 0000000000000000000000000000000000000000..67daf566f91aab570e60971c4ea8e2be876e214d Binary files /dev/null and b/doc/fluid/images/1.png differ diff --git a/doc/fluid/images/2.png b/doc/fluid/images/2.png new file mode 100644 index 0000000000000000000000000000000000000000..43367777f41449a666e7a3b571f09ac5d5dfb1ae Binary files /dev/null and b/doc/fluid/images/2.png differ diff --git a/doc/fluid/images/3.png b/doc/fluid/images/3.png new file mode 100644 index 0000000000000000000000000000000000000000..481021ef306e2596818aab7fe17a570754f63635 Binary files /dev/null and b/doc/fluid/images/3.png differ diff --git a/doc/fluid/images/4.png b/doc/fluid/images/4.png new file mode 100644 index 0000000000000000000000000000000000000000..4279f41e06de459f18b9a622539511d555e9a0af Binary files /dev/null and b/doc/fluid/images/4.png differ diff --git a/doc/fluid/images/LoDTensor.png b/doc/fluid/images/LoDTensor.png new file mode 100644 index 0000000000000000000000000000000000000000..75369f5378309e0f304b83f6bb69bdb195eac079 Binary files /dev/null and b/doc/fluid/images/LoDTensor.png differ diff --git a/doc/fluid/images/compile_run_time.png b/doc/fluid/images/compile_run_time.png new file mode 100644 index 0000000000000000000000000000000000000000..0bc9b2fd0e81b4851e6d96171ccb9a05d0f42a48 Binary files /dev/null and b/doc/fluid/images/compile_run_time.png differ diff --git a/doc/fluid/images/executor.png b/doc/fluid/images/executor.png new file mode 100644 index 
0000000000000000000000000000000000000000..b29c0d779e3d46b779b5baeabe3176adaeb00a6d Binary files /dev/null and b/doc/fluid/images/executor.png differ diff --git a/doc/fluid/images/fluid_examples.png b/doc/fluid/images/fluid_examples.png new file mode 100644 index 0000000000000000000000000000000000000000..aa99472c0f914cde128fd7b3bd8dc29ac24f94b6 Binary files /dev/null and b/doc/fluid/images/fluid_examples.png differ diff --git a/doc/fluid/images/fluid_module_1.png b/doc/fluid/images/fluid_module_1.png new file mode 100644 index 0000000000000000000000000000000000000000..554782ba54e43efc3d6babbb94e3cac3530ac649 Binary files /dev/null and b/doc/fluid/images/fluid_module_1.png differ diff --git a/doc/fluid/images/fluid_module_2.png b/doc/fluid/images/fluid_module_2.png new file mode 100644 index 0000000000000000000000000000000000000000..4219efccbb1e87839adf6b5720fe46808b7d2fcf Binary files /dev/null and b/doc/fluid/images/fluid_module_2.png differ diff --git a/doc/fluid/images/layer.png b/doc/fluid/images/layer.png new file mode 100644 index 0000000000000000000000000000000000000000..e46db4c9c6f5b65ff274b498b716b11de343a8b0 Binary files /dev/null and b/doc/fluid/images/layer.png differ diff --git a/doc/fluid/images/operator1.png b/doc/fluid/images/operator1.png new file mode 100644 index 0000000000000000000000000000000000000000..3975b06f615b7a88dfc11e71b6451fdf4ce42d60 Binary files /dev/null and b/doc/fluid/images/operator1.png differ diff --git a/doc/fluid/images/operator2.png b/doc/fluid/images/operator2.png new file mode 100644 index 0000000000000000000000000000000000000000..b7bb1fae2050d3a70797517bc20dbbdef3dfcb7c Binary files /dev/null and b/doc/fluid/images/operator2.png differ diff --git a/doc/fluid/images/place.png b/doc/fluid/images/place.png new file mode 100644 index 0000000000000000000000000000000000000000..14e77511d639af155e5a3725cde05323e0cc94f2 Binary files /dev/null and b/doc/fluid/images/place.png differ diff --git a/doc/fluid/images/print_fluid_program.png b/doc/fluid/images/print_fluid_program.png new file mode 100644 index 0000000000000000000000000000000000000000..e8e459e1b3d5c8706b3caa05dc371db8d46df4a5 Binary files /dev/null and b/doc/fluid/images/print_fluid_program.png differ diff --git a/doc/fluid/images/program_desc1.png b/doc/fluid/images/program_desc1.png new file mode 100644 index 0000000000000000000000000000000000000000..0656336914ece957f2e5bb4d70ad337a63e31d88 Binary files /dev/null and b/doc/fluid/images/program_desc1.png differ diff --git a/doc/fluid/images/program_desc2.png b/doc/fluid/images/program_desc2.png new file mode 100644 index 0000000000000000000000000000000000000000..db5bfa1231345add8661b4f8ef0fc9d861f40d24 Binary files /dev/null and b/doc/fluid/images/program_desc2.png differ diff --git a/doc/fluid/images/raw_input.png b/doc/fluid/images/raw_input.png new file mode 100644 index 0000000000000000000000000000000000000000..0725f92d2b169c2b59ec7c68b402859c2a2dd1d8 Binary files /dev/null and b/doc/fluid/images/raw_input.png differ diff --git a/doc/fluid/images/scope_variable_tensor.png b/doc/fluid/images/scope_variable_tensor.png new file mode 100644 index 0000000000000000000000000000000000000000..59b0de6fb36f9f6b469227c05760a7612bb30b4d Binary files /dev/null and b/doc/fluid/images/scope_variable_tensor.png differ diff --git a/doc/fluid/images/sorted_input.png b/doc/fluid/images/sorted_input.png new file mode 100644 index 0000000000000000000000000000000000000000..ff601128368ee179e3fd33e5e295a9ddd3dcbaeb Binary files /dev/null and 
b/doc/fluid/images/sorted_input.png differ diff --git a/doc/fluid/images/transpiler.png b/doc/fluid/images/transpiler.png new file mode 100644 index 0000000000000000000000000000000000000000..422973c0dc7aa2b544d2fc86a97ace706388cb9e Binary files /dev/null and b/doc/fluid/images/transpiler.png differ diff --git a/doc/fluid/images/user_interface.png b/doc/fluid/images/user_interface.png new file mode 100644 index 0000000000000000000000000000000000000000..ffc94e3d8945ec6291460afd90e8fcc600828390 Binary files /dev/null and b/doc/fluid/images/user_interface.png differ diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst index 077f5e9b189269f9f6c9cf68310e2bfd43d8cb67..741c01ce5428c0046daa5a784da70d4bb492438c 100644 --- a/doc/v2/build_and_install/build_from_source_cn.rst +++ b/doc/v2/build_and_install/build_from_source_cn.rst @@ -35,13 +35,11 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 # 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像 docker build -t paddle:dev . # 3. 执行下面的命令编译CPU-Only的二进制 - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build # 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步) - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build -注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。如果使用自行 -构建的镜像(上述第4步)会执行 :code:`Dockerfile` 描述的默认入口程序 :code:`build.sh` 可以省略步骤3中 -最后的执行脚本的命令。 +注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。 编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装: @@ -72,15 +70,15 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test 如果期望执行其中一个单元测试,(比如 :code:`test_sum_op` ): .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash - bash /paddle/paddle/scripts/docker/build.sh - cd /paddle/build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash + ./paddle/scripts/paddle_build.sh build + cd build ctest -R test_sum_op -V .. _faq_docker: diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst index 545e61ce9602240807d515e9eae971dfca9ddd7f..b06c43e19dcfc52ad0f074a85517a16744895a3a 100644 --- a/doc/v2/build_and_install/build_from_source_en.rst +++ b/doc/v2/build_and_install/build_from_source_en.rst @@ -34,14 +34,12 @@ Or you can build your own image from source as the optional step below: # 2. Optional: build development docker image from source docker build -t paddle:dev . # 3. 
Run the following command to build a CPU-Only binaries - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build # 4. Or, use your built Docker image to build PaddlePaddle (must run step 2) - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build NOTE: The above command try to mount the current working directory (root directory of source code) -into :code:`/paddle` directory inside docker container. If you are using your own image -(Step 4) it will run default entry-point :code:`build.sh` , so you could omit the last -command in step 3. +into :code:`/paddle` directory inside docker container. When the compile finishes, you can get the output whl package under build/python/dist, then you can choose to install the whl on local @@ -74,15 +72,15 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU. .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test If you wish to run only one unit test, like :code:`test_sum_op`: .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash - bash /paddle/paddle/scripts/docker/build.sh - cd /paddle/build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash + ./paddle/scripts/paddle_build.sh build + cd build ctest -R test_sum_op -V .. _faq_docker: diff --git a/doc/v2/build_and_install/docker_install_cn.rst b/doc/v2/build_and_install/docker_install_cn.rst index da876b03e384a8175b27f78756af648c80fc6784..106c86bace075764c84bc2a7f7cb09d466fa8794 100644 --- a/doc/v2/build_and_install/docker_install_cn.rst +++ b/doc/v2/build_and_install/docker_install_cn.rst @@ -98,7 +98,7 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note 国内用户可以使用下面的镜像源来加速访问: - .. code-block: bash + .. code-block:: bash docker run -p 8888:8888 docker.paddlepaddlehub.com/book diff --git a/doc/v2/build_and_install/docker_install_en.rst b/doc/v2/build_and_install/docker_install_en.rst index 5dbdedc4cb064ef415e8d19f00727a16d1c175c6..25aecb8d0da9feb00006da6259b529b7011d91cb 100644 --- a/doc/v2/build_and_install/docker_install_en.rst +++ b/doc/v2/build_and_install/docker_install_en.rst @@ -105,7 +105,7 @@ We provide a packaged book image, simply issue the command: For users in China, we provide a faster mirror: - .. code-block: bash + .. 
code-block:: bash docker run -p 8888:8888 docker.paddlepaddlehub.com/book diff --git a/paddle/.gitignore b/paddle/.gitignore index 1c1c0c2c829f088d7e3f52ca007fcb8f33a16a36..01904aa6ef2057afee95ddd6e30cde064b06c52e 100644 --- a/paddle/.gitignore +++ b/paddle/.gitignore @@ -11,7 +11,6 @@ GTAGS *.pb.cc *.pb.h *_pb2.py -paddle_* output/ google/ Makefile diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b19256ef4533a09162edf907f6cd51146517e46 --- /dev/null +++ b/paddle/contrib/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_subdirectory(inference) diff --git a/paddle/contrib/float16/README.md b/paddle/contrib/float16/README.md index ded959c47cb81b9384abbb9815773e25969344ec..58b4a50666bfb622af8acbce29355f2a4a870a82 100644 --- a/paddle/contrib/float16/README.md +++ b/paddle/contrib/float16/README.md @@ -89,7 +89,7 @@ cd Paddle # to `FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04` and similarly for other configurations nvidia-docker build -t paddle:float16 . # After running this, different results will be written to different log files in Paddle/contrib/float16/ -nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/contrib/float16/run_float16_demo.sh +nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/paddle/contrib/float16/run_float16_demo.sh ``` #### Accuracy diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh index d8a34ee67b8fab214fa6e96104304689211f84da..031225a85dabb26e5d9ea06f58909c049e7f0c08 100755 --- a/paddle/contrib/float16/run_float16_demo.sh +++ b/paddle/contrib/float16/run_float16_demo.sh @@ -3,7 +3,7 @@ BUILD_PATH=/paddle/fp16_build WHEEL_PATH=$BUILD_PATH/python/dist INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book -DEMO_PATH=/paddle/contrib/float16 +DEMO_PATH=/paddle/paddle/contrib/float16 # Use the single most powerful CUDA GPU on your machine export CUDA_VISIBLE_DEVICES=0 @@ -50,7 +50,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=imagenet \ --dirname=$DEMO_PATH/image_classification_imagenet_vgg.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_vgg.inference.model \ --repeat=$REPEAT \ @@ -68,7 +67,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_resnet \ - --data_set=imagenet \ --dirname=$DEMO_PATH/image_classification_imagenet_resnet.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_resnet.inference.model \ --repeat=$REPEAT \ @@ -86,7 +84,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=cifar10 \ --dirname=$DEMO_PATH/image_classification_cifar10_vgg.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_vgg.inference.model \ --repeat=$REPEAT \ @@ -104,7 +101,6 @@ do --repeat=1 \ 
$INFER_PATH/test_inference_image_classification_vgg \ - --data_set=cifar10 \ --dirname=$DEMO_PATH/image_classification_cifar10_resnet.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_resnet.inference.model \ --repeat=$REPEAT \ diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4fe10f708e5bb8b28e34b2d91b2254c346c467f --- /dev/null +++ b/paddle/contrib/inference/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST) + set(options "") + set(oneValueArgs "") + set(multiValueArgs ARGS) + cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) + set(arg_list "") + if(inference_test_ARGS) + foreach(arg ${inference_test_ARGS}) + list(APPEND arg_list "_${arg}") + endforeach() + else() + list(APPEND arg_list "_") + endif() + foreach(arg ${arg_list}) + string(REGEX REPLACE "^_$" "" arg "${arg}") + cc_test(${TARGET_NAME} + SRCS ${TEST_SRC} + DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl + ARGS --dirname=${PYTHON_TESTS_DIR}/book/) + # set_tests_properties(${TARGET_NAME} + # PROPERTIES DEPENDS ${DEP_TEST}) + endforeach() +endfunction(inference_api_test) + + +cc_library(paddle_inference_api + SRCS paddle_inference_api.cc + DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) + +cc_library(paddle_inference_api_impl + SRCS paddle_inference_api_impl.cc + DEPS paddle_inference_api paddle_fluid_api) + +cc_test(test_paddle_inference_api + SRCS test_paddle_inference_api.cc + DEPS paddle_inference_api) + +inference_api_test(test_paddle_inference_api_impl + test_paddle_inference_api_impl.cc + test_word2vec) diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc new file mode 100644 index 0000000000000000000000000000000000000000..d67e1e7667800d6dd00cb8915b0d6dc7c664970b --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api.cc @@ -0,0 +1,15 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/contrib/inference/paddle_inference_api.h" diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h index dbaa7c95b97e954537707566e5b7458e6afd14c8..9ac8ebdef8151f2a144b479fa258b8bc830fc2e9 100644 --- a/paddle/contrib/inference/paddle_inference_api.h +++ b/paddle/contrib/inference/paddle_inference_api.h @@ -12,49 +12,74 @@ See the License for the specific language governing permissions and limitations under the License. */ +/* + * This file contains the definition of a simple Inference API for Paddle. + * + * ATTENTION: It requires some C++ features, for lower version C++ or C, we + * might release another API. + */ + #pragma once +#include #include #include namespace paddle { -class Predictor { -public: - struct Attr; - Predictor() = default; +enum PaddleDType { + FLOAT32, + INT64, +}; - // Build the network before inference. - bool Init(const Attr& attr); +struct PaddleBuf { + void* data; // pointer to the data memory. + size_t length; // number of memory bytes. +}; + +struct PaddleTensor { + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; +}; + +/* +* A simple Inference API for Paddle. Currently this API might just be used by +* non-sequence scenerios. +* TODO(Superjomn) Prepare another API for NLP-related usages. +*/ +class PaddlePredictor { +public: + struct Config; + PaddlePredictor() = default; + PaddlePredictor(const PaddlePredictor&) = delete; // Predict an record. - // Arguments: - // inputs: the name of the input variables. - // outputs: the name of the output varaibles. - // input_shapes: the shape of the input variables. - // output_shapes: the shape of the output variables. - // input_data: the data of the input variables. - // output_data: the data of the output variables. - bool Run(const std::vector& inputs, - const std::vector& outputs, - const std::vector>& input_shapes, - const std::vector>& output_shapes, - const std::vector>& input_data, - std::vector>* output_data); - - // Clone a predictor that share the model weights. - Predictor* Clone(); + // The caller should be responsible for allocating and releasing the memory of + // `inputs`. `inputs` should be alive until Run returns. caller should be + // responsible for releasing the memory of `output_data`. + virtual bool Run(const std::vector& inputs, + std::vector* output_data) = 0; + + // Clone a predictor that share the model weights, the Cloned predictor should + // be thread-safe. + virtual std::unique_ptr Clone() = 0; // Destroy the Predictor. - ~Predictor(); + virtual ~PaddlePredictor() {} - struct Attr { + friend std::unique_ptr CreatePaddlePredictor( + const PaddlePredictor::Config& config); + + // The common configs for all the predictors. + struct Config { enum class EngineKind; std::string model_dir; // path to the model directory. bool enable_engine{false}; // Enable to execute (part of) the model on - // third-party engines. - EngineKind engine_kind{Attr::EngineKind::kNone}; + // third-party engines. + EngineKind engine_kind{Config::EngineKind::kNone}; enum class EngineKind { kNone = -1, // Use the native Fluid facility. @@ -66,4 +91,8 @@ public: }; }; +// A factory to help create difference predictor. 
+template <typename ConfigT>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
 }  // namespace paddle
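For reviewers, here is a minimal end-to-end sketch of how a caller is expected to drive the API declared above. It is illustrative only: the model path, the feed name, and the assumption of a single int64 feed of shape [1, 1] are made up for the example, not part of this patch.

// Hedged usage sketch; paths and tensor names below are hypothetical.
#include <cstdlib>
#include <memory>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api_impl.h"

int main() {
  paddle::VisConfig config;
  config.model_dir = "/path/to/some.inference.model";  // hypothetical model
  config.device = -1;                                  // negative -> CPUPlace
  config.share_variables = true;
  config.fraction_of_gpu_memory = 0.5f;

  std::unique_ptr<paddle::PaddlePredictorImpl> predictor =
      paddle::CreatePaddlePredictorImpl(config);
  if (!predictor) return 1;

  int64_t word_id = 0;  // assumes a model with one int64 feed of shape [1, 1]
  paddle::PaddleTensor input;
  input.name = "firstw";  // hypothetical feed name
  input.shape = {1, 1};
  input.data.data = &word_id;
  input.data.length = sizeof(word_id);
  input.dtype = paddle::PaddleDType::INT64;

  std::vector<paddle::PaddleTensor> outputs;
  if (predictor->Run({input}, &outputs)) {
    // Run() malloc()s each output buffer, so the caller frees it.
    for (auto& t : outputs) free(t.data.data);
  }
  return 0;
}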
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ecca16d3f82bbeee6858883a0f9e577a479f9d06
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api_impl.cc
@@ -0,0 +1,309 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <sys/time.h>
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "paddle/contrib/inference/paddle_inference_api_impl.h"
+
+namespace paddle {
+namespace {
+
+// A simple timer used for profiling Run().
+class Timer {
+public:
+  double start;
+  double startu;
+  void tic() {
+    struct timeval tp;
+    gettimeofday(&tp, NULL);
+    start = tp.tv_sec;
+    startu = tp.tv_usec;
+  }
+  double toc() {
+    struct timeval tp;
+    gettimeofday(&tp, NULL);
+    double used_time_ms =
+        (tp.tv_sec - start) * 1000.0 + (tp.tv_usec - startu) / 1000.0;
+    return used_time_ms;
+  }
+};
+
+template <typename T>
+std::string num2str(T a) {
+  std::stringstream istr;
+  istr << a;
+  return istr.str();
+}
+}  // namespace
+
+bool PaddlePredictorImpl::Init() {
+  VLOG(3) << "Predictor::init()";
+
+  // TODO(panyx0718): Should CPU vs GPU device be decided by id?
+  if (config_.device >= 0) {
+    place_ = paddle::platform::CUDAPlace(config_.device);
+  } else {
+    place_ = paddle::platform::CPUPlace();
+  }
+  paddle::framework::InitDevices(false);
+  executor_.reset(new paddle::framework::Executor(place_));
+  scope_.reset(new paddle::framework::Scope());
+
+  // Initialize the inference program
+  if (!config_.model_dir.empty()) {
+    // Parameters are saved in separate files sited in
+    // the specified `dirname`.
+    inference_program_ = paddle::inference::Load(
+        executor_.get(), scope_.get(), config_.model_dir);
+  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
+    // All parameters are saved in a single file.
+    // The file names should be consistent with that used
+    // in Python API `fluid.io.save_inference_model`.
+    inference_program_ = paddle::inference::Load(
+        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
+  } else {
+    LOG(ERROR) << "failed to load inference model.";
+    return false;
+  }
+  ctx_ = executor_->Prepare(*inference_program_, 0);
+
+  // Create variables
+  // TODO(panyx0718): Why do we need to test share_variables here?
+  if (config_.share_variables) {
+    executor_->CreateVariables(*inference_program_, scope_.get(), 0);
+  }
+  // Get the feed_target_names and fetch_target_names
+  feed_target_names_ = inference_program_->GetFeedTargetNames();
+  fetch_target_names_ = inference_program_->GetFetchTargetNames();
+  return true;
+}
+
+bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
+                              std::vector<PaddleTensor> *output_data) {
+  VLOG(3) << "Predictor::predict";
+  Timer timer;
+  timer.tic();
+  // set feed variable
+  std::map<std::string, const paddle::framework::LoDTensor *> feed_targets;
+  std::vector<paddle::framework::LoDTensor> feeds;
+  if (!SetFeed(inputs, &feeds)) {
+    LOG(ERROR) << "failed to set feed";
+    return false;
+  }
+  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+    feed_targets[feed_target_names_[i]] = &feeds[i];
+  }
+  // get fetch variable
+  std::map<std::string, paddle::framework::LoDTensor *> fetch_targets;
+  std::vector<paddle::framework::LoDTensor> fetchs;
+  fetchs.resize(fetch_target_names_.size());
+  for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
+    fetch_targets[fetch_target_names_[i]] = &fetchs[i];
+  }
+  // Run the inference program
+  // if share variables, we need not create variables
+  executor_->RunPreparedContext(ctx_.get(),
+                                scope_.get(),
+                                &feed_targets,
+                                &fetch_targets,
+                                !config_.share_variables);
+  if (!GetFetch(fetchs, output_data)) {
+    LOG(ERROR) << "failed to get fetches";
+    return false;
+  }
+  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+  return true;
+}
+
+std::unique_ptr<PaddlePredictor> PaddlePredictorImpl::Clone() {
+  VLOG(3) << "Predictor::clone";
+  std::unique_ptr<PaddlePredictorImpl> cls(new PaddlePredictorImpl(config_));
+  if (!cls->InitShared(this)) {
+    LOG(ERROR) << "failed to call InitShared";
+    return nullptr;
+  }
+  return std::move(cls);
+}
+
+// TODO(panyx0718): Consider merge with Init()?
+bool PaddlePredictorImpl::InitShared(PaddlePredictorImpl *cls) {
+  VLOG(3) << "Predictor::init_shared";
+  // 1. Define place, executor, scope
+  if (this->config_.device >= 0) {
+    place_ = paddle::platform::CUDAPlace();
+  } else {
+    place_ = paddle::platform::CPUPlace();
+  }
+  this->executor_.reset(new paddle::framework::Executor(this->place_));
+  this->scope_.reset(new paddle::framework::Scope());
+  // 2. Initialize the inference program
+  if (!this->config_.model_dir.empty()) {
+    // Parameters are saved in separate files sited in
+    // the specified `dirname`.
+    this->inference_program_ = paddle::inference::Load(
+        this->executor_.get(), this->scope_.get(), this->config_.model_dir);
+  } else if (!this->config_.prog_file.empty() &&
+             !this->config_.param_file.empty()) {
+    // All parameters are saved in a single file.
+    // The file names should be consistent with that used
+    // in Python API `fluid.io.save_inference_model`.
+    this->inference_program_ =
+        paddle::inference::Load(this->executor_.get(),
+                                this->scope_.get(),
+                                this->config_.prog_file,
+                                this->config_.param_file);
+  }
+  this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
+  // 3. Create variables
+  // TODO(panyx0718): why test share_variables.
+  if (config_.share_variables) {
+    this->executor_->CreateVariables(
+        *this->inference_program_, this->scope_.get(), 0);
+  }
+  // 4. Get the feed_target_names and fetch_target_names
+  this->feed_target_names_ = this->inference_program_->GetFeedTargetNames();
+  this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames();
+  return true;
+}
+
+bool PaddlePredictorImpl::SetFeed(
+    const std::vector<PaddleTensor> &inputs,
+    std::vector<paddle::framework::LoDTensor> *feeds) {
+  VLOG(3) << "Predictor::set_feed";
+  if (inputs.size() != feed_target_names_.size()) {
+    LOG(ERROR) << "wrong feed input size.";
+    return false;
+  }
+  for (size_t i = 0; i < feed_target_names_.size(); ++i) {
+    paddle::framework::LoDTensor input;
+    paddle::framework::DDim ddim =
+        paddle::framework::make_ddim(inputs[i].shape);
+    void *input_ptr;
+    if (inputs[i].dtype == PaddleDType::INT64) {
+      input_ptr =
+          input.mutable_data<int64_t>(ddim, paddle::platform::CPUPlace());
+    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
+      input_ptr = input.mutable_data<float>(ddim, paddle::platform::CPUPlace());
+    } else {
+      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
+      return false;
+    }
+
+    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
+    std::memcpy(static_cast<void *>(input_ptr),
+                inputs[i].data.data,
+                inputs[i].data.length);
+    feeds->push_back(input);
+    LOG(ERROR) << "Actual feed type " << feeds->back().type().name();
+  }
+  return true;
+}
+
+bool PaddlePredictorImpl::GetFetch(
+    const std::vector<paddle::framework::LoDTensor> &fetchs,
+    std::vector<PaddleTensor> *outputs) {
+  VLOG(3) << "Predictor::get_fetch";
+  outputs->resize(fetchs.size());
+  for (size_t i = 0; i < fetchs.size(); ++i) {
+    // TODO(panyx0718): Support fetch of other types.
+    if (fetchs[i].type() != typeid(float)) {
+      LOG(ERROR) << "only support fetching float now.";
+      return false;
+    }
+    std::vector<int> shape;
+    auto dims_i = fetchs[i].dims();
+    auto lod = fetchs[i].lod();
+    const float *output_ptr = fetchs[i].data<float>();
+    // const int64_t* output_ptr = fetchs[i].data<int64_t>();
+    auto num = fetchs[i].numel();
+    std::vector<float> data;
+    if (0 == lod.size()) {
+      std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
+      for (int j = 0; j < dims_i.size(); ++j) {
+        shape.push_back(dims_i[j]);
+      }
+    } else {
+      // for batch detection
+      // image[0] -> output[0] shape {145, 6}
+      // image[1] -> output[1] shape {176, 6}
+      // then the batched output has shape {321, 6} and lod {{0, 145, 321}},
+      // so we pad the shorter output[0] up to max_dim (176) rows when
+      // unpacking, giving a final shape of {2, 176, 6}.
+      size_t max_dim = 0;
+      for (size_t j = 1; j < lod[0].size(); j++) {
+        max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
+      }
+      size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
+      if (max_dim > 0) {
+        data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
+      }
+      for (size_t j = 1; j < lod[0].size(); j++) {
+        size_t start = lod[0][j - 1] * common_dim;
+        size_t end = lod[0][j] * common_dim;
+        if (end > start) {
+          std::copy(output_ptr + start,
+                    output_ptr + end,
+                    data.begin() + (j - 1) * max_dim * common_dim);
+        }
+      }
+      shape.push_back(lod[0].size() - 1);
+      shape.push_back(max_dim);
+      for (int j = 1; j < dims_i.size(); ++j) {
+        shape.push_back(dims_i[j]);
+      }
+    }
+
+    outputs->at(i).shape = shape;
+    outputs->at(i).data.length = sizeof(float) * data.size();
+    outputs->at(i).data.data = malloc(outputs->at(i).data.length);
+    std::memcpy(
+        outputs->at(i).data.data, data.data(), outputs->at(i).data.length);
+    outputs->at(i).dtype = PaddleDType::FLOAT32;
+    // TODO(panyx0718): support other types? fill tensor name? avoid a copy.
+  }
+  return true;
+}
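The padding arithmetic in GetFetch() above can be sanity-checked by hand. The following standalone snippet is illustrative only; it just replays the numbers from the batched-detection comment:

// Illustrative-only check of the LoD unpacking numbers used above.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<size_t> lod0 = {0, 145, 321};  // lod {{0, 145, 321}}
  size_t num = 321 * 6;                      // numel of the fetched tensor
  size_t max_dim = 0;
  for (size_t j = 1; j < lod0.size(); j++)
    max_dim = std::max(max_dim, lod0[j] - lod0[j - 1]);
  size_t common_dim = lod0.back() == 0 ? 0 : num / lod0.back();
  assert(max_dim == 176);   // the longer of 145 and 176
  assert(common_dim == 6);  // per-row width
  // padded buffer holds (3 - 1) * 176 * 6 floats -> final shape {2, 176, 6}.
  return 0;
}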
+
+std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
+    const VisConfig &config) {
+  VLOG(3) << "create PaddlePredictorImpl";
+  // 1. GPU memory
+  std::vector<std::string> flags;
+  if (config.fraction_of_gpu_memory >= 0.0f &&
+      config.fraction_of_gpu_memory <= 0.95f) {
+    flags.push_back("dummy");  // placeholder argv[0] for InitGflags
+    std::string flag = "--fraction_of_gpu_memory_to_use=" +
+                       num2str(config.fraction_of_gpu_memory);
+    flags.push_back(flag);
+    VLOG(3) << "set flag: " << flag;
+    framework::InitGflags(flags);
+  }
+
+  std::unique_ptr<PaddlePredictorImpl> predictor(
+      new PaddlePredictorImpl(config));
+  if (!predictor->Init()) {
+    return nullptr;
+  }
+  return predictor;
+}
+
+}  // namespace paddle
diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..831abce5da58f90b38e27b5638e953de5167647a
--- /dev/null
+++ b/paddle/contrib/inference/paddle_inference_api_impl.h
@@ -0,0 +1,76 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include <glog/logging.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
+#include "paddle/fluid/framework/ddim.h"
+#include "paddle/fluid/framework/init.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/inference/io.h"
+#include "paddle/fluid/platform/profiler.h"
+
+namespace paddle {
+
+struct VisConfig : public PaddlePredictor::Config {
+  int device;
+  float fraction_of_gpu_memory;
+  std::string prog_file;
+  std::string param_file;
+  bool share_variables;
+};
+
+/*
+ * Do not use this, just a demo indicating how to customize a Predictor.
+ */
+class PaddlePredictorImpl : public PaddlePredictor {
+public:
+  explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {}
+
+  bool Init();
+
+  bool Run(const std::vector<PaddleTensor> &inputs,
+           std::vector<PaddleTensor> *output_data) override;
+
+  std::unique_ptr<PaddlePredictor> Clone() override;
+
+  ~PaddlePredictorImpl() override {}
+
+private:
+  bool InitShared(PaddlePredictorImpl *cls);
+  bool SetFeed(const std::vector<PaddleTensor> &input_datas,
+               std::vector<paddle::framework::LoDTensor> *feeds);
+  bool GetFetch(const std::vector<paddle::framework::LoDTensor> &fetchs,
+                std::vector<PaddleTensor> *output_data);
+
+  VisConfig config_;
+  paddle::platform::Place place_;
+  std::unique_ptr<paddle::framework::Executor> executor_;
+  std::unique_ptr<paddle::framework::Scope> scope_;
+  std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx_;
+  std::unique_ptr<paddle::framework::ProgramDesc> inference_program_;
+  std::vector<std::string> feed_target_names_;
+  std::vector<std::string> fetch_target_names_;
+};
+
+std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
+    const VisConfig &config);
+
+}  // namespace paddle
diff --git a/paddle/contrib/inference/test_paddle_inference_api.cc b/paddle/contrib/inference/test_paddle_inference_api.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a19173087649e8493b8c72e758456cc5b8970e23
--- /dev/null
+++ b/paddle/contrib/inference/test_paddle_inference_api.cc
@@ -0,0 +1,64 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/contrib/inference/paddle_inference_api.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+namespace paddle {
+
+/*
+ * Do not use this, just a demo indicating how to customize a config for a
+ * specific predictor.
+ */
+struct DemoConfig : public PaddlePredictor::Config {
+  float other_config;
+};
+
+/*
+ * Do not use this, just a demo indicating how to customize a Predictor.
+ */
+class DemoPredictor : public PaddlePredictor {
+public:
+  explicit DemoPredictor(const DemoConfig &config) {
+    LOG(INFO) << "I get other_config " << config.other_config;
+  }
+  bool Run(const std::vector<PaddleTensor> &inputs,
+           std::vector<PaddleTensor> *output_data) override {
+    LOG(INFO) << "Run";
+    return false;
+  }
+
+  std::unique_ptr<PaddlePredictor> Clone() override { return nullptr; }
+
+  ~DemoPredictor() override {}
+};
+
+template <>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
+    const DemoConfig &config) {
+  std::unique_ptr<PaddlePredictor> x(new DemoPredictor(config));
+  return x;
+}
+
+TEST(paddle_inference_api, demo) {
+  DemoConfig config;
+  config.other_config = 1.7;
+  auto predictor = CreatePaddlePredictor(config);
+  std::vector<PaddleTensor> outputs;
+  predictor->Run({}, &outputs);
+}
+
+}  // namespace paddle
diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
new file mode 100644
index 0000000000000000000000000000000000000000..43b068fb42c5e5c58f2932c3e528fd0fa0502ec3
--- /dev/null
+++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc
@@ -0,0 +1,83 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "gflags/gflags.h"
+#include "paddle/contrib/inference/paddle_inference_api_impl.h"
+#include "paddle/fluid/inference/tests/test_helper.h"
+
+DEFINE_string(dirname, "", "Directory of the inference model.");
+
+namespace paddle {
+
+PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
+  PaddleTensor pt;
+  pt.data.data = t->data<void>();
+
+  if (t->type() == typeid(int64_t)) {
+    pt.data.length = t->numel() * sizeof(int64_t);
+    pt.dtype = PaddleDType::INT64;
+  } else if (t->type() == typeid(float)) {
+    pt.data.length = t->numel() * sizeof(float);
+    pt.dtype = PaddleDType::FLOAT32;
+  } else {
+    LOG(FATAL) << "unsupported type.";
+  }
+  pt.shape = framework::vectorize2int(t->dims());
+  return pt;
+}
+
+TEST(paddle_inference_api_impl, word2vec) {
+  VisConfig config;
+  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
+  LOG(INFO) << "dirname " << config.model_dir;
+  config.fraction_of_gpu_memory = 0.85;
+  config.device = 0;
+  config.share_variables = true;
+
+  std::unique_ptr<PaddlePredictorImpl> predictor =
+      CreatePaddlePredictorImpl(config);
+
+  framework::LoDTensor first_word, second_word, third_word, fourth_word;
+  framework::LoD lod{{0, 1}};
+  int64_t dict_size = 2073;  // The size of the dictionary
+
+  SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
+  SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
+
+  std::vector<PaddleTensor> cpu_feeds;
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word));
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word));
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word));
+  cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));
+
+  std::vector<PaddleTensor> outputs;
+  ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs));
+  ASSERT_EQ(outputs.size(), 1UL);
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    size_t len = outputs[i].data.length;
+    float* data = static_cast<float*>(outputs[i].data.data);
+    for (size_t j = 0; j < len / sizeof(float); ++j) {
+      ASSERT_LT(data[j], 1.0);
+      ASSERT_GT(data[j], -1.0);
+    }
+    free(outputs[i].data.data);
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 1b9c685866763ed126a1bf5d7fdd851c38ac1c63..09b67e5a1741c68c5f5487340e8fc86ff31e00a4 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -243,13 +243,8 @@ const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
 }
 
 void OpDesc::Rename(const std::string &old_name, const std::string &new_name) {
-  for (auto &input : inputs_) {
-    std::replace(input.second.begin(), input.second.end(), old_name, new_name);
-  }
-  for (auto &output : outputs_) {
-    std::replace(output.second.begin(), output.second.end(), old_name,
-                 new_name);
-  }
+  RenameInput(old_name, new_name);
+  RenameOutput(old_name, new_name);
   need_update_ = true;
 }
 
@@ -274,6 +269,13 @@ void OpDesc::RenameInput(const std::string &old_name,
   for (auto &input : inputs_) {
     std::replace(input.second.begin(), input.second.end(), old_name, new_name);
   }
+
+  auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName());
+  if (it != attrs_.end()) {
+    auto &op_vars = boost::get<std::vector<std::string>>(it->second);
+    std::replace(op_vars.begin(), op_vars.end(), old_name, new_name);
+  }
+
   need_update_ = true;
 }
 
diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h
index 46c8feec001584a872f7f62682080e0e72c06f50..5f497cafa0f75f7c23d550ef767d55274de7c900 100644
--- a/paddle/fluid/framework/shape_inference.h
+++ b/paddle/fluid/framework/shape_inference.h
@@ -63,6 +63,7 @@ class InferShapeContext {
 
   std::vector<InferShapeVarPtr> GetInputVarPtrs(const std::string &name);
   std::vector<InferShapeVarPtr> GetOutputVarPtrs(const std::string &name);
+  virtual InferShapeVarPtr GetVarPtr(const std::string &name) = 0;
 
   // Note: In while op, we need this to be public
   void SetDims(const std::vector<std::string> &names,
@@ -81,8 +82,6 @@ class InferShapeContext {
       const std::vector<std::string> &names) const;
 
   virtual proto::VarType::Type GetVarType(const std::string &name) const = 0;
-
-  virtual InferShapeVarPtr GetVarPtr(const std::string &name) = 0;
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index b98aeed8a0aaabfd39560fad3c074a6668b4f024..cc4a725dfb3b3e7723a3a3a4008b20acdb53899d 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
 
+# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
 cc_library(paddle_fluid_api
     SRCS io.cc
     DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 79b1a248a0acfded0d2fcfadc041a6ad2a92ff3d..7cd777de27e9457260a1b2f5936dc917f0821984 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -1,5 +1,7 @@
 nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
-nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
-        DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
-        SERIAL)
+# This test is not stable
+# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
+#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
+#        DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
+#        SERIAL)
 nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc
index 3bae56532d655a1725e18276e09e0cade47b5c68..507b465435609a91ebca97dd70b176c3b79bee02 100644
--- a/paddle/fluid/operators/detail/sendrecvop_utils.cc
+++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc
@@ -149,12 +149,14 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
   }
 
   if (platform::is_gpu_place(ctx.GetPlace())) {
+#ifdef PADDLE_WITH_CUDA
     // GPU data is copied to CPU buffer when sending,
    // free the buffer when possible.
     destroy_callback = [](void* backing) {
       platform::CUDAPinnedPlace cuda_pinned;
       memory::Free(cuda_pinned, backing);
     };
+#endif
   }
 
   std::string header;
diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index a5bb58c2f4047a3bf2f8592b605772b4fa166c57..20d960f9fee1eae42b2241fb96c163e15db5e24d 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -24,6 +24,8 @@ detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc)
 detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu)
 detection_library(target_assign_op SRCS target_assign_op.cc
   target_assign_op.cu)
+detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
+  polygon_box_transform_op.cu)
 
 # Export local libraries to parent
 set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..335e8dd470f851d8c5f6bdbc94cfc343da269034
--- /dev/null
+++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
@@ -0,0 +1,105 @@
+/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
+                   "It must use CPUPlace.");
+    auto* in = ctx.Input<Tensor>("Input");
+    auto in_dims = in->dims();
+    const T* in_data = in->data<T>();
+    auto* out = ctx.Output<Tensor>("Output");
+    T* out_data = out->mutable_data<T>(ctx.GetPlace());
+
+    int batch_size = in_dims[0];
+    int geo_channel = in_dims[1];
+    int height = in_dims[2];
+    int width = in_dims[3];
+    int id = 0;
+    for (int id_n = 0; id_n < batch_size * geo_channel; ++id_n) {
+      for (int id_h = 0; id_h < height; ++id_h) {
+        for (int id_w = 0; id_w < width; ++id_w) {
+          id = id_n * height * width + width * id_h + id_w;
+          if (id_n % 2 == 0) {
+            out_data[id] = id_w - in_data[id];
+          } else {
+            out_data[id] = id_h - in_data[id];
+          }
+        }
+      }
+    }
+  }
+};
+
+class PolygonBoxTransformOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(
+        ctx->HasInput("Input"),
+        "Input (Input) of polygon_box transform op should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("Output"),
+        "Output (Output) of polygon_box transform op should not be null.");
+
+    auto in_dim = ctx->GetInputDim("Input");
+
+    PADDLE_ENFORCE_EQ(in_dim.size(), 4, "input's rank must be 4.");
+    PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0,
+                      "input's second dimension must be even.");
+
+    ctx->SetOutputDim("Output", in_dim);
+  }
+};
+
+class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput(
+        "Input",
+        "The input with shape [batch_size, geometry_channels, height, width]");
+    AddOutput("Output", "The output with the same shape as input");
+
+    AddComment(R"DOC(
+PolygonBoxTransform Operator.
+The input is the final geometry output of a detection network.
+We use 2*n numbers to denote the coordinate shift from the n corner vertices
+of the polygon_box to the pixel location. Since each distance offset contains
+two numbers (xi, yi), the geometry output has 2*n channels.
+PolygonBoxTransform Operator is used to transform these coordinate shifts into
+real coordinates.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(polygon_box_transform, ops::PolygonBoxTransformOp,
+                  ops::PolygonBoxTransformOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    polygon_box_transform,
+    ops::PolygonBoxTransformCPUKernel<float>,
+    ops::PolygonBoxTransformCPUKernel<double>);
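A quick worked example of the transform the CPU kernel above computes; this is illustrative only, with made-up offset values, and is not part of the operator:

// Illustrative-only check of the transform: even channels hold x-offsets,
// odd channels hold y-offsets, and the op turns offsets into coordinates.
#include <cstdio>

int main() {
  const int n = 2, h = 1, w = 2;  // one sample, 2 geo channels, a 1x2 map
  float in[n * h * w] = {0.5f, 0.5f, 0.25f, 0.25f};  // NCHW, made-up offsets
  float out[n * h * w];
  for (int id_n = 0; id_n < n; ++id_n)
    for (int id_h = 0; id_h < h; ++id_h)
      for (int id_w = 0; id_w < w; ++id_w) {
        int id = id_n * h * w + w * id_h + id_w;
        out[id] = (id_n % 2 == 0) ? id_w - in[id] : id_h - in[id];
      }
  // out = {-0.5, 0.5, -0.25, -0.25}: x-coords from channel 0, y from channel 1.
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}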
diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6187ac6622c65d2bbc525c3fe2cb397cf74ac612
--- /dev/null
+++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu
@@ -0,0 +1,76 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/gpu_info.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using platform::PADDLE_CUDA_NUM_THREADS;
+#define CUDA_BLOCK_SIZE 16
+
+template <typename T>
+__global__ void PolygonBoxTransformKernel(const int n, const int h,
+                                          const int w, const T* input,
+                                          T* output) {
+  int id_n = threadIdx.x + blockDim.x * blockIdx.x;
+  int id_h = threadIdx.y + blockDim.y * blockIdx.y;
+  int id_w = threadIdx.z + blockDim.z * blockIdx.z;
+  if (id_n < n && id_h < h && id_w < w) {
+    int id = id_n * h * w + w * id_h + id_w;
+    if (id_n % 2 == 0) {
+      output[id] = id_w - input[id];
+    } else {
+      output[id] = id_h - input[id];
+    }
+  }
+}
+
+template <typename T>
+class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                   "It must use CUDAPlace.");
+    auto* in = ctx.Input<Tensor>("Input");
+    auto in_dims = in->dims();
+    const T* in_data = in->data<T>();
+    auto* out = ctx.Output<Tensor>("Output");
+    T* out_data = out->mutable_data<T>(ctx.GetPlace());
+
+    int batch_size = in_dims[0];
+    int geo_channels = in_dims[1];
+    int height = in_dims[2];
+    int width = in_dims[3];
+    dim3 threadsPerBlock(
+        PADDLE_CUDA_NUM_THREADS / (CUDA_BLOCK_SIZE * CUDA_BLOCK_SIZE),
+        CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE);
+    dim3 numBlocks((batch_size * geo_channels) / threadsPerBlock.x,
+                   (height + threadsPerBlock.y - 1) / threadsPerBlock.y,
+                   (width + threadsPerBlock.z - 1) / threadsPerBlock.z);
+    auto stream = ctx.cuda_device_context().stream();
+    PolygonBoxTransformKernel<T><<<numBlocks, threadsPerBlock, 0, stream>>>(
+        batch_size * geo_channels, height, width, in_data, out_data);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OP_CUDA_KERNEL(
+    polygon_box_transform,
+    paddle::operators::PolygonBoxTransformOpCUDAKernel<float>,
+    paddle::operators::PolygonBoxTransformOpCUDAKernel<double>);
diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h
index 2a7df149a9f4b03676f172da980c927d7fa5e8a4..63ea60678f80708f5a8340edd22588553b9ec139 100644
--- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h
+++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h
@@ -24,6 +24,14 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* out = ctx.Output<framework::Tensor>("Out");
+    auto* in = ctx.Input<framework::LoDTensor>("Input");
+    if (in->lod().size() && ctx.Attr<int>("input_dim_idx") == 0) {
+      // set the correct batch size for the LoDTensor.
+      auto odims = out->dims();
+      int output_dim_idx = ctx.Attr<int>("output_dim_idx");
+      odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
+      out->mutable_data<T>(odims, ctx.GetPlace());
+    }
     out->mutable_data<T>(ctx.GetPlace());
 
     auto value = ctx.Attr<float>("value");
diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc
index fc0fca5ad3370633b2f60db65fdb7c01c417dc50..caff35e03ae3a144f799d982c859ded62cb3e93d 100644
--- a/paddle/fluid/operators/math/cross_entropy.cc
+++ b/paddle/fluid/operators/math/cross_entropy.cc
@@ -46,7 +46,10 @@ class CrossEntropyFunctor<platform::CPUDeviceContext, T> {
     const int64_t* label_data = labels->data<int64_t>();
     for (int i = 0; i < batch_size; ++i) {
-      int index = i * class_num + label_data[i];
+      int lbl = label_data[i];
+      PADDLE_ENFORCE_GE(lbl, 0);
+      PADDLE_ENFORCE_LT(lbl, class_num);
+      int index = i * class_num + lbl;
       loss_data[i] = -math::TolerableValue<T>()(std::log(prob_data[index]));
     }
   }
diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt
index 3106978eb0149b14849dfd1aaad8bbe76791f2f6..62532036f86bfb82465ccd9e0ec526299489932a 100644
--- a/paddle/fluid/operators/reader/CMakeLists.txt
+++ b/paddle/fluid/operators/reader/CMakeLists.txt
@@ -23,6 +23,7 @@ reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_op.cc)
 reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc)
 reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc)
 reader_library(create_threaded_reader_op SRCS create_threaded_reader_op.cc)
+reader_library(create_custom_reader_op SRCS create_custom_reader_op.cc)
 
 cc_test(reader_blocking_queue_test SRCS reader_blocking_queue_test.cc)
 # Export local libraries to parent
+ +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/operators/detail/safe_ref.h" +#include "paddle/fluid/operators/reader/reader_op_registry.h" + +namespace paddle { +namespace operators { +namespace reader { + +class CustomReader : public framework::DecoratedReader { + public: + CustomReader(ReaderBase* reader, const framework::BlockDesc& sub_block, + const platform::Place& dev_place, + const std::vector& source_var_names, + const std::vector& sink_var_names) + : DecoratedReader(reader), + program_(*sub_block.Program()), + sub_block_id_(sub_block.ID()), + exe_(framework::Executor(dev_place)), + source_var_names_(source_var_names), + sink_var_names_(sink_var_names) {} + + void ReadNext(std::vector* out) override; + + private: + const framework::ProgramDesc program_; + int sub_block_id_; + framework::Executor exe_; + + std::vector source_var_names_; + std::vector sink_var_names_; +}; + +class CreateCustomReaderOp : public framework::OperatorBase { + public: + using framework::OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { + auto* out = scope.FindVar(Output("Out")) + ->template GetMutable(); + auto* sub_block = Attr("sub_block"); + if (out->Get() != nullptr) { + return; + } + const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) + ->Get(); + out->Reset( + new CustomReader(underlying_reader.Get(), *sub_block, dev_place, + Attr>("source_var_names"), + Attr>("sink_var_names"))); + } +}; + +class CreateCustomReaderOpMaker : public DecoratedReaderMakerBase { + protected: + void Apply() override { + AddAttr( + "sub_block", "The block to hold all preprocessing operators."); + AddAttr>( + "source_var_names", + "Source variables are starting points of data preprocessing. They hold " + "preprocessing's input tensors. Each source variable corresponds to " + "one of underlying reader's output datas."); + AddAttr>( + "sink_var_names", + "Sink variables are ending points of data preprocessing. They hold " + "preprocessing's output tensors. Each sink variable corresponds to " + "one of custom reader's output datas."); + AddComment(R"DOC( + CreateCustomReader Operator + + A custom reader can be used for input data preprocessing. + A custom reader holds its own sub-block, which will be executed in its + 'ReadNext()' function. Users can configurate their own preprocessing + pipelines by inserting operators into custom reader's sub-block. 
+ )DOC"); + } +}; + +class CustomReaderInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(!ctx->IsRuntime(), + "'CustomReaderInferShape' should only be invoked during " + "compile time."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "The output decorated reader should not be null."); + const auto* sub_block = + ctx->Attrs().Get("sub_block"); + const auto sink_var_names = + ctx->Attrs().Get>("sink_var_names"); + std::vector> res_dims; + std::vector res_lod_levels; + for (const std::string& var_name : sink_var_names) { + auto* sink_var = sub_block->FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(sink_var); + res_dims.emplace_back(sink_var->GetShape()); + res_lod_levels.push_back(sink_var->GetLoDLevel()); + } + auto* out_reader = + boost::get(ctx->GetOutputVarPtrs("Out")[0]); + out_reader->SetShapes(res_dims); + out_reader->SetLoDLevels(res_lod_levels); + } +}; + +class CustomReaderInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { + framework::VarDesc* out_reader = block->FindVar(op_desc.Output("Out")[0]); + PADDLE_ENFORCE_NOT_NULL(out_reader); + out_reader->SetType(framework::proto::VarType::READER); + + auto sink_var_names = + boost::get>(op_desc.GetAttr("sink_var_names")); + const auto* sub_block = + boost::get(op_desc.GetAttr("sub_block")); + std::vector res_data_types; + for (const std::string& var_name : sink_var_names) { + framework::VarDesc* var = sub_block->FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(var); + res_data_types.emplace_back(var->GetDataType()); + } + out_reader->SetDataTypes(res_data_types); + } +}; + +void CustomReader::ReadNext(std::vector* out) { + out->clear(); + std::vector underlying_outs; + reader_->ReadNext(&underlying_outs); + if (underlying_outs.empty()) { + // There is not next data. + return; + } + PADDLE_ENFORCE(source_var_names_.size() == underlying_outs.size(), + "The size of source_var_names(%d) and the size of " + "underlying_outs(%d) are not consistent. Each feeding element " + "must have its own source variable.", + source_var_names_.size(), underlying_outs.size()); + // The scope for CustomReader's sub-block should be independent and shouldn't + // be any other computation scope's child. Otherwise, data preprocessing and + // compution cannot be concurrent. + framework::Scope scope; + // 1. Copy LoDTensors from underlying reader's output to source variables. + for (size_t i = 0; i < source_var_names_.size(); ++i) { + framework::Variable* var = scope.Var(source_var_names_[i]); + framework::LoDTensor* tensor = var->GetMutable(); + tensor->ShareDataWith(underlying_outs[i]); + tensor->set_lod(underlying_outs[i].lod()); + } + // 2. Run the sub-block. + exe_.Run(program_, &scope, sub_block_id_, false, true); + // 3. Copy LoDTensors from sink variables to out. 
+  out->resize(sink_var_names_.size());
+  for (size_t i = 0; i < sink_var_names_.size(); ++i) {
+    const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i]))
+                             .Get<framework::LoDTensor>();
+    framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]);
+  }
+}
+
+}  // namespace reader
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators::reader;
+REGISTER_OPERATOR(create_custom_reader, ops::CreateCustomReaderOp,
+                  ops::CreateCustomReaderOpMaker, ops::CustomReaderInferShape,
+                  ops::CustomReaderInferVarType,
+                  paddle::framework::EmptyGradOpMaker)
diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc
index 11f1ddebc48134158315ea70a2d2b9e07f2e2469..612e1f5eca3a4836db1fd167fc6bb63400d20177 100644
--- a/paddle/fluid/operators/reader/reader_op_registry.cc
+++ b/paddle/fluid/operators/reader/reader_op_registry.cc
@@ -115,6 +115,7 @@ void DecoratedReaderInferShape::operator()(
       boost::get<framework::VarDesc*>(ctx->GetOutputVarPtrs("Out")[0]);
   out_reader->SetLoDLevels(in_reader->GetLoDLevels());
 }
+
 void DecoratedReaderInferVarType::operator()(
     const framework::OpDesc& op_desc, framework::BlockDesc* block) const {
   std::string in_reader_name = op_desc.Input("UnderlyingReader")[0];
diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp
index bac4659e62b107dd80ef95dd0907b3da4becffbc..8e9dbbd7a154095a7298bb2f59a82d13a60f9bd3 100644
--- a/paddle/function/EigenGemm.cpp
+++ b/paddle/function/EigenGemm.cpp
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <glog/logging.h>
-#include "unsupported/Eigen/CXX11/Tensor"
+#include "paddle/function/EigenThreadDevice.h"
 
 namespace paddle {
 
@@ -70,25 +70,26 @@ struct EigenBlasGemm {
     dims[0].first = transA ? 0 : 1;
     dims[0].second = transB ? 1 : 0;
 
-    Eigen::DefaultDevice device;
+    auto* device = EigenDeviceWarpper::device();
     if (N == ldc) {
       if (alpha == T(1) && beta == T(0)) {
-        c.device(device) = a.contract(b, dims);
+        c.device(*device) = a.contract(b, dims);
       } else if (alpha == T(1) && beta == T(1)) {
-        c.device(device) += a.contract(b, dims);
+        c.device(*device) += a.contract(b, dims);
       } else {
-        c.device(device) = alpha * a.contract(b, dims) + beta * c;
+        c.device(*device) = alpha * a.contract(b, dims) + beta * c;
       }
     } else {
       if (alpha == T(1) && beta == T(0)) {
-        c.slice(offsetC, extentC).device(device) = a.contract(b, dims);
+        c.slice(offsetC, extentC).device(*device) = a.contract(b, dims);
       } else if (alpha == T(1) && beta == T(1)) {
-        c.slice(offsetC, extentC).device(device) += a.contract(b, dims);
+        c.slice(offsetC, extentC).device(*device) += a.contract(b, dims);
      } else {
-        c.slice(offsetC, extentC).device(device) =
+        c.slice(offsetC, extentC).device(*device) =
             alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC);
       }
     }
+    EigenDeviceWarpper::free_device(device);
   }
 };
diff --git a/paddle/function/EigenThreadDevice.h b/paddle/function/EigenThreadDevice.h
new file mode 100644
index 0000000000000000000000000000000000000000..74269aa664a711c905e12a61958c9ab01e2340c0
--- /dev/null
+++ b/paddle/function/EigenThreadDevice.h
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#pragma once
+
+#if defined(__OSX__) || defined(__APPLE__)
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#endif
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace paddle {
+
+#if defined(__ANDROID__)
+int GetCpuCount() {
+  FILE* fp = fopen("/sys/devices/system/cpu/possible", "r");
+  if (!fp) {
+    return 1;
+  }
+  int rank0, rank1;
+  int num = fscanf(fp, "%d-%d", &rank0, &rank1);
+  fclose(fp);
+  if (num < 2) return 1;
+  return rank1 + 1;
+}
+#elif defined(__OSX__) || defined(__APPLE__)
+int GetCpuCount() {
+  int count = 0;
+  size_t len = sizeof(int);
+  sysctlbyname("hw.ncpu", &count, &len, NULL, 0);
+  return count > 0 ? count : 1;
+}
+#else
+int GetCpuCount() { return 1; }
+#endif
+
+class EigenDeviceWarpper {
+public:  // NOLINT
+#if EIGEN_USE_THREADS
+  static Eigen::ThreadPoolDevice* device() {
+    const int num_cpus = GetCpuCount();
+    const int num_threads = (num_cpus > 2) ? 2 : num_cpus;
+    static Eigen::ThreadPool tp(num_threads);
+    static Eigen::ThreadPoolDevice* device =
+        new Eigen::ThreadPoolDevice(&tp, num_threads);
+    return device;
+  }
+
+  static void free_device(Eigen::ThreadPoolDevice* device) {
+    // do nothing
+  }
+#else
+  static Eigen::DefaultDevice* device() {
+    Eigen::DefaultDevice* device = new Eigen::DefaultDevice;
+    return device;
+  }
+
+  static void free_device(Eigen::DefaultDevice* device) { delete device; }
+#endif
+};
+
+}  // namespace paddle
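For readers unfamiliar with Eigen's device abstraction, this small sketch shows the acquire/contract/release pattern that EigenGemm.cpp now follows. It is illustrative only and assumes Eigen's unsupported Tensor headers are on the include path:

// Illustrative-only sketch of the EigenDeviceWarpper acquire/release pattern.
#include "paddle/function/EigenThreadDevice.h"

void ContractSketch() {
  Eigen::Tensor<float, 2> a(2, 3), b(3, 4), c(2, 4);
  a.setRandom();
  b.setRandom();
  Eigen::array<Eigen::IndexPair<int>, 1> dims = {{Eigen::IndexPair<int>(1, 0)}};
  auto* device = paddle::EigenDeviceWarpper::device();  // pooled or default
  c.device(*device) = a.contract(b, dims);  // may run on 1-2 threads if enabled
  paddle::EigenDeviceWarpper::free_device(device);  // no-op for the pool case
}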
diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt
index 25fc35311fc63988c64a445d72fc6255e49e8d4b..7c80faa48ce960a3a7eb7d88eda4f2b09756410e 100644
--- a/paddle/optimizer/CMakeLists.txt
+++ b/paddle/optimizer/CMakeLists.txt
@@ -7,6 +7,10 @@ set(OPITMIZER_SRCS
     sgd_optimizer.cc
     )
 
-cc_library(paddle_optimizer STATIC SRCS ${OPITMIZER_SRCS} DEPS paddle_proto glog)
-cc_test(serialization_test SRCS serialization_test.cc DEPS paddle_proto)
-cc_test(parameter_optimizer_test SRCS parameter_optimizer_test.cc DEPS paddle_optimizer)
+add_library(paddle_optimizer ${OPITMIZER_SRCS})
+target_link_libraries(paddle_optimizer paddle_proto glog)
+
+if (WITH_TESTING)
+  add_unittest(serialization_test serialization_test.cc)
+  add_unittest(parameter_optimizer_test parameter_optimizer_test.cc)
+endif()
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
deleted file mode 100755
index baff7628ea01caa0248af82c6eed2c3b546cdb35..0000000000000000000000000000000000000000
--- a/paddle/scripts/docker/build.sh
+++ /dev/null
@@ -1,256 +0,0 @@
-#!/bin/bash
-
-function cmake_gen() {
-  mkdir -p /paddle/build
-  cd /paddle/build
-
-  # build script will not fail if *.deb does not exist
-  rm *.deb 2>/dev/null || true
-  # delete previous built whl packages
-  rm -rf /paddle/paddle/dist 2>/dev/null || true
-
-  # Support build for all python versions, currently
-  # including cp27-cp27m and cp27-cp27mu.
-  PYTHON_FLAGS=""
-  if [ "$1" != "" ]; then
-    echo "using python abi: $1"
-    if [ "$1" == "cp27-cp27m" ]; then
-      export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
-      export PATH=/opt/python/cp27-cp27m/bin/:${PATH}
-      PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-    -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-    -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
-    elif [ "$1" == "cp27-cp27mu" ]; then
-      export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
-      export PATH=/opt/python/cp27-cp27mu/bin/:${PATH}
-      PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-    -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-    -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
-    fi
-  fi
-
-  cat < /paddle/build/Dockerfile <
-    ENV HOME /root
-EOF
-
-  if [[ ${WITH_GPU} == "ON" ]]; then
-    NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&"
-  else
-    NCCL_DEPS=""
-  fi
-
-  if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then
-    PADDLE_VERSION="paddle version"
-    CMD='"paddle", "version"'
-  else
-    PADDLE_VERSION="true"
-    CMD='"true"'
-  fi
-
-  cat >> /paddle/build/Dockerfile <
-  cat >> /paddle/build/Dockerfile <
-  cat >> /paddle/build/Dockerfile <= 21."
-    ANDROID_API=21
-  fi
-else # armeabi, armeabi-v7a
-  ANDROID_ARCH=arm
-fi
-
-ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API
-
-cat <&2
-  echo "Please use pre-commit to check what is wrong." 1>&2
-  exit 1
-}
-
-trap 'abort' 0
-set -e
-
-# install glide
-curl https://glide.sh/get | bash
-eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
-
-# set up go environment for running gometalinter
-mkdir -p $GOPATH/src/github.com/PaddlePaddle/
-ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
-cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
-
-go get github.com/alecthomas/gometalinter
-gometalinter --install
-
-cd $TRAVIS_BUILD_DIR
-export PATH=/usr/bin:$PATH
-pre-commit install
-clang-format --version
-
-
-
-if ! pre-commit run -a ; then
-  git diff
-  exit 1
-fi
-
-trap : 0
diff --git a/paddle/scripts/travis/deploy_key.enc b/paddle/scripts/travis/deploy_key.enc
deleted file mode 100644
index b0aa45c5ac626c735735fd8541a43bf8b099d0a0..0000000000000000000000000000000000000000
Binary files a/paddle/scripts/travis/deploy_key.enc and /dev/null differ
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 1470f8c2e50004abb08e75980decd9485c22dece..03d4602f7a99dc335260cffdcdc30a839f3988cd 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import contextlib
 
 from .. import core
 from ..framework import convert_np_dtype_to_dtype_, default_main_program, default_startup_program, Program
@@ -21,7 +22,8 @@ from ..executor import global_scope
 
 __all__ = [
     'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
-    'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer'
+    'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer',
+    'random_data_generator', 'Preprocessor'
 ]
 
@@ -535,8 +537,6 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
         inputs={'UnderlyingReader': reader},
         outputs={'Out': [new_reader]},
         attrs=attrs)
-    new_reader.persistable = True
-    new_reader.stop_gradient = True
     return monkey_patch_reader_methods(new_reader)
 
@@ -581,3 +581,82 @@ def read_file(file_obj):
         return out[0]
     else:
         return out
+
+
+class Preprocessor(object):
+    BEFORE_SUB_BLOCK = 0
+    IN_SUB_BLOCK = 1
+    AFTER_SUB_BLOCK = 2
+
+    def __init__(self, reader, name=None):
+        self.underlying_reader = reader
+        new_reader_name = name if name is not None else unique_name(
+            "create_custom_reader")
+        self.main_prog = default_main_program()
+        self.reader = self.main_prog.current_block().create_var(
+            name=new_reader_name)
+        self.sub_block = None
+        self.source_var_names = None
+        self.sink_var_names = None
+        self.status = Preprocessor.BEFORE_SUB_BLOCK
+
+    def is_completed(self):
+        return self.sub_block and self.source_var_names and self.sink_var_names
+
+    @contextlib.contextmanager
+    def block(self):
+        self.status = Preprocessor.IN_SUB_BLOCK
+        self.sub_block = self.main_prog.create_block()
+        yield
+        self.main_prog.rollback()
+        self.status = Preprocessor.AFTER_SUB_BLOCK
+        if not self.is_completed():
+            raise RuntimeError(
+                "The definition of preprocessor is incomplete! "
+                "Please make sure that you have set input and output "
+                "variables by invoking 'inputs' and 'outputs' in "
+                "Preprocessor's sub-block.")
+
+    def inputs(self):
+        if self.status != Preprocessor.IN_SUB_BLOCK:
+            raise RuntimeError(
+                "Preprocessor.inputs() can only be invoked inside the sub-block."
+            )
+
+        source_shapes = self.underlying_reader.desc.shapes()
+        source_dtypes = self.underlying_reader.desc.dtypes()
+        source_lod_levels = self.underlying_reader.desc.lod_levels()
+        self.source_var_names = [
+            unique_name("preprocessor_source")
+            for _ in xrange(len(source_shapes))
+        ]
+        source_vars = []
+        for var_name, shape, dtype, lod_level in zip(
+                self.source_var_names, source_shapes, source_dtypes,
+                source_lod_levels):
+            source_vars.append(self.main_prog.current_block().create_var(
+                name=var_name, shape=shape, dtype=dtype, lod_level=lod_level))
+        return source_vars
+
+    def outputs(self, *outs):
+        if self.status != Preprocessor.IN_SUB_BLOCK:
+            raise RuntimeError(
+                "Preprocessor.outputs() can only be invoked inside the sub-block."
+            )
+        self.sink_var_names = [var.name for var in outs]
+
+    def __call__(self, *args, **kwargs):
+        if self.status != Preprocessor.AFTER_SUB_BLOCK:
+            raise RuntimeError(
+                "Preprocessor output can only be retrieved after its sub-block.")
+
+        self.main_prog.current_block().append_op(
+            type="create_custom_reader",
+            inputs={'UnderlyingReader': self.underlying_reader},
+            outputs={'Out': [self.reader]},
+            attrs={
+                "sub_block": self.sub_block,
+                "source_var_names": self.source_var_names,
+                "sink_var_names": self.sink_var_names
+            })
+        return monkey_patch_reader_methods(self.reader)
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 42e26dd36653ed39680bc1fca84540349716f006..c337e0f4f202959688ba992e60370eae1e572c2f 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -24,19 +24,65 @@ from tensor import concat
 import utils
 
 __all__ = [
-    'fc', 'embedding', 'dynamic_lstm', 'dynamic_lstmp', 'dynamic_gru',
-    'gru_unit', 'linear_chain_crf', 'crf_decoding', 'cos_sim', 'cross_entropy',
-    'square_error_cost', 'chunk_eval', 'sequence_conv', 'conv2d',
-    'sequence_pool', 'sequence_softmax', 'softmax', 'pool2d', 'batch_norm',
-    'beam_search_decode', 'conv2d_transpose', 'sequence_expand', 'lstm_unit',
-    'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', 'reduce_prod',
-    'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
-    'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'topk',
-    'warpctc', 'sequence_reshape', 'transpose', 'im2sequence', 'nce',
-    'beam_search', 'row_conv', 'multiplex', 'layer_norm',
-    'softmax_with_cross_entropy', 'smooth_l1', 'one_hot',
-    'autoincreased_step_counter', 'reshape', 'lod_reset', 'lrn', 'pad',
-    'label_smooth', 'roi_pool', 'dice_loss', 'bilinear_interp', 'random_crop'
+    'fc',
+    'embedding',
+    'dynamic_lstm',
+    'dynamic_lstmp',
+    'dynamic_gru',
+    'gru_unit',
+    'linear_chain_crf',
+    'crf_decoding',
+    'cos_sim',
+    'cross_entropy',
+    'square_error_cost',
+    'chunk_eval',
+    'sequence_conv',
+    'conv2d',
+    'sequence_pool',
+    'sequence_softmax',
+    'softmax',
+    'pool2d',
+    'batch_norm',
+    'beam_search_decode',
+    'conv2d_transpose',
+    'sequence_expand',
+    'lstm_unit',
+    'reduce_sum',
+    'reduce_mean',
+    'reduce_max',
+    'reduce_min',
+    'reduce_prod',
+    'sequence_first_step',
+    'sequence_last_step',
+    'dropout',
+    'split',
+    'ctc_greedy_decoder',
+    'edit_distance',
+    'l2_normalize',
+    'matmul',
+    'topk',
+    'warpctc',
+    'sequence_reshape',
+    'transpose',
+    'im2sequence',
+    'nce',
+    'beam_search',
+    'row_conv',
+    'multiplex',
+    'layer_norm',
+    'softmax_with_cross_entropy',
+    'smooth_l1',
+    'one_hot',
+    'autoincreased_step_counter',
+    'reshape',
+    'lod_reset',
+    'lrn',
+    'pad',
+    'label_smooth',
+    'roi_pool',
+    'dice_loss',
+    'upsampling_bilinear2d',
+    'random_crop',
 ]
 
@@ -3881,8 +3927,10 @@ def dice_loss(input, label, epsilon=0.00001):
     return reduce_mean(dice_score)
 
-def bilinear_interp(input, out_h, out_w, name=None):
+def upsampling_bilinear2d(input, out_shape=None, scale=None, name=None):
     """
+    The operation performed by upsampling_bilinear2d is mathematically known
+    as bilinear interpolation.
     Bilinear interpolation is an extension of linear interpolation for
     interpolating functions of two variables (e.g. H-direction and
     W-direction in this layer) on a rectilinear 2D grid.
@@ -3894,8 +3942,13 @@
         input (Variable): The input tensor of bilinear interpolation.
                           This is a 4-D tensor of the shape
                           (num_batches, channels, in_h, in_w).
-        out_h (int): output height of bilinear interpolation layer.
-        out_w (int): output width of bilinear interpolation layer.
+        out_shape(list|tuple|None): Output shape of the bilinear interpolation
+                                    layer, given as (out_h, out_w).
+                                    Default: None
+        scale(int|None): The multiplier for the input height or width.
+                         At least one of out_shape or scale must be set,
+                         and out_shape has a higher priority than scale.
+                         Default: None
         name(str|None): A name for this layer(optional). If set None, the layer
                         will be named automatically.
 
@@ -3906,10 +3959,27 @@
     Examples:
         .. code-block:: python
 
-            out = fluid.layers.bilinear_interp(input, out_h=12, out_w=12)
+            out = fluid.layers.upsampling_bilinear2d(input, out_shape=[12, 12])
     """
+    if out_shape is None and scale is None:
+        raise ValueError("One of out_shape and scale must not be None")
     helper = LayerHelper('bilinear_interp', **locals())
     dtype = helper.input_dtype()
+
+    def _is_list_or_tuple_(data):
+        return (isinstance(data, list) or isinstance(data, tuple))
+
+    if out_shape is not None:
+        if not (_is_list_or_tuple_(out_shape) and len(out_shape) == 2):
+            raise ValueError('out_shape should be a list or tuple '
+                             'with length 2, (out_h, out_w).')
+        out_shape = list(map(int, out_shape))
+        out_h = out_shape[0]
+        out_w = out_shape[1]
+    else:
+        out_h = int(input.shape[2] * scale)
+        out_w = int(input.shape[3] * scale)
+
     out = helper.create_tmp_variable(dtype)
     helper.append_op(
         type="bilinear_interp",
diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py
index 555e371952d0f902063133c2a227eb78f082726c..9946d0a4ff33b2f5040f6d2e31aa20fcf9c609a7 100644
--- a/python/paddle/fluid/lod_tensor.py
+++ b/python/paddle/fluid/lod_tensor.py
@@ -93,12 +93,12 @@ def _convert_lod(lod):
 
 def create_lod_tensor(data, lod, place):
-    """Create a lod tensor from a numpy array or an existing lod tensor.
+    """Create a lod tensor from a numpy array, a list, or an existing lod tensor.
 
     Create a lod tensor by doing the following:
     1. Check that the length-based input lod is valid.
     2. Convert the length-based lod to an offset-based LoD.
-    3. Copy the data from a numpy array or a existing lod tensor to
+    3. Copy the data from a numpy array, a list, or an existing lod tensor to
        CPU or GPU device (based on input place).
     4. Set the level of detail (LoD) using the offset-based LoD.
 
@@ -117,7 +117,7 @@ def create_lod_tensor(data, lod, place):
     for more details regarding LoD.
 
     Args:
-        data: a numpy array or a LoDTensor holding the data to be copied.
+        data: a numpy array, a LoDTensor, or a list holding the data to be copied.
         lod: a list of lists indicating the length-based LoD info specified by the user.
         place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
 
@@ -126,6 +126,18 @@ def create_lod_tensor(data, lod, place):
     """
     if isinstance(data, core.LoDTensor):
         return create_lod_tensor(np.array(data), lod, place)
+    elif isinstance(data, list):
+        # When the input data is a list, this only handles the case where the
+        # base element is an index of shape [1] and dtype int64 (e.g., a word
+        # id). Hence, the generated LoDTensor will be of shape [n, 1] and dtype
+        # int64, where `n` is the total number of words or other indexes in the
+        # sequence.
+ new_lod = [] + for seq in data: + new_lod.append(len(seq)) + assert [new_lod] == lod, "data and lod do not match" + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + return create_lod_tensor(flattened_data, lod, place) elif isinstance(data, np.ndarray): assert _validate_lod(lod, data.shape[0]), "the provided lod info is invalid" @@ -134,9 +146,8 @@ def create_lod_tensor(data, lod, place): tensor.set_lod(_convert_lod(lod)) return tensor else: - raise Exception( - "data should be either a LoDTensor or a Numpy array, but you pass type %s instead" - % (type(data))) + raise TypeError( + "data should be either a LoDTensor, a Numpy array or a list") def create_random_int_lodtensor(lod, base_shape, place, low, high): diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index 5fba561e024b0690f10939267146f2622c567fa5..de3906fc6a005181b0ab04a846eb2e7ce14004c2 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -48,7 +48,7 @@ def linear(): return avg_loss -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() trainer = fluid.Trainer( @@ -68,8 +68,8 @@ def train(use_cuda, train_program, save_dirname): ['15.343549569447836'] ... ''' - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) trainer.stop() trainer.train( @@ -80,13 +80,13 @@ def train(use_cuda, train_program, save_dirname): # infer -def infer(use_cuda, inference_program, save_dirname=None): - if save_dirname is None: +def infer(use_cuda, inference_program, params_dirname=None): + if params_dirname is None: return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 10 tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") @@ -100,10 +100,10 @@ def main(use_cuda): return # Directory for saving the trained model - save_dirname = "fit_a_line.inference.model" + params_dirname = "fit_a_line.inference.model" - train(use_cuda, linear, save_dirname) - infer(use_cuda, inference_program, save_dirname) + train(use_cuda, linear, params_dirname) + infer(use_cuda, inference_program, params_dirname) class TestFitALine(unittest.TestCase): diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index 1160e500dbd6db784eeb81b72968386347fec59a..63dc1b6ce30974ede22a3f7772b76bf207bbae39 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -85,7 +85,7 @@ def train_network(): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): BATCH_SIZE = 128 EPOCH_NUM = 1 @@ -105,8 +105,8 @@ def train(use_cuda, train_program, save_dirname): print('Loss 
{0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) if accuracy > 0.01: # Low threshold for speeding up CI - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -122,10 +122,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['pixel', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -142,12 +142,14 @@ def main(use_cuda): save_path = "image_classification_resnet.inference.model" train( - use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + use_cuda=use_cuda, + train_program=train_network, + params_dirname=save_path) infer( use_cuda=use_cuda, inference_program=inference_network, - save_dirname=save_path) + params_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index 1e3e955ba0299f2cc0fcc02d79ae6fd8ff4c1171..0bf8f265a1c1b11364ecfa11061af183ce20d51e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -64,7 +64,7 @@ def train_network(): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): BATCH_SIZE = 128 train_reader = paddle.batch( paddle.reader.shuffle( @@ -82,8 +82,8 @@ def train(use_cuda, train_program, save_dirname): print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) if accuracy > 0.01: # Low threshold for speeding up CI - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -99,10 +99,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['pixel', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. 
# Use normilized image pixels as input data, which should be in the range @@ -119,12 +119,14 @@ def main(use_cuda): save_path = "image_classification_vgg.inference.model" train( - use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + use_cuda=use_cuda, + train_program=train_network, + params_dirname=save_path) infer( use_cuda=use_cuda, inference_program=inference_network, - save_dirname=save_path) + params_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py index f4344988141af44af83fda24d73da25f597796ef..9464df59797c0b8c35611ee56de6bf362ac7a4a5 100755 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -141,7 +141,7 @@ def train_program(): return [avg_cost] -def train(use_cuda, train_program, save_path): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.SGD(learning_rate=0.01) @@ -172,7 +172,7 @@ def train(use_cuda, train_program, save_path): print("avg_cost: %s" % avg_cost) if float(avg_cost) < 100.0: # Large value to increase CI speed - trainer.save_params(save_path) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1, float(avg_cost))) @@ -183,7 +183,7 @@ def train(use_cuda, train_program, save_path): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_path) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -197,10 +197,10 @@ def train(use_cuda, train_program, save_path): feed_order=feed_order) -def infer(use_cuda, inference_program, save_path): +def infer(use_cuda, inference_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - inference_program, param_path=save_path, place=place) + inference_program, param_path=params_dirname, place=place) # Setup inputs by creating LoDTensors to represent sequences of words. 
# Here each word is the basic element of these LoDTensors and the shape of @@ -251,9 +251,9 @@ def infer(use_cuda, inference_program, save_path): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "label_semantic_roles.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "label_semantic_roles.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index 2aac70463c64019ec97b0c3893b4b52f77967797..03439cbd37671b4727879bf3d0793f016f55247a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -57,7 +57,7 @@ def train_program(): return [avg_cost, acc] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adam(learning_rate=0.001) @@ -78,7 +78,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( event.epoch + 1, avg_cost, acc)) @@ -100,11 +100,11 @@ def train(use_cuda, train_program, save_dirname): feed_order=['img', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 1 tensor_img = numpy.random.uniform(-1.0, 1.0, @@ -116,17 +116,17 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): - save_dirname = "recognize_digits_conv.inference.model" + params_dirname = "recognize_digits_conv.inference.model" # call train() with is_local argument to run distributed train train( use_cuda=use_cuda, train_program=train_program, - save_dirname=save_dirname) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_dirname=save_dirname) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index 32653157994f81c46f420c1b55ceddbbbf06f2fe..89bbd21bea7d64a8dd6fc32829b6addb680da62e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -44,7 +44,7 @@ def train_program(): return [avg_cost, acc] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adam(learning_rate=0.001) @@ -62,7 +62,7 @@ def train(use_cuda, train_program, 
save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( event.epoch + 1, avg_cost, acc)) @@ -81,11 +81,11 @@ def train(use_cuda, train_program, save_dirname): feed_order=['img', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 1 tensor_img = numpy.random.uniform(-1.0, 1.0, @@ -97,17 +97,17 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): - save_dirname = "recognize_digits_mlp.inference.model" + params_dirname = "recognize_digits_mlp.inference.model" # call train() with is_local argument to run distributed train train( use_cuda=use_cuda, train_program=train_program, - save_dirname=save_dirname) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_dirname=save_dirname) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py index 259680cb097a12a4fc92107f6fd8595393f88bd5..dfc7325acf23176c05fe42761b9997b98d23372a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -155,7 +155,7 @@ def train_program(): return [avg_cost, scale_infer] -def train(use_cuda, train_program, save_path): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.SGD(learning_rate=0.2) @@ -180,7 +180,7 @@ def train(use_cuda, train_program, save_path): print("avg_cost: %s" % avg_cost) if float(avg_cost) < 4: # Smaller value to increase CI speed - trainer.save_params(save_path) + trainer.save_params(params_dirname) trainer.stop() else: print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1, @@ -197,43 +197,30 @@ def train(use_cuda, train_program, save_path): num_epochs=1, event_handler=event_handler, reader=train_reader, - feed_order=[ - 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', - 'category_id', 'movie_title', 'score' - ]) + feed_order=feed_order) -def infer(use_cuda, inference_program, save_path): +def infer(use_cuda, inference_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - inference_program, param_path=save_path, place=place) - - def create_lod_tensor(data, lod=None): - tensor = fluid.LoDTensor() - if lod is None: - # Tensor, the shape is [batch_size, 1] - index = 0 - lod_0 = [index] - for l in range(len(data)): - index += 1 - lod_0.append(index) - lod = [lod_0] - tensor.set_lod(lod) - - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - tensor.set(flattened_data, place) - return tensor - - # Generate a random input for inference - user_id = create_lod_tensor([[1]]) - gender_id = create_lod_tensor([[1]]) - age_id 
= create_lod_tensor([[0]]) - job_id = create_lod_tensor([[10]]) - movie_id = create_lod_tensor([[783]]) - category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]]) - movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]], - [[0, 5]]) + inference_program, param_path=params_dirname, place=place) + + # Use the first data from paddle.dataset.movielens.test() as input. + # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor, + # where `data` is a list of sequences of index numbers, `lod` is + # the level of detail (lod) info associated with `data`. + # For example, data = [[10, 2, 3], [2, 3]] means that it contains + # two sequences of indexes, of length 3 and 2, respectively. + # Correspondingly, lod = [[3, 2]] contains one level of detail info, + # indicating that `data` consists of two sequences of length 3 and 2. + user_id = fluid.create_lod_tensor([[1]], [[1]], place) + gender_id = fluid.create_lod_tensor([[1]], [[1]], place) + age_id = fluid.create_lod_tensor([[0]], [[1]], place) + job_id = fluid.create_lod_tensor([[10]], [[1]], place) + movie_id = fluid.create_lod_tensor([[783]], [[1]], place) + category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) + movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]], + place) results = inferencer.infer( { @@ -253,12 +240,15 @@ def infer(use_cuda, inference_program, save_path): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "recommender_system.inference.model" - train(use_cuda=use_cuda, train_program=train_program, save_path=save_path) + params_dirname = "recommender_system.inference.model" + train( + use_cuda=use_cuda, + train_program=train_program, + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_path=save_path) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt index 673c965b662a022739f8d489c331f4de9455a926..d71147a85e77ea6dc5b6391aa169abd9b02a0aa1 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt @@ -1,6 +1,11 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +# This test is buggy +# py_test(test_understand_sentiment_dynamic_rnn SRCS +# test_understand_sentiment_dynamic_rnn.py SERIAL) +LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn) + # default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 7e32696f9909a0a440f6bdc401ac9f9594c4dec7..11e9fd1bec1450f6753dbe38c7014090d6e136b6 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -64,7 +64,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = 
fluid.optimizer.Adagrad(learning_rate=0.002) @@ -85,7 +85,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -112,13 +112,13 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. @@ -143,9 +143,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_conv.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index e50b7920b17f86eada3abc700c5403053fca8771..90757d54f99715163518ce5a094e6ba3a67efed3 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -79,7 +79,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -100,7 +100,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -112,7 +112,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -127,13 +127,13 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + 
param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. @@ -158,9 +158,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_conv.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index d4fb80168814359827708ad921bd3f53b14bb2ee..52b7d4a83779d01936afb3d9d1e4864b05d55b5a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -71,7 +71,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -92,7 +92,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -104,7 +104,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -119,13 +119,13 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. 
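For reference, such a words input can be built with the list form of create_lod_tensor shown earlier (a hedged sketch: the word ids are invented, and the 'words' feed name mirrors the feed_order=['words', 'label'] used in training):

.. code-block:: python

    reviews = [[128, 5, 973, 24, 6]]  # one sequence of five (made-up) word ids
    tensor_words = fluid.create_lod_tensor(reviews, [[5]], place)
    results = inferencer.infer({'words': tensor_words})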
@@ -150,9 +150,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_stacked_lstm.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_stacked_lstm.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index 16d73d4aff4ba31327e6d8f5ac04a36387f59daa..eeb8e67087334ea96aab9cdb6272e34e2eb99939 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -80,7 +80,7 @@ def train_program(is_sparse): return avg_cost -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): train_reader = paddle.batch( paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) test_reader = paddle.batch( @@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname): print("loss= ", avg_cost) if avg_cost < 10.0: - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() if math.isnan(avg_cost): @@ -115,10 +115,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # Setup inputs by creating 4 LoDTensors representing 4 words. 
Here each word # is simply an index to look up for the corresponding word vector and hence @@ -153,17 +153,17 @@ def main(use_cuda, is_sparse): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "word2vec.inference.model" + params_dirname = "word2vec.inference.model" train( use_cuda=use_cuda, train_program=partial(train_program, is_sparse), - save_dirname=save_path) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=partial(inference_program, is_sparse), - save_dirname=save_path) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 7be924f762ddeb045dda890dbfdcd96a65449553..65d6552acc9b3d31a97a45290e4613a633fffa3c 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True): test_reader = paddle.batch( paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) - feeding = { - 'user_id': 0, - 'gender_id': 1, - 'age_id': 2, - 'job_id': 3, - 'movie_id': 4, - 'category_id': 5, - 'movie_title': 6, - 'score': 7 - } - - def func_feed(feeding, data): - feed_tensors = {} - for (key, idx) in feeding.iteritems(): - tensor = fluid.LoDTensor() - if key != "category_id" and key != "movie_title": - if key == "score": - numpy_data = np.array(map(lambda x: x[idx], data)).astype( - "float32") - else: - numpy_data = np.array(map(lambda x: x[idx], data)).astype( - "int64") - else: - numpy_data = map(lambda x: np.array(x[idx]).astype("int64"), - data) - lod_info = [len(item) for item in numpy_data] - offset = 0 - lod = [offset] - for item in lod_info: - offset += item - lod.append(offset) - numpy_data = np.concatenate(numpy_data, axis=0) - tensor.set_lod([lod]) - - numpy_data = numpy_data.reshape([numpy_data.shape[0], 1]) - tensor.set(numpy_data, place) - feed_tensors[key] = tensor - return feed_tensors + feed_order = [ + 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id', + 'movie_title', 'score' + ] def train_loop(main_program): exe.run(framework.default_startup_program()) + feed_list = [ + main_program.global_block().var(var_name) for var_name in feed_order + ] + feeder = fluid.DataFeeder(feed_list, place) + PASS_NUM = 100 for pass_id in range(PASS_NUM): for batch_id, data in enumerate(train_reader()): # train a mini-batch outs = exe.run(program=main_program, - feed=func_feed(feeding, data), + feed=feeder.feed(data), fetch_list=[avg_cost]) out = np.array(outs[0]) if (batch_id + 1) % 10 == 0: avg_cost_set = [] for test_data in test_reader(): - avg_cost_np = exe.run( - program=test_program, - feed=func_feed(feeding, test_data), - fetch_list=[avg_cost]) + avg_cost_np = exe.run(program=test_program, + feed=feeder.feed(test_data), + fetch_list=[avg_cost]) avg_cost_set.append(avg_cost_np[0]) break # test only 1 segment for speeding up CI @@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) - def create_lod_tensor(data, lod=None): - tensor = fluid.LoDTensor() - if lod is None: - # Tensor, the shape is [batch_size, 1] - index = 0 - lod_0 = [index] - for l in range(len(data)): - index += 1 - lod_0.append(index) - lod = [lod_0] - tensor.set_lod(lod) - - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = 
flattened_data.reshape([len(flattened_data), 1]) - tensor.set(flattened_data, place) - return tensor - inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, @@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None): # Use the first data from paddle.dataset.movielens.test() as input assert feed_target_names[0] == "user_id" - user_id = create_lod_tensor([[1]]) + # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor + # where `data` is a list of sequences of index numbers, `lod` is + # the level of detail (lod) info associated with `data`. + # For example, data = [[10, 2, 3], [2, 3]] means that it contains + # two sequences of indexes, of length 3 and 2, respectively. + # Correspondingly, lod = [[3, 2]] contains one level of detail info, + # indicating that `data` consists of two sequences of length 3 and 2. + user_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[1] == "gender_id" - gender_id = create_lod_tensor([[1]]) + gender_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[2] == "age_id" - age_id = create_lod_tensor([[0]]) + age_id = fluid.create_lod_tensor([[0]], [[1]], place) assert feed_target_names[3] == "job_id" - job_id = create_lod_tensor([[10]]) + job_id = fluid.create_lod_tensor([[10]], [[1]], place) assert feed_target_names[4] == "movie_id" - movie_id = create_lod_tensor([[783]]) + movie_id = fluid.create_lod_tensor([[783]], [[1]], place) assert feed_target_names[5] == "category_id" - category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]]) + category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) assert feed_target_names[6] == "movie_title" - movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]], - [[0, 5]]) + movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], + [[5]], place) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index b11131456a1f87419407c4d8626ebcde26dd7640..013d72f418cf7ac11eb31fd221052039e896e203 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -53,11 +53,14 @@ class TestLoDTensor(unittest.TestCase): self.assertEqual(_convert_lod(lod), converted_lod) def test_create_lod_tensor(self): - # Only numpy array or a fluid LoDTensor is valid input to - # create_lod_tensor function, currently a list of lists is not. 
- data = [[1, 2], [3, 4]] - self.assertRaises(Exception, create_lod_tensor, data, [], + # Create LoDTensor from a list + data = [[1, 2, 3], [3, 4]] + wrong_lod = [[2, 2]] + correct_lod = [[3, 2]] + self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, fluid.CPUPlace()) + tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) + self.assertEqual(tensor.lod(), [[0, 3, 5]]) # Create LoDTensor from numpy array data = numpy.random.random([10, 1]) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 9f9ee271f82eb560b2b780f85731d5dec0118aca..b7e62533b33f26456324a9c62d9fd1e324126525 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -481,9 +481,9 @@ class OpTest(unittest.TestCase): def np_dtype_to_fluid_dtype(input): """Change the dtype of float16 numpy array - numpy float16 is binded to paddle::platform::float16 + numpy float16 is binded to paddle::platform::float16 in tensor_py.h via the help of uint16 data type since - the internal memory representation of float16 is + the internal memory representation of float16 is uint16_t in paddle and np.uint16 in numpy, which are themselves binded together by pybind. @@ -491,9 +491,9 @@ class OpTest(unittest.TestCase): input: input numpy array Returns: - input: The dtype of input will be changed to np.uint16 if + input: The dtype of input will be changed to np.uint16 if it is originally np.float16, such that the internal memory - of input will be reinterpreted as of dtype np.uint16. + of input will be reinterpreted as of dtype np.uint16. """ if input.dtype == np.float16: input.dtype = np.uint16 diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py index 66e3e2d51d118756d4881955b4df8eb4d2bbc094..533d8ccfac82a2e298af16181ab16bf7aa3db282 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py @@ -50,5 +50,27 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest): self.check_output() +class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest): + def setUp(self): + self.op_type = "fill_constant_batch_size_like" + self.inputs = { + 'Input': (np.random.random((31, 28)).astype("float32"), + [[0, 9, 23, 31]]) + } + self.attrs = { + 'value': 3.5, + 'shape': [-1, 16], + 'input_dim_idx': 0, + 'output_dim_idx': 0 + } + + out = np.random.random((3, 16)).astype("float32") + out.fill(3.5) + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index c44ac59ccdb7fa212ab2a8ab83ee0c70fc498f9f..60dc1f83fc32e2551eb2a04ef35f1c8a0ffec769 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -369,11 +369,13 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) - def test_bilinear_interp(self): + def test_upsampling_bilinear2d(self): program = Program() with program_guard(program): x = layers.data(name='x', shape=[3, 9, 6], dtype="float32") - output = layers.bilinear_interp(x, 12, 12) + output = layers.upsampling_bilinear2d(x, out_shape=[12, 12]) + self.assertIsNotNone(output) + output = 
layers.upsampling_bilinear2d(x, scale=3) self.assertIsNotNone(output) print(str(program)) diff --git a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..2105d320665367e3ec1bfd7b3a353a144c91244f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py @@ -0,0 +1,68 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +def PolygonBoxRestore(input): + shape = input.shape + batch_size = shape[0] + geo_channels = shape[1] + h = shape[2] + w = shape[3] + h_indexes = np.array(range(h) * w).reshape( + [w, h]).transpose()[np.newaxis, :] # [1, h, w] + w_indexes = np.array(range(w) * h).reshape( + [h, w])[np.newaxis, :] # [1, h, w] + indexes = np.concatenate( + (w_indexes, h_indexes))[np.newaxis, :] # [1, 2, h, w] + indexes = indexes.repeat( + [geo_channels / 2], + axis=0)[np.newaxis, :] # [1, geo_channels/2, 2, h, w] + indexes = indexes.repeat( + [batch_size], axis=0) # [batch_size, geo_channels/2, 2, h, w] + return indexes.reshape( + input.shape) - input # [batch_size, geo_channels, h, w] + + +class TestPolygonBoxRestoreOp(OpTest): + def config(self): + self.input_shape = (1, 8, 2, 2) + + def setUp(self): + self.config() + self.op_type = "polygon_box_transform" + input = np.random.random(self.input_shape).astype("float32") + self.inputs = {'Input': input} + output = PolygonBoxRestore(input) + self.outputs = {'Output': output} + + def test_check_output(self): + self.check_output() + + +class TestCase1(TestPolygonBoxRestoreOp): + def config(self): + self.input_shape = (2, 10, 3, 2) + + +class TestCase2(TestPolygonBoxRestoreOp): + def config(self): + self.input_shape = (3, 12, 4, 5) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_preprocessor.py b/python/paddle/fluid/tests/unittests/test_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf1a7e0c50a87cd43507ffdb94109873cf4e5d9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_preprocessor.py @@ -0,0 +1,93 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import unittest
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle.v2 as paddle
+import paddle.v2.dataset.mnist as mnist
+
+
+class TestPreprocessor(unittest.TestCase):
+    def setUp(self):
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            reader = paddle.batch(mnist.train(), batch_size=32)
+            feeder = fluid.DataFeeder(
+                feed_list=[  # order is image and label
+                    fluid.layers.data(
+                        name='image', shape=[784]),
+                    fluid.layers.data(
+                        name='label', shape=[1], dtype='int64'),
+                ],
+                place=fluid.CPUPlace())
+            self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file(
+                './mnist_for_preprocessor_test.recordio', reader, feeder)
+
+    def test_main(self):
+        N = 10
+
+        img_expected_res = []
+        lbl_expected_res = []
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            data_file = fluid.layers.io.open_recordio_file(
+                './mnist_for_preprocessor_test.recordio',
+                shapes=[[-1, 784], [-1, 1]],
+                lod_levels=[0, 0],
+                dtypes=['float32', 'int64'])
+            img, lbl = fluid.layers.io.read_file(data_file)
+
+            if fluid.core.is_compiled_with_cuda():
+                place = fluid.CUDAPlace(0)
+            else:
+                place = fluid.CPUPlace()
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            for _ in range(N):
+                img_v, lbl_v = exe.run(fetch_list=[img, lbl])
+                img_expected_res.append(img_v / 2)
+                lbl_expected_res.append(lbl_v + 1)
+
+        img_actual_res = []
+        lbl_actual_res = []
+        with fluid.program_guard(fluid.Program(), fluid.Program()):
+            data_file = fluid.layers.io.open_recordio_file(
+                './mnist_for_preprocessor_test.recordio',
+                shapes=[[-1, 784], [-1, 1]],
+                lod_levels=[0, 0],
+                dtypes=['float32', 'int64'])
+            preprocessor = fluid.layers.io.Preprocessor(reader=data_file)
+            with preprocessor.block():
+                img, lbl = preprocessor.inputs()
+                img_out = img / 2
+                lbl_out = lbl + 1
+                preprocessor.outputs(img_out, lbl_out)
+
+            data_file = fluid.layers.io.double_buffer(preprocessor())
+            img, lbl = fluid.layers.io.read_file(data_file)
+
+            if fluid.core.is_compiled_with_cuda():
+                place = fluid.CUDAPlace(0)
+            else:
+                place = fluid.CPUPlace()
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            for _ in range(N):
+                img_v, lbl_v = exe.run(fetch_list=[img, lbl])
+                img_actual_res.append(img_v)
+                lbl_actual_res.append(lbl_v)
+
+        for idx in range(N):
+            self.assertTrue(np.allclose(img_expected_res[idx], img_actual_res[idx]))
+            self.assertTrue(np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]))
diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
index 80a8f7c09cfe521f8f94a27e85fc8d86c02b3e97..9ff0ae6fca27d4681891b2033e2f8f95bd825942 100644
--- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
+++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
@@ -107,7 +107,7 @@ class ControlFlowGraph(object):
         # Repeatedly apply liveness updates until the algorithm stablize
         # on a complete set live input vars and live output vars.
         while True:
-            for i in range(self.op_size, 0, -1):
+            for i in reversed(range(self.op_size)):
                 live_in[i] = set(self._live_in[i])
                 live_out[i] = set(self._live_out[i])
                 for s in self._successors[i]:
diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py
new file mode 100644
index 0000000000000000000000000000000000000000..48100e5bf989520043b5ca372b02883faea8a9fd
--- /dev/null
+++ b/tools/codestyle/docstring_checker.py
@@ -0,0 +1,334 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""DocstringChecker is used to check python doc string's style."""
+
+import six
+import astroid
+
+from pylint.checkers import BaseChecker, utils
+from pylint.interfaces import IAstroidChecker
+
+from collections import defaultdict
+import re
+
+
+def register(linter):
+    """Register checkers."""
+    linter.register_checker(DocstringChecker(linter))
+
+
+class Docstring(object):
+    """Docstring class holds the parsed doc string elements.
+    """
+
+    def __init__(self):
+        self.d = defaultdict(list)  #name->[]
+        self.clear()
+
+    def clear(self):
+        self.d['Args'] = []
+        self.d['Examples'] = []
+        self.d['Returns'] = []
+        self.d['Raises'] = []
+        self.args = {}  #arg_name->arg_type
+
+    def get_level(self, string, indent='    '):
+        level = 0
+        unit_size = len(indent)
+        while string[:unit_size] == indent:
+            string = string[unit_size:]
+            level += 1
+
+        return level
+
+    def parse(self, doc):
+        """parse gets sections from doc,
+        such as Args, Returns, Raises and Examples.
+        Args:
+            doc (string): is the astroid node doc string.
+        Returns:
+            True if doc is parsed successfully.
+        """
+        self.clear()
+
+        lines = doc.splitlines()
+        state = ("others", -1)
+        for l in lines:
+            c = l.strip()
+            if len(c) <= 0:
+                continue
+
+            level = self.get_level(l)
+            if c.startswith("Args:"):
+                state = ("Args", level)
+            elif c.startswith("Returns:"):
+                state = ("Returns", level)
+            elif c.startswith("Raises:"):
+                state = ("Raises", level)
+            elif c.startswith("Examples:"):
+                state = ("Examples", level)
+            else:
+                if level > state[1]:
+                    self.d[state[0]].append(c)
+                    continue
+
+                state = ("others", -1)
+                self.d[state[0]].append(c)
+
+        self._arg_with_type()
+        return True
+
+    def get_returns(self):
+        return self.d['Returns']
+
+    def get_raises(self):
+        return self.d['Raises']
+
+    def get_examples(self):
+        return self.d['Examples']
+
+    def _arg_with_type(self):
+
+        for t in self.d['Args']:
+            m = re.search('([A-Za-z0-9_-]+)\s{0,4}(\(.+\))\s{0,4}:', t)
+            if m:
+                self.args[m.group(1)] = m.group(2)
+
+        return self.args
+
+
+class DocstringChecker(BaseChecker):
+    """DocstringChecker is a pylint checker to
+    check docstring style.
+    """
+    __implements__ = (IAstroidChecker, )
+
+    POSITIONAL_MESSAGE_ID = 'str-used-on-positional-format-argument'
+    KEYWORD_MESSAGE_ID = 'str-used-on-keyword-format-argument'
+
+    name = 'doc-string-checker'
+    symbol = "doc-string"
+    priority = -1
+    msgs = {
+        'W9001': ('One line doc string on > 1 lines', symbol + "-one-line",
+                  'Used when a short doc string is on multiple lines'),
+        'W9002':
+        ('Doc string does not end with a period', symbol + "-end-with",
+         'Used when a doc string does not end with a period'),
+        'W9003': ('All args with their types must be mentioned in doc string',
+                  symbol + "-with-all-args",
+                  'Used when not all arguments are in the doc string'),
+        'W9005': ('Missing docstring or docstring is too short',
+                  symbol + "-missing", 'Add a docstring of length >= 10'),
+        'W9006': ('Docstring indent error, use 4 spaces for indent',
+                  symbol + "-indent-error", 'Use 4 spaces for indent'),
+        'W9007': ('You should add `Returns` in comments',
+                  symbol + "-with-returns",
+                  'There should be a `Returns` section in comments'),
+        'W9008': ('You should add `Raises` section in comments',
+                  symbol + "-with-raises",
+                  'There should be a `Raises` section in comments'),
+    }
+    options = ()
+
+    def visit_functiondef(self, node):
+        """visit_functiondef checks Function node docstring style.
+        Args:
+            node (astroid.node): The visiting node.
+        Returns:
+            True if successful otherwise False.
+        """
+
+        self.check_doc_string(node)
+
+        if node.tolineno - node.fromlineno <= 10:
+            return True
+
+        if not node.doc:
+            return True
+
+        doc = Docstring()
+        doc.parse(node.doc)
+
+        self.all_args_in_doc(node, doc)
+        self.with_returns(node, doc)
+        self.with_raises(node, doc)
+
+    def visit_module(self, node):
+        self.check_doc_string(node)
+
+    def visit_classdef(self, node):
+        self.check_doc_string(node)
+
+    def check_doc_string(self, node):
+        self.missing_doc_string(node)
+        self.one_line(node)
+        self.has_period(node)
+        self.indent_style(node)
+
+    def missing_doc_string(self, node):
+        if node.tolineno - node.fromlineno <= 10:
+            return True
+
+        if node.doc is None or len(node.doc) < 10:
+            self.add_message('W9005', node=node, line=node.fromlineno)
+            return False
+
+    # FIXME(gongwb): give the docstring line-no
+    def indent_style(self, node, indent=4):
+        """indent_style checks docstring's indent style.
+        Args:
+            node (astroid.node): The visiting node.
+            indent (int): The default indent of style.
+        Returns:
+            True if successful otherwise False.
+        """
+        if node.doc is None:
+            return True
+
+        doc = node.doc
+        lines = doc.splitlines()
+
+        for l in lines:
+            cur_indent = len(l) - len(l.lstrip())
+            if cur_indent % indent != 0:
+                self.add_message('W9006', node=node, line=node.fromlineno)
+                return False
+
+        return True
+
+    def one_line(self, node):
+        """one_line checks if a short docstring (len < 40) is on one line.
+        Args:
+            node (astroid.node): The visiting node.
+        Returns:
+            True if successful otherwise False.
+        """
+
+        doc = node.doc
+        if doc is None:
+            return True
+
+        if len(doc) > 40:
+            return True
+        elif sum(doc.find(nl) for nl in ('\n', '\r', '\n\r')) == -3:
+            return True
+        else:
+            self.add_message('W9001', node=node, line=node.fromlineno)
+            return False
+
+        return True
+
+    def has_period(self, node):
+        """has_period checks if a one-line docstring ends with '.'.
+        Args:
+            node (astroid.node): The visiting node.
+        Returns:
+            True if successful otherwise False.
+        """
+        if node.doc is None:
+            return True
+
+        if len(node.doc.splitlines()) > 1:
+            return True
+
+        if not node.doc.strip().endswith('.'):
+            self.add_message('W9002', node=node, line=node.fromlineno)
+            return False
+
+        return True
+
+    def with_raises(self, node, doc):
+        """with_raises checks if a `Raises` section is documented
+        when the function body raises exceptions.
+        Args:
+            node (astroid.node): The visiting node.
+            doc (Docstring): Docstring object.
+        Returns:
+            True if successful otherwise False.
+ """ + + find = False + for t in node.body: + if not isinstance(t, astroid.Raise): + continue + + find = True + break + + if not find: + return True + + if len(doc.get_raises()) == 0: + self.add_message('W9008', node=node, line=node.fromlineno) + return False + + return True + + def with_returns(self, node, doc): + """with_returns checks if docstring comments what are returned . + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object. + Returns: + True if successful otherwise False. + """ + + find = False + for t in node.body: + if not isinstance(t, astroid.Return): + continue + + find = True + break + + if not find: + return True + + if len(doc.get_returns()) == 0: + self.add_message('W9007', node=node, line=node.fromlineno) + return False + + return True + + def all_args_in_doc(self, node, doc): + """all_args_in_doc checks if arguments are mentioned in doc + Args: + node (astroid.node): the node is visiting. + doc (Docstring): Docstring object + Returns: + True if successful otherwise False. + """ + args = [] + for arg in node.args.get_children(): + if (not isinstance(arg, astroid.AssignName)) \ + or arg.name == "self": + continue + args.append(arg.name) + + if len(args) <= 0: + return True + + parsed_args = doc.args + if len(args) > 0 and len(parsed_args) <= 0: + print "debug:parsed args: ", parsed_args + self.add_message('W9003', node=node, line=node.fromlineno) + return False + + for t in args: + if t not in parsed_args: + print t, " with (type) not in ", parsed_args + self.add_message('W9003', node=node, line=node.fromlineno) + return False + + return True diff --git a/tools/codestyle/pylint_pre_commit.hook b/tools/codestyle/pylint_pre_commit.hook new file mode 100755 index 0000000000000000000000000000000000000000..e7c92ba671e0eb778b2ab5447bea7c4b14fe761b --- /dev/null +++ b/tools/codestyle/pylint_pre_commit.hook @@ -0,0 +1,19 @@ +#!/bin/bash + +TOTAL_ERRORS=0 + + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export PYTHONPATH=$DIR:$PYTHONPATH + +# The trick to remove deleted files: https://stackoverflow.com/a/2413151 +for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do + pylint --disable=all --load-plugins=docstring_checker \ + --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file; + TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); +done + +#exit $TOTAL_ERRORS +#For now, just warning: +exit 0 + diff --git a/tools/codestyle/test_docstring_checker.py b/tools/codestyle/test_docstring_checker.py new file mode 100644 index 0000000000000000000000000000000000000000..0547f7d1610c64b0ca6efa9384e97d658c8276fe --- /dev/null +++ b/tools/codestyle/test_docstring_checker.py @@ -0,0 +1,232 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import docstring_checker
+import pylint.testutils
+import astroid
+import pytest
+import sys
+
+
+class TestDocstring(pylint.testutils.CheckerTestCase):
+    CHECKER_CLASS = docstring_checker.DocstringChecker
+
+    def test_one_line(self):
+        func_node = astroid.extract_node('''
+        def test():
+            """get
+            news.
+            """
+            if True:
+                return 5
+            return 5
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9001' == got[0][0]
+
+    def test_end_with(self):
+        func_node = astroid.extract_node('''
+        def test():
+            """get news"""
+            if True:
+                return 5
+            return 5
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9002' == got[0][0]
+
+    def test_args(self):
+        func_node = astroid.extract_node('''
+        def test(scale, mean):
+            """get news.
+            Args:
+                scale (int): scale is the number.
+            """
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9003' == got[0][0]
+
+    def test_missing(self):
+        func_node = astroid.extract_node('''
+        def test():
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9005' == got[0][0]
+
+    def test_indent(self):
+        func_node = astroid.extract_node('''
+        def test():
+            """ get get get get get get get get
+              get get get get get get get get.
+            """
+            pass
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9006' == got[0][0]
+
+    def test_with_returns(self):
+        func_node = astroid.extract_node('''
+        def test():
+            """get news.
+            Args:
+                scale (int): scale is the number.
+            """
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            return mean
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9007' == got[0][0]
+
+    def test_with_raises(self):
+        func_node = astroid.extract_node('''
+        def test():
+            """get news.
+            Args:
+                scale (int): scale is the number.
+            """
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            mean=scale
+            raise ValueError('A very specific bad thing happened.')
+        ''')
+
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 1
+        assert 'W9008' == got[0][0]
+
+    def test_no_message(self):
+        p = '''
+def fc(input,
+       size,
+       num_flatten_dims=1,
+       param_attr=None,
+       bias_attr=None,
+       act=None,
+       name=None):
+    """
+    **Fully Connected Layer**
+    The fully connected layer can take multiple tensors as its inputs. It
+    creates a variable called weights for each input tensor, which represents
+    a fully connected weight matrix from each input unit to each output unit.
+    The fully connected layer multiplies each input tensor with its corresponding
+    weight to produce an output Tensor. If multiple input tensors are given,
+    the results of multiple multiplications will be summed up. If bias_attr is
+    not None, a bias variable will be created and added to the output. Finally,
+    if activation is not None, it will be applied to the output as well.
+
+    This process can be formulated as follows:
+
+    Args:
+        input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
+            the input tensor(s) is at least 2.
+        size(int): The number of output units in this layer.
+        num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than
+            two dimensions. If this happens, the multidimensional tensor will first be flattened
+            into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
+            tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
+            dimensions will be flattened to form the first dimension of the final matrix (height of
+            the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
+            form the second dimension of the final matrix (width of the matrix). For example, suppose
+            `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
+            Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
+        param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
+            parameters/weights of this layer.
+        bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
+            of this layer. If it is set to None, no bias will be added to the output units.
+        act (str, default None): Activation to be applied to the output of this layer.
+        name (str, default None): The name of this layer.
+    Returns:
+        A tensor variable storing the transformation result.
+    Raises:
+        ValueError: If rank of the input tensor is less than 2.
+    Examples:
+        .. code-block:: python
+          data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+          fc = fluid.layers.fc(input=data, size=1000, act="tanh")
+    """
+    raise ValueError('A very specific bad thing happened.')
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    size = 1
+    return size
+    '''
+
+        func_node = astroid.extract_node(p)
+        self.checker.visit_functiondef(func_node)
+        got = self.linter.release_messages()
+        assert len(got) == 0
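Taken together, the new checks reward docstrings shaped like the sketch below (a hypothetical function, not part of this patch): every argument listed with its type under Args, plus Returns and Raises sections when the body returns or raises, four-space indents, and a closing period:

.. code-block:: python

    def scale(x, factor):
        """Scale x by a non-negative factor.

        Args:
            x (float): The value to be scaled.
            factor (float): The scale factor.

        Returns:
            float: The scaled value.

        Raises:
            ValueError: If factor is negative.
        """
        if factor < 0:
            raise ValueError('factor must be non-negative.')
        return x * factor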