diff --git a/CMakeLists.txt b/CMakeLists.txt index d4fe4f9a0e4b90e34b95ddfba52e22ee762273a0..cfaab206e1f321a55119d4a8d65c4a99d3819fff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) -option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) @@ -58,8 +57,10 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) +option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) +option(WITH_CONTRIB "Compile the third-party contributation" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -156,7 +157,6 @@ include(cupti) include(configure) # add paddle env configuration include(generic) # simplify cmake module include(package) # set paddle packages -include(cpplint) # set paddle c++ style include(ccache) # set ccache for compilation include(util) # set unittest and link libs include(rdma) # set rdma libraries @@ -205,7 +205,7 @@ endif(USE_NNPACK) add_subdirectory(proto) -if(NOT MOBILE_INFERENCE) +if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY) # "add_subdirectory(go)" should be placed after the following loine, # because it depends on paddle/optimizer. add_subdirectory(paddle/optimizer) @@ -233,3 +233,7 @@ if(WITH_DOC) find_python_module(recommonmark REQUIRED) add_subdirectory(doc) endif() + +if (WITH_CONTRIB) + add_subdirectory(paddle/contrib) +endif() diff --git a/Dockerfile b/Dockerfile index ea39efd00bb5c0a7deb3f6d57083d83a673b883c..e5508486d6df6a7465998b7e2926b21a1604dfb4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -101,6 +101,3 @@ RUN echo 'root:root' | chpasswd RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config EXPOSE 22 - -# development image default do build work -CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"] diff --git a/Dockerfile.android b/Dockerfile.android index 848a7eba6f1421432addae8acff407b611adb4ae..48db2efea21a648657e3f490c95429b9a29ede52 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ rm -rf /opt/android-ndk-tmp - -CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/benchmark/cluster/README.md b/benchmark/cluster/README.md deleted file mode 100644 index 64816098a524f064ec12474a736cd4c721227a70..0000000000000000000000000000000000000000 --- a/benchmark/cluster/README.md +++ /dev/null @@ -1,196 +0,0 @@ -# Cluster Training Benchmark - -## Setup - -- Platform - - Kubernetes: v1.6.2 - - Linux Kernel: v3.10.0 - -- Resource - - CPU: 10 Cores per Pod - - Memory: 5GB per Pod - -- Docker Image - - We use different base Docker Image to run the benchmark on Kubernetes: - - PaddlePaddle v2: paddlepaddle/paddle:0.11.0 - - PaddlePaddle Fluid: paddlepaddle/paddle:[commit-id] - - TensorFlow: tensorflow/tensorflow:1.5.0-rc0 - -- Model - vgg16 is used in this benchmark. - -## Cases - -- Variable - - Batch Size of training data. - - PServer count of the training job. - - The number of trainers. - -- Invariant - - The resource of trainer/pserver Pod. - -### Measure the Performance for Different Batch Size - -- PServer Count: 40 -- Trainer Count: 100 -- Metrics: mini-batch / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Batch Size 3264128 256
PaddlePaddle Fluid-- - -
PaddlePaddle v2 - - - -
TensorFlow - - - -
- -### Measure the Performance for Different PServer Count - -- Trainer Count: 100 -- Batch Size: 64 -- Metrics: mini-batch / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PServer Count 102040 60
PaddlePaddle Fluid-- - -
PaddlePaddle v2 - - - -
TensorFlow - - - -
- -### Measure Parallel Efficiency By Increasing Trainer Count - -- PServer Count: 20 -- Batch Size: 64 -- Metrics: - -$S = \div(T1, TN)$ - -which S is the ratio of T1 over TN, training time of 1 and N trainers. -The parallel efficiency is: - -$E = \div(S, N)$ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Trainer Counter 11020 30405060 708090100
PaddlePaddle Fluid-- - - -- - - -- -
PaddlePaddle v2 - - - - -- - - -- -
TensorFlow - - - - -- - - -- -
- - -## Reproduce the benchmark - -TODO diff --git a/benchmark/cluster/vgg16/Dockerfile b/benchmark/cluster/vgg16/Dockerfile deleted file mode 100644 index 13ad8e1b6237e6f41a076c4fb54311728832ae33..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/Dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04 - -# you can get mirror list here: -# https://launchpad.net/ubuntu/+archivemirrors -ARG UBUNTU_MIRROR -RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' - -RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev -RUN pip install -U kubernetes opencv-python - -RUN pip install paddlepaddle -# if network is slowly, you may need to add proxy here. -# ENV https_proxy= -RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python' -RUN pip uninstall -y paddlepaddle -# unset proxy if it is setted. -# ENV https_proxy="" - -# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF, -# so we must build one with distribute support to install in this image. -ADD *.whl / -RUN pip install /*.whl && rm -f /*.whl -ENV LD_LIBRARY_PATH=/usr/local/lib - -# tf k8s -RUN pip install tensorflow==1.4.0 -ADD tf_k8s /usr/bin -RUN chmod +x /usr/bin/tf_k8s -ADD vgg16_tf.py /workspace/ - -# below lines may change a lot for debugging -ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin -ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root -RUN chmod +x /usr/bin/paddle_k8s -ADD vgg16_fluid.py vgg16_v2.py /workspace/ diff --git a/benchmark/cluster/vgg16/README.md b/benchmark/cluster/vgg16/README.md deleted file mode 100644 index d56a912b9b03986e32693363f82df05a34b779e9..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/README.md +++ /dev/null @@ -1,195 +0,0 @@ -# Performance for Distributed vgg16 - -## Test Result - -### Hardware Infomation - -- CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz -- cpu MHz : 2101.000 -- cache size : 20480 KB - -### Blas settings - -Setting environment variable: `MKL_NUM_THREADS=1`. - -### Single Node Single Thread - -- Metrics: samples / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Batch Size 3264128 256
PaddlePaddle Fluid 15.44 16.32 16.74 16.79
PaddlePaddle v2 15.97 17.04 17.60 17.83
TensorFlow 9.09 9.10 9.24 8.66
- - -### Different Batch Size - -- PServer Count: 10 -- Trainer Count: 20 -- Metrics: samples / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Batch Size 3264128 256
PaddlePaddle Fluid 190.20 222.15 247.40 258.18
PaddlePaddle v2 170.96 233.71 256.14 329.23
TensorFlow - - - -
- -### Accelerate Rate - -- Pserver Count: 20 -- Batch Size: 128 -- Metrics: samples / sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Trainer Count 204080100
PaddlePaddle Fluid 263.29 (78.64%) 518.80 (77.47%) 836.26 (62.44%) 1019.29 (60.89%)
PaddlePaddle v2 (need more tests) 326.85 (92.85%) 534.58 (75.93%) 853.30 (60.60%) 1041.99 (59.20%)
TensorFlow - - - -
- - -### Different Pserver Count - -- Trainer Count: 60 -- Batch Size: 128 -- Metrics: samples/ sec - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PServer Count 361020
PaddlePaddle Fluid(should fix in next PR) 589.1 592.6 656.4 655.8
PaddlePaddle v2 (need more tests) 593.4 791.3 729.7 821.7
TensorFlow - - - -
- - -*The performance gap between Fuild and v2 comes from the network interference.* - - -## Steps to Run the Performance Test - -1. You must re-compile PaddlePaddle and enable `-DWITH_DISTRIBUTE` to build PaddlePaddle with distributed support. -1. When the build finishes, copy the output `whl` package located under `build/python/dist` to current directory. -1. Run `docker build -t [image:tag] .` to build the docker image and run `docker push [image:tag]` to push the image to reponsitory so kubernetes can find it. -1. Run `kubectl create -f pserver.yaml && kubectl create -f trainer.yaml` to start the job on your kubernetes cluster (you must configure the `kubectl` client before this step). -1. Run `kubectl get po` to get running pods, and run `kubectl logs [podID]` to fetch the pod log of pservers and trainers. - -Check the logs for the distributed training progress and analyze the performance. - -## Enable Verbos Logs - -Edit `pserver.yaml` and `trainer.yaml` and add an environment variable `GLOG_v=3` and `GLOG_logtostderr=1` to see what happend in detail. diff --git a/benchmark/cluster/vgg16/fluid_pserver.yaml b/benchmark/cluster/vgg16/fluid_pserver.yaml deleted file mode 100644 index ee8b0763b62fc011f40f6197e929a68b48a93e47..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/fluid_pserver.yaml +++ /dev/null @@ -1,72 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: ReplicaSet -metadata: - name: vgg16job-pserver -spec: - replicas: 10 - template: - metadata: - labels: - paddle-job-pserver: vgg16job - spec: - hostNetwork: true - imagePullSecrets: - - name: job-registry-secret - containers: - - name: pserver - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PADDLE_JOB_NAME - value: vgg16job - - name: MKL_NUM_THREADS - value: "1" - - name: TRAINING_ROLE - value: "PSERVER" - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "1" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - command: ["paddle_k8s", "start_fluid"] - resources: - requests: - memory: 10Gi - cpu: 4 - limits: - memory: 10Gi - cpu: 4 diff --git a/benchmark/cluster/vgg16/fluid_trainer.yaml b/benchmark/cluster/vgg16/fluid_trainer.yaml deleted file mode 100644 index 3d56caac009464d1073423bb63abff1f8b0cf28f..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/fluid_trainer.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: vgg16job-trainer -spec: - parallelism: 20 - completions: 20 - template: - metadata: - labels: - paddle-job: vgg16job - spec: - imagePullSecrets: - - name: job-registry-secret - hostNetwork: true - containers: - - name: trainer - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - command: ["paddle_k8s", "start_fluid"] - env: - - name: PADDLE_JOB_NAME - value: vgg16job - - name: TRAINING_ROLE - value: "TRAINER" - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 128" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "1" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - resources: - requests: - memory: 40Gi - cpu: 2 - limits: - memory: 40Gi - cpu: 2 - restartPolicy: Never diff --git a/benchmark/cluster/vgg16/run_vgg_dist.sh b/benchmark/cluster/vgg16/run_vgg_dist.sh deleted file mode 100644 index 8c0501439e9d5fa175f5aa9b62d286e690a10904..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/run_vgg_dist.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Update to point to the source file. -VGG_SRC="vgg16_fluid.py" - -export TRAINING_ROLE=PSERVER -export TRAINERS=2 -export POD_IP=127.0.0.1 -export PADDLE_INIT_PORT=6174 -MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 & - -# Need to wait for the ps to start first. -sleep 10 -echo "done start ps" - -export TRAINING_ROLE=TRAINER -export TRAINERS=2 -export POD_IP=127.0.0.1 -export PADDLE_INIT_PORT=6174 -CUDA_VISIBLE_DEVICES=4 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=0 & -CUDA_VISIBLE_DEVICES=5 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=1 & diff --git a/benchmark/cluster/vgg16/tf_k8s b/benchmark/cluster/vgg16/tf_k8s deleted file mode 100644 index 4fc263d5f681aeabfa71f1758714d269d987b272..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/tf_k8s +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash -check_trainer_ret() { - ret=$1 - stdbuf -oL echo "job returned $ret...setting pod return message..." - stdbuf -oL echo "===============================" - - if [ $ret -eq 136 ] ; then - echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log - elif [ $ret -eq 139 ] ; then - echo "Segmentation Fault" > /dev/termination-log - elif [ $ret -eq 1 ] ; then - echo "General Error" > /dev/termination-log - elif [ $ret -eq 134 ] ; then - echo "Program Abort" > /dev/termination-log - fi - stdbuf -oL echo "termination log wroted..." - exit $ret -} - -g_pservers="" -g_trainers="" - -wait_running_pods(){ - pserver_label="tf-job-pserver=${JOB_NAME}" - trainer_label="tf-job-trainer=${JOB_NAME}" - - stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS_NUM} - stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS_NUM} - - g_pservers=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PORT}) - g_trainers=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PORT}) -} - -start_tf_pserver(){ - wait_running_pods - - label="tf-job-pserver=${JOB_NAME}" - pserver_id=$(python /root/k8s_tools.py fetch_id ${label}) - - cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \ - --job_name=${TF_JOB_NAME} --task_index=${pserver_id}" - - stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}" -} - -start_tf_trainer(){ - wait_running_pods - - label="tf-job-trainer=${JOB_NAME}" - trainer_id=$(python /root/k8s_tools.py fetch_id ${label}) - - cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \ - --job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}" - - stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}" - check_trainer_ret $? -} - -start_tf(){ - if [[ "${TF_JOB_NAME}" == "worker" ]]; then - start_tf_trainer - else - start_tf_pserver - fi -} - -usage() { - echo "usage: tf_k8s []:" - echo " start_tf Start tensorflow jobs" -} - -case "$1" in - start_tf) - start_tf - ;; - --help) - usage - ;; - *) - usage - ;; -esac diff --git a/benchmark/cluster/vgg16/tf_pserver.yaml b/benchmark/cluster/vgg16/tf_pserver.yaml deleted file mode 100644 index 5e37c700819119c8af05c40fe4b8d13911efc3e1..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/tf_pserver.yaml +++ /dev/null @@ -1,56 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: ReplicaSet -metadata: - name: vgg16job-tf-pserver -spec: - replicas: 10 - template: - metadata: - labels: - tf-job-pserver: vgg16job-tf - spec: - hostNetwork: true - imagePullSecrets: - - name: job-registry-secret - containers: - - name: pserver - image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16" - imagePullPolicy: Always - command: ["tf_k8s", "start_tf"] - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PORT - value: "32036" - - name: ENTRY - value: "python vgg16_tf.py" - - name: JOB_NAME - value: vgg16job-tf - - name: PSERVERS_NUM - value: "10" - - name: TF_JOB_NAME - value: "ps" - - name: TRAINERS_NUM - value: "20" - - name: BATCH_SIZE - value: "128" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: NUM_PASSES - value: "1" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - resources: - requests: - memory: 10Gi - cpu: 4 - limits: - memory: 10Gi - cpu: 4 diff --git a/benchmark/cluster/vgg16/tf_trainer.yaml b/benchmark/cluster/vgg16/tf_trainer.yaml deleted file mode 100644 index 08795df3addfa7b618db24a65e57be190e268f06..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/tf_trainer.yaml +++ /dev/null @@ -1,58 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: vgg16job-tf-trainer -spec: - parallelism: 20 - completions: 20 - template: - metadata: - labels: - tf-job-trainer: vgg16job-tf - spec: - imagePullSecrets: - - name: job-registry-secret - hostNetwork: true - containers: - - name: trainer - image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16" - imagePullPolicy: Always - command: ["tf_k8s", "start_tf"] - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PORT - value: "32036" - - name: JOB_NAME - value: vgg16job-tf - - name: TF_JOB_NAME - value: "worker" - - name: ENTRY - value: "python vgg16_tf.py" - - name: PSERVERS_NUM - value: "10" - - name: BATCH_SIZE - value: "128" - - name: TRAINERS_NUM - value: "20" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: NUM_PASSES - value: "1" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: "status.podIP" - resources: - requests: - memory: 40Gi - cpu: 2 - limits: - memory: 40Gi - cpu: 2 - restartPolicy: Never diff --git a/benchmark/cluster/vgg16/v2_pserver.yaml b/benchmark/cluster/vgg16/v2_pserver.yaml deleted file mode 100644 index dd1271e0cf399184134c06b3200ee1202c65cef0..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/v2_pserver.yaml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: extensions/v1beta1 -kind: ReplicaSet -metadata: - name: vgg16v2job-pserver -spec: - replicas: 10 - template: - metadata: - labels: - paddle-job-pserver: vgg16v2job - spec: - hostNetwork: true - imagePullSecrets: - - name: job-registry-secret - containers: - - name: pserver - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - ports: - - name: jobport-30236 - containerPort: 30236 - env: - - name: PADDLE_JOB_NAME - value: vgg16v2job - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "python train.py" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "1" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - command: ["paddle_k8s", "start_pserver"] - resources: - requests: - memory: 10Gi - cpu: 4 - limits: - memory: 10Gi - cpu: 4 diff --git a/benchmark/cluster/vgg16/v2_trainer.yaml b/benchmark/cluster/vgg16/v2_trainer.yaml deleted file mode 100644 index 12c8964066cbcfe8d2a44de2f51a3d12ea422fe2..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/v2_trainer.yaml +++ /dev/null @@ -1,65 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: vgg16v2job-trainer -spec: - parallelism: 20 - completions: 20 - template: - metadata: - labels: - paddle-job: vgg16v2job - spec: - imagePullSecrets: - - name: job-registry-secret - hostNetwork: true - containers: - - name: trainer - image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16" - imagePullPolicy: Always - command: ["paddle_k8s", "start_trainer", "v2"] - env: - - name: PADDLE_JOB_NAME - value: vgg16v2job - - name: BATCH_SIZE - value: "256" - - name: TRAINERS - value: "20" - - name: PSERVERS - value: "10" - - name: TOPOLOGY - value: "" - - name: ENTRY - value: "cd /workspace && MKL_NUM_THREADS=1 python /workspace/vgg16_v2.py" - - name: TRAINER_PACKAGE - value: "/workspace" - - name: PADDLE_INIT_PORT - value: "30236" - - name: PADDLE_INIT_NICS - value: "xgbe0" - - name: PADDLE_INIT_TRAINER_COUNT - value: "1" - - name: PADDLE_INIT_PORTS_NUM - value: "1" - - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE - value: "1" - - name: PADDLE_INIT_NUM_GRADIENT_SERVERS - value: "20" - - name: PADDLE_INIT_NUM_PASSES - value: "2" - - name: PADDLE_INIT_USE_GPU - value: "0" - - name: LD_LIBRARY_PATH - value: "/usr/local/lib:/usr/local/nvidia/lib64" - - name: NAMESPACE - valueFrom: - fieldRef: - fieldPath: "metadata.namespace" - resources: - requests: - memory: 40Gi - cpu: 2 - limits: - memory: 40Gi - cpu: 2 - restartPolicy: Never diff --git a/benchmark/cluster/vgg16/vgg16_fluid.py b/benchmark/cluster/vgg16/vgg16_fluid.py deleted file mode 100644 index 05b5f3977cbed2f08df73c6d8ba2fff687db3313..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/vgg16_fluid.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""VGG16 benchmark in Fluid""" -from __future__ import print_function - -import sys -import time -import numpy as np -import paddle.v2 as paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.profiler as profiler -import argparse -import functools -import os -from paddle.fluid import debuger - - -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') - - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - '--batch_size', type=int, default=128, help="Batch size for training.") -parser.add_argument( - '--learning_rate', - type=float, - default=1e-3, - help="Learning rate for training.") -parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.") -parser.add_argument( - '--device', - type=str, - default='CPU', - choices=['CPU', 'GPU'], - help="The device type.") -parser.add_argument('--device_id', type=int, default=0, help="The device id.") -parser.add_argument( - '--data_format', - type=str, - default='NCHW', - choices=['NCHW', 'NHWC'], - help='The data order, now only support NCHW.') -parser.add_argument( - '--data_set', - type=str, - default='cifar10', - choices=['cifar10', 'flowers'], - help='Optional dataset for benchmark.') -parser.add_argument( - '--local', - type=str2bool, - default=True, - help='Whether to run as local mode.') - -parser.add_argument( - "--ps_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--trainer_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--profile", action='store_true', help="If set, profile a few steps.") - -# Flags for defining the tf.train.Server -parser.add_argument( - "--task_index", type=int, default=0, help="Index of task within the job") -args = parser.parse_args() - - -def vgg16_bn_drop(input): - def conv_block(input, num_filter, groups, dropouts): - return fluid.nets.img_conv_group( - input=input, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act='relu', - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type='max') - - conv1 = conv_block(input, 64, 2, [0.3, 0]) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) - fc1 = fluid.layers.fc(input=drop, size=4096, act=None) - bn = fluid.layers.batch_norm(input=fc1, act='relu') - drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) - fc2 = fluid.layers.fc(input=drop2, size=4096, act=None) - return fc2 - - -def main(): - if args.data_set == "cifar10": - classdim = 10 - if args.data_format == 'NCHW': - data_shape = [3, 32, 32] - else: - data_shape = [32, 32, 3] - else: - classdim = 102 - if args.data_format == 'NCHW': - data_shape = [3, 224, 224] - else: - data_shape = [224, 224, 3] - - # Input data - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - - # Train program - net = vgg16_bn_drop(images) - predict = fluid.layers.fc(input=net, size=classdim, act='softmax') - cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(x=cost) - - # Evaluator - batch_size = fluid.layers.create_tensor(dtype='int64') - batch_acc = fluid.layers.accuracy( - input=predict, label=label, total=batch_size) - - # inference program - inference_program = fluid.default_main_program().clone() - with fluid.program_guard(inference_program): - inference_program = fluid.io.get_inference_program(batch_acc) - - # Optimization - optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) - optimize_ops, params_grads = optimizer.minimize(avg_cost) - - # Initialize executor - place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace( - args.device_id) - exe = fluid.Executor(place) - - # test - def test(exe): - test_pass_acc = fluid.average.WeightedAverage() - for batch_id, data in enumerate(test_reader()): - img_data = np.array(map(lambda x: x[0].reshape(data_shape), - data)).astype("float32") - y_data = np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - outs = exe.run(inference_program, - feed={"pixel": img_data, - "label": y_data}, - fetch_list=[batch_acc, batch_size]) - test_pass_acc.add(value=np.array(outs[0]), weight=np.array(outs[1])) - - return test_pass_acc.eval() - - def train_loop(exe, trainer_prog): - iters = 0 - ts = time.time() - train_pass_acc = fluid.average.WeightedAverage() - for pass_id in range(args.num_passes): - # train - start_time = time.time() - num_samples = 0 - train_pass_acc.reset() - - def run_step(batch_id, data): - img_data = np.array( - map(lambda x: x[0].reshape(data_shape), data)).astype( - "float32") - y_data = np.array(map(lambda x: x[1], data)).astype("int64") - y_data = y_data.reshape([-1, 1]) - - loss, acc, b_size = exe.run( - trainer_prog, - feed={"pixel": img_data, - "label": y_data}, - fetch_list=[avg_cost, batch_acc, batch_size]) - return loss, acc, b_size - - if args.profile and args.task_index == 0: - # warmup. - for batch_id, data in enumerate(train_reader()): - if batch_id > 5: break - run_step(batch_id, data) - with profiler.profiler('All', 'total', '/tmp/profile_vgg'): - for batch_id, data in enumerate(train_reader()): - if batch_id > 5: break - run_step(batch_id, data) - - for batch_id, data in enumerate(train_reader()): - ts = time.time() - loss, acc, b_size = run_step(batch_id, data) - iters += 1 - num_samples += len(data) - train_pass_acc.add(value=acc, weight=b_size) - print( - "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, " - "Speed = %.2f img/s" % (pass_id, iters, loss, acc, - len(data) / (time.time() - ts)) - ) # The accuracy is the accumulation of batches, but not the current batch. - - pass_elapsed = time.time() - start_time - pass_train_acc = train_pass_acc.eval() - pass_test_acc = test(exe) - print("Task:%d Pass = %d, Training performance = %f imgs/s, " - "Train accuracy = %f, Test accuracy = %f\n" % - (args.task_index, pass_id, num_samples / pass_elapsed, - pass_train_acc, pass_test_acc)) - - if args.local: - # Parameter initialization - exe.run(fluid.default_startup_program()) - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() if args.data_set == 'cifar10' - else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.dataset.cifar.test10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), - batch_size=args.batch_size) - train_loop(exe, fluid.default_main_program()) - else: - trainers = int(os.getenv("TRAINERS")) # total trainer count - print("trainers total: ", trainers) - - training_role = os.getenv( - "TRAINING_ROLE", - "TRAINER") # get the training role: trainer/pserver - - t = fluid.DistributeTranspiler() - t.transpile( - trainer_id=args.task_index, - pservers=args.ps_hosts, - trainers=trainers) - - if training_role == "PSERVER": - current_endpoint = os.getenv("POD_IP") + ":" + os.getenv( - "PADDLE_INIT_PORT") - if not current_endpoint: - print("need env SERVER_ENDPOINT") - exit(1) - pserver_prog = t.get_pserver_program(current_endpoint) - pserver_startup = t.get_startup_program(current_endpoint, - pserver_prog) - exe.run(pserver_startup) - exe.run(pserver_prog) - elif training_role == "TRAINER": - # Parameter initialization - exe.run(fluid.default_startup_program()) - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() if args.data_set == 'cifar10' - else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else - paddle.dataset.flowers.test(), - batch_size=args.batch_size) - - trainer_prog = t.get_trainer_program() - feeder = fluid.DataFeeder(feed_list=[images, label], place=place) - # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver - exe.run(fluid.default_startup_program()) - train_loop(exe, trainer_prog) - else: - print("environment var TRAINER_ROLE should be TRAINER os PSERVER") - - -def print_arguments(): - print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -if __name__ == "__main__": - print_arguments() - main() diff --git a/benchmark/cluster/vgg16/vgg16_tf.py b/benchmark/cluster/vgg16/vgg16_tf.py deleted file mode 100644 index 2d220478acae46566760209dbc012cff316946aa..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/vgg16_tf.py +++ /dev/null @@ -1,366 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""VGG16 benchmark in TensorFlow -You can get distribution example template structure here: -https://medium.com/clusterone/how-to-write-distributed-tensorflow-code-with-an-example-on-tensorport-70bf3306adcb -https://www.tensorflow.org/deploy/distributed -""" - -import tensorflow as tf -import paddle.v2 as paddle -import numpy as np -import argparse -import time - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument( - '--batch_size', type=int, default=128, help="Batch size for training.") -parser.add_argument( - '--learning_rate', - type=float, - default=1e-3, - help="Learning rate for training.") -parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.") -parser.add_argument( - '--device', - type=str, - default='CPU', - choices=['CPU', 'GPU'], - help="The device type.") -parser.add_argument( - '--data_format', - type=str, - default='NHWC', - choices=['NCHW', 'NHWC'], - help='The data order, NCHW=[batch, channels, height, width].' - 'Only support NHWC right now.') -parser.add_argument( - '--data_set', - type=str, - default='cifar10', - choices=['cifar10', 'flowers'], - help='Optional dataset for benchmark.') - -parser.add_argument( - "--ps_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--worker_hosts", - type=str, - default="", - help="Comma-separated list of hostname:port pairs") -parser.add_argument( - "--job_name", type=str, default="", help="One of 'worker', 'ps'") -# Flags for defining the tf.train.Server -parser.add_argument( - "--task_index", type=int, default=0, help="Index of task within the job") - -args = parser.parse_args() - - -class VGG16Model(object): - def __init__(self): - self.parameters = [] - - def batch_norm_relu(self, inputs, is_training): - """Performs a batch normalization followed by a ReLU.""" - # We set fused=True for a significant speed boost. See - # https://www.tensorflow.org/speed/speed_guide#common_fused_ops - inputs = tf.layers.batch_normalization( - inputs=inputs, - axis=1 if args.data_format == 'NCHW' else -1, - momentum=0.9, - epsilon=1e-05, - center=True, - scale=True, - training=is_training, - fused=True) - inputs = tf.nn.relu(inputs) - return inputs - - def conv_bn_layer(self, - name, - images, - kernel_shape, - is_training, - drop_rate=0.0): - with tf.name_scope(name) as scope: - kernel = tf.Variable( - tf.truncated_normal( - kernel_shape, dtype=tf.float32, stddev=1e-1), - name='weights') - conv = tf.nn.conv2d( - images, - kernel, [1, 1, 1, 1], - data_format=args.data_format, - padding='SAME') - biases = tf.Variable( - tf.constant( - 0.0, shape=[kernel_shape[-1]], dtype=tf.float32), - trainable=True, - name='biases') - out = tf.nn.bias_add(conv, biases) - out = self.batch_norm_relu(out, is_training) - out = tf.layers.dropout(out, rate=drop_rate, training=is_training) - return out - - def fc_layer(self, name, inputs, shape): - with tf.name_scope(name) as scope: - fc_w = tf.Variable( - tf.truncated_normal( - shape, dtype=tf.float32, stddev=1e-1), - name='weights') - fc_b = tf.Variable( - tf.constant( - 0.0, shape=[shape[-1]], dtype=tf.float32), - trainable=True, - name='biases') - out = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b) - return out - - def network(self, images, class_dim, is_training): - """ VGG16 model structure. - - TODO(kuke): enable this network to support the 'NCHW' data format - """ - - # conv1 - conv1_1 = self.conv_bn_layer( - 'conv1_1', images, [3, 3, 3, 64], is_training, drop_rate=0.3) - conv1_2 = self.conv_bn_layer( - 'conv1_2', conv1_1, [3, 3, 64, 64], is_training, drop_rate=0.0) - # pool1 - pool1 = tf.nn.max_pool( - conv1_2, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool1') - # conv2 - conv2_1 = self.conv_bn_layer( - 'conv2_1', pool1, [3, 3, 64, 128], is_training, drop_rate=0.4) - conv2_2 = self.conv_bn_layer( - 'conv2_2', conv2_1, [3, 3, 128, 128], is_training, drop_rate=0.0) - # pool2 - pool2 = tf.nn.max_pool( - conv2_2, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool2') - # conv3 - conv3_1 = self.conv_bn_layer( - 'conv3_1', pool2, [3, 3, 128, 256], is_training, drop_rate=0.4) - conv3_2 = self.conv_bn_layer( - 'conv3_2', conv3_1, [3, 3, 256, 256], is_training, drop_rate=0.4) - conv3_3 = self.conv_bn_layer( - 'conv3_3', conv3_2, [3, 3, 256, 256], is_training, drop_rate=0.0) - # pool3 - pool3 = tf.nn.max_pool( - conv3_3, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool3') - # conv4 - conv4_1 = self.conv_bn_layer( - 'conv4_1', pool3, [3, 3, 256, 512], is_training, drop_rate=0.4) - conv4_2 = self.conv_bn_layer( - 'conv4_2', conv4_1, [3, 3, 512, 512], is_training, drop_rate=0.4) - conv4_3 = self.conv_bn_layer( - 'conv4_3', conv4_2, [3, 3, 512, 512], is_training, drop_rate=0.0) - # pool4 - pool4 = tf.nn.max_pool( - conv4_3, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool4') - # conv5 - conv5_1 = self.conv_bn_layer( - 'conv5_1', pool4, [3, 3, 512, 512], is_training, drop_rate=0.4) - conv5_2 = self.conv_bn_layer( - 'conv5_2', conv5_1, [3, 3, 512, 512], is_training, drop_rate=0.4) - conv5_3 = self.conv_bn_layer( - 'conv5_3', conv5_2, [3, 3, 512, 512], is_training, drop_rate=0.0) - # pool5 - pool5 = tf.nn.max_pool( - conv5_3, - ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], - padding='SAME', - name='pool4') - # flatten - shape = int(np.prod(pool5.get_shape()[1:])) - pool5_flat = tf.reshape(pool5, [-1, shape]) - # fc1 - drop = tf.layers.dropout(pool5_flat, rate=0.5, training=is_training) - fc1 = self.fc_layer('fc1', drop, [shape, 512]) - # fc2 - bn = self.batch_norm_relu(fc1, is_training) - drop = tf.layers.dropout(bn, rate=0.5, training=is_training) - fc2 = self.fc_layer('fc2', drop, [512, 512]) - - fc3 = self.fc_layer('fc3', fc2, [512, class_dim]) - - return fc3 - - -def run_benchmark(cluster_spec, server): - """Run benchmark on cifar10 or flowers.""" - - if args.data_set == "cifar10": - class_dim = 10 - raw_shape = (3, 32, 32) - dat_shape = (None, 32, 32, 3) if args.data_format == 'NHWC' else ( - None, 3, 32, 32) - else: - class_dim = 102 - raw_shape = (3, 224, 224) - dat_shape = (None, 224, 224, 3) if args.data_format == 'NHWC' else ( - None, 3, 224, 224) - - device = tf.train.replica_device_setter( - worker_device="/job:worker/task:{}".format(args.task_index), - cluster=cluster_spec) - - with tf.device(device): - images = tf.placeholder(tf.float32, shape=dat_shape) - labels = tf.placeholder(tf.int64, shape=(None, )) - is_training = tf.placeholder('bool') - onehot_labels = tf.one_hot(labels, depth=class_dim) - - vgg16 = VGG16Model() - logits = vgg16.network(images, class_dim, is_training) - loss = tf.losses.softmax_cross_entropy( - onehot_labels=onehot_labels, logits=logits) - avg_loss = tf.reduce_mean(loss) - - correct = tf.equal(tf.argmax(logits, 1), labels) - accuracy = tf.reduce_mean(tf.cast(correct, tf.float32)) - - optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - global_step = tf.Variable(0, name='global_step', trainable=False) - with tf.control_dependencies(update_ops): - train_op = optimizer.minimize(avg_loss, global_step=global_step) - - summary_op = tf.summary.merge_all() - init_op = tf.global_variables_initializer() - - # data reader - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), - buf_size=5120), - batch_size=args.batch_size) - test_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.test10() - if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), - buf_size=5120), - batch_size=args.batch_size) - - # test - def test(): - test_accs = [] - for batch_id, data in enumerate(test_reader()): - test_images = np.array( - map(lambda x: np.transpose(x[0].reshape(raw_shape), - axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32") - test_labels = np.array(map(lambda x: x[1], data)).astype('int64') - test_accs.append( - accuracy.eval(feed_dict={ - images: test_images, - labels: test_labels, - is_training: False - })) - return np.mean(test_accs) - - config = tf.ConfigProto( - intra_op_parallelism_threads=1, - inter_op_parallelism_threads=1, - log_device_placement=True) - config.gpu_options.allow_growth = True - - hooks = [tf.train.StopAtStepHook(last_step=1000000)] - - with tf.train.MonitoredTrainingSession( - master=server.target, - is_chief=(args.task_index == 0), - hooks=hooks, - config=config) as sess: - iters, num_samples, start_time = 0, 0, 0.0 - for pass_id in range(args.num_passes): - # train - num_samples = 0 - start_time = time.time() - for batch_id, data in enumerate(train_reader()): - train_images = np.array( - map(lambda x: np.transpose(x[0].reshape(raw_shape), - axes=[1, 2, 0]) if args.data_format == 'NHWC' else x[0], data)).astype("float32") - train_labels = np.array(map(lambda x: x[1], data)).astype( - 'int64') - iter_begin_time = time.time() - _, loss, acc = sess.run([train_op, avg_loss, accuracy], - feed_dict={ - images: train_images, - labels: train_labels, - is_training: True - }) - iters += 1 - print( - "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed=%.2f imgs/sec" - % (pass_id, iters, loss, acc, - len(data) / (time.time() - iter_begin_time))) - num_samples += len(data) - train_elapsed = time.time() - start_time - # test - pass_test_acc = test() - print("Pass = %d, Train speed = %f imgs/s, Test accuracy = %f\n" % - (pass_id, num_samples / train_elapsed, pass_test_acc)) - - -def print_arguments(): - print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): - print('%s: %s' % (arg, value)) - print('------------------------------------------------') - - -if __name__ == '__main__': - print_arguments() - - ps_hosts = args.ps_hosts.split(",") - worker_hosts = args.worker_hosts.split(",") - - # Create a cluster from the parameter server and worker hosts. - cluster_spec = tf.train.ClusterSpec({ - "ps": ps_hosts, - "worker": worker_hosts - }) - - # Create and start a server for the local task. - server = tf.train.Server( - cluster_spec, job_name=args.job_name, task_index=args.task_index) - - if args.job_name == "ps": - print("start pserver") - server.join() - elif args.job_name == "worker": - print("start worker") - run_benchmark(cluster_spec, server) diff --git a/benchmark/cluster/vgg16/vgg16_v2.py b/benchmark/cluster/vgg16/vgg16_v2.py deleted file mode 100644 index 1a66af32d7131997c63bd3c3042875f33a467084..0000000000000000000000000000000000000000 --- a/benchmark/cluster/vgg16/vgg16_v2.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import gzip - -import paddle.v2.dataset.cifar as cifar -import paddle.v2 as paddle -import time -import os - -DATA_DIM = 3 * 32 * 32 -CLASS_DIM = 10 -BATCH_SIZE = os.getenv("BATCH_SIZE") -if BATCH_SIZE: - BATCH_SIZE = int(BATCH_SIZE) -else: - BATCH_SIZE = 128 -print "batch_size", BATCH_SIZE -NODE_COUNT = int(os.getenv("TRAINERS")) -ts = 0 - - -def vgg(input, nums, class_dim): - def conv_block(input, num_filter, groups, num_channels=None): - return paddle.networks.img_conv_group( - input=input, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - pool_type=paddle.pooling.Max()) - - assert len(nums) == 5 - # the channel of input feature is 3 - conv1 = conv_block(input, 64, nums[0], 3) - conv2 = conv_block(conv1, 128, nums[1]) - conv3 = conv_block(conv2, 256, nums[2]) - conv4 = conv_block(conv3, 512, nums[3]) - conv5 = conv_block(conv4, 512, nums[4]) - - fc_dim = 512 - fc1 = paddle.layer.fc(input=conv5, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=fc1, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - out = paddle.layer.fc(input=fc2, - size=class_dim, - act=paddle.activation.Softmax()) - return out - - -def vgg13(input, class_dim): - nums = [2, 2, 2, 2, 2] - return vgg(input, nums, class_dim) - - -def vgg16(input, class_dim): - nums = [2, 2, 3, 3, 3] - return vgg(input, nums, class_dim) - - -def vgg19(input, class_dim): - nums = [2, 2, 4, 4, 4] - return vgg(input, nums, class_dim) - - -def main(): - global ts - paddle.init(use_gpu=False) - image = paddle.layer.data( - name="image", type=paddle.data_type.dense_vector(DATA_DIM)) - lbl = paddle.layer.data( - name="label", type=paddle.data_type.integer_value(CLASS_DIM)) - - extra_layers = None - # NOTE: for v2 distributed training need averaging updates. - learning_rate = 1e-3 / NODE_COUNT - out = vgg16(image, class_dim=CLASS_DIM) - cost = paddle.layer.classification_cost(input=out, label=lbl) - - # Create parameters - parameters = paddle.parameters.create(cost) - - # Create optimizer - optimizer = paddle.optimizer.Momentum( - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * - BATCH_SIZE), - learning_rate=learning_rate / BATCH_SIZE, - learning_rate_decay_a=0.1, - learning_rate_decay_b=128000 * 35, - learning_rate_schedule="discexp", ) - - train_reader = paddle.batch( - paddle.reader.shuffle( - cifar.train10(), - # To use other data, replace the above line with: - # reader.train_reader('train.list'), - buf_size=1000), - batch_size=BATCH_SIZE) - test_reader = paddle.batch( - cifar.test10(), - # To use other data, replace the above line with: - # reader.test_reader('val.list'), - batch_size=BATCH_SIZE) - - # Create trainer - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer, - extra_layers=extra_layers, - is_local=False) - - # End batch and end pass event handler - def event_handler(event): - global ts, ts_pass - if isinstance(event, paddle.event.BeginPass): - ts_pass = time.time() - if isinstance(event, paddle.event.BeginIteration): - ts = time.time() - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1 == 0: - print "\nPass %d, Batch %d, Cost %f, %s, spent: %f" % ( - event.pass_id, event.batch_id, event.cost, event.metrics, - time.time() - ts) - if isinstance(event, paddle.event.EndPass): - print "Pass %d end, spent: %f" % (event.pass_id, - time.time() - ts_pass) - result = trainer.test(reader=test_reader) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - trainer.train( - reader=train_reader, num_passes=200, event_handler=event_handler) - - -if __name__ == '__main__': - main() diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py index 1d8f27440d0f1438e0520684ee3e90e8a5891a17..30b070e4acac60caa97a4e8ffd07462cb347ee93 100644 --- a/benchmark/fluid/fluid_benchmark.py +++ b/benchmark/fluid/fluid_benchmark.py @@ -94,6 +94,10 @@ def parse_args(): '--memory_optimize', action='store_true', help='If set, optimize runtime memory before start.') + parser.add_argument( + '--use_fake_data', + action='store_true', + help='If set ommit the actual read data operators.') parser.add_argument( '--update_method', type=str, @@ -198,6 +202,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, exe.run(train_prog) return + if args.use_fake_data: + raise Exception( + "fake data is not supported in single GPU test for now.") + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) exe = fluid.Executor(place) exe.run(startup_prog) @@ -244,7 +252,31 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, args, train_prog, startup_prog, nccl_id_var, num_trainers, trainer_id): + feed_var_list = [ + var for var in train_prog.global_block().vars.itervalues() + if var.is_data + ] + # generate fake: + if args.use_fake_data: + for var in feed_var_list: + v = startup_prog.global_block().clone_variable(var) + var.persistable = True + v.persistable = True + + real_shape = list(var.shape) + real_shape[0] = args.batch_size / args.gpus + startup_prog.global_block().append_op( + outputs={"Out": v}, + type="fill_constant", + attrs={"shape": real_shape, + "value": 1.0, + "dtype": var.dtype}) + place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) + if nccl_id_var and trainer_id == 0: + #FIXME(wuyi): wait other trainer to start listening + time.sleep(30) + startup_exe = fluid.Executor(place) startup_exe.run(startup_prog) strategy = fluid.ExecutionStrategy() @@ -256,10 +288,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, exec_strategy=strategy, num_trainers=num_trainers, trainer_id=trainer_id) - feed_var_list = [ - var for var in train_prog.global_block().vars.itervalues() - if var.is_data - ] + feeder = fluid.DataFeeder(feed_var_list, place) for pass_id in range(args.pass_num): num_samples = 0 @@ -271,7 +300,10 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, num_samples = 0 if iters == args.iterations: break - loss, = exe.run([avg_loss.name], feed=feeder.feed(data)) + if args.use_fake_data: + loss, = exe.run([avg_loss.name]) + else: + loss, = exe.run([avg_loss.name], feed=feeder.feed(data)) if args.update_method == "pserver": exe.bcast_params() num_samples += len(data) diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py index 3dbb4b8c5dd13657f8d1853003b321ad047e1349..39ba207fd96f71563504017e77dc0e87c249b3f8 100644 --- a/benchmark/fluid/kube_gen_job.py +++ b/benchmark/fluid/kube_gen_job.py @@ -112,6 +112,7 @@ def gen_job(): envs.append({"name": "PSERVERS", "value": str(args.pservers)}) envs.append({"name": "ENTRY", "value": args.entry}) envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)}) + envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) # NOTE: these directories below are cluster specific, please modify # this settings before you run on your own cluster. envs.append({ diff --git a/benchmark/fluid/kube_templates/__init__.py b/benchmark/fluid/kube_templates/__init__.py index b64a7f78ff10d03987ea4a8c13a0e34bb433f64c..2d09d940a5ee638e4b55405d05924e2d76006cfc 100644 --- a/benchmark/fluid/kube_templates/__init__.py +++ b/benchmark/fluid/kube_templates/__init__.py @@ -54,5 +54,13 @@ envs = [ "fieldPath": "status.podIP" } } + }, + { + "name": "PADDLE_CURRENT_IP", + "valueFrom": { + "fieldRef": { + "fieldPath": "status.podIP" + } + } } ] diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e490397cc0624c310949a4b571bd00cac6e8953b..682614742cf1bd3130c638020a2545e16226d4d6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS) add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) endif(USE_EIGEN_FOR_BLAS) +if(EIGEN_USE_THREADS) + add_definitions(-DEIGEN_USE_THREADS) +endif(EIGEN_USE_THREADS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake deleted file mode 100644 index 4823dc3e91390002aefac70f7931b4197db05789..0000000000000000000000000000000000000000 --- a/cmake/cpplint.cmake +++ /dev/null @@ -1,62 +0,0 @@ -# util to check C++ file style -# * it basically use google cpplint.py. -# * It provide "add_style_check_target" for cmake. -# Usage see add_style_check_target's document -# -# TODO(yuyang18): Add python style check. - -set(STYLE_FILTER) - -# diable unwanted filters - -# paddle do not indent public/potected/private in class -set(STYLE_FILTER "${STYLE_FILTER}-whitespace/indent,") -# paddle use mutable reference. BUT IT IS NOT RECOMMANDED -set(STYLE_FILTER "${STYLE_FILTER}-runtime/references,") -# paddle use relative path for include. -set(STYLE_FILTER "${STYLE_FILTER}-build/include,") -# paddle use , , etc. -set(STYLE_FILTER "${STYLE_FILTER}-build/c++11,") -# paddle use c style casting. BUT IT IS NOT RECOMMANDED -set(STYLE_FILTER "${STYLE_FILTER}-readability/casting") - - -# IGNORE SOME FILES -set(IGNORE_PATTERN - .*ImportanceSampler.* - .*cblas\\.h.* - .*\\.pb\\.txt - .*MultiDataProvider.* - .*pb.* - .*pybind.h) - -# add_style_check_target -# -# attach check code style step for target. -# -# first argument: target name to attach -# rest arguments: source list to check code style. -# -# NOTE: If WITH_STYLE_CHECK is OFF, then this macro just do nothing. -macro(add_style_check_target TARGET_NAME) - if(WITH_STYLE_CHECK) - set(SOURCES_LIST ${ARGN}) - list(REMOVE_DUPLICATES SOURCES_LIST) - foreach(filename ${SOURCES_LIST}) - foreach(pattern ${IGNORE_PATTERN}) - if(filename MATCHES ${pattern}) - list(REMOVE_ITEM SOURCES_LIST ${filename}) - endif() - endforeach() - endforeach() - - if(SOURCES_LIST) - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py" - "--filter=${STYLE_FILTER}" - ${SOURCES_LIST} - COMMENT "cpplint: Checking source code style" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - endif() -endmacro() diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index e90948782bb5e333bbdb47ef9d61c1e37e3cf9e4..9459f1ddfe85f5607880d3fdd968b494d6af592a 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -23,17 +23,20 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE) SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) + +include(ProcessorCount) +ProcessorCount(NUM_OF_PROCESSOR) + IF(APPLE) - SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh) + SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin | sed "s/-Werror//g" | sh) ELSE() - SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin) + SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin) ENDIF() ExternalProject_Add( extern_grpc DEPENDS protobuf zlib - GIT_REPOSITORY "https://github.com/grpc/grpc.git" - GIT_TAG "v1.10.x" + URL "http://paddlepaddledeps.bj.bcebos.com/grpc.tar.xz" PREFIX ${GRPC_SOURCES_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 0fde4373a4be58e71ff1a305bd4991cc554d7a34..2665996432b1f6681927320a85d6835094abe4cd 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -212,6 +212,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake ${OPTIONAL_ARGS} -Dprotobuf_BUILD_TESTS=OFF + -DCMAKE_SKIP_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 65d61b7a38dde870a9217c8a68e81f7e593f88ec..9ddd05b3d9404df29ca1bf634105314b7e6a5b70 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -206,8 +206,6 @@ function(cc_library TARGET_NAME) list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) endif() endforeach() - add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS}) - else(cc_library_SRCS) if(cc_library_DEPS) merge_static_libs(${TARGET_NAME} ${cc_library_DEPS}) @@ -271,7 +269,6 @@ function(nv_library TARGET_NAME) list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) endif() endforeach() - add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS}) else(nv_library_SRCS) if (nv_library_DEPS) merge_static_libs(${TARGET_NAME} ${nv_library_DEPS}) @@ -344,7 +341,6 @@ function(hip_library TARGET_NAME) list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) endif() endforeach() - add_style_check_target(${TARGET_NAME} ${hip_library_SRCS} ${hip_library_HEADERS}) else(hip_library_SRCS) if (hip_library_DEPS) merge_static_libs(${TARGET_NAME} ${hip_library_DEPS}) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index b730ab43c49af005c00218c7430ab3c4d1a89510..3b13b2150514bd615667241272d287c7e55d4e74 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -172,6 +172,7 @@ add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) # paddle fluid version execute_process( COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE PADDLE_GIT_COMMIT) set(version_file ${FLUID_INSTALL_DIR}/version.txt) file(WRITE ${version_file} diff --git a/doc/fluid/CMakeLists.txt b/doc/fluid/CMakeLists.txt index 8086507bb4b7e870ad6d6091945ed07a00b5100b..be92af3902769a65c77953c9f3cb1f3aa3738d79 100644 --- a/doc/fluid/CMakeLists.txt +++ b/doc/fluid/CMakeLists.txt @@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") # HTML output director set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") +set(IMPORT_PADDLE_STRING "") +set(IMPORT_PADDLEV2_STRING "") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in" "${BINARY_BUILD_DIR_EN}/conf.py" @@ -27,8 +30,6 @@ sphinx_add_target(paddle_fluid_docs ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_EN}) -add_dependencies(paddle_fluid_docs gen_proto_py paddle_python) - # configured documentation tools and intermediate build results set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") @@ -50,6 +51,4 @@ sphinx_add_target(paddle_fluid_docs_cn ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_CN}) -add_dependencies(paddle_fluid_docs_cn gen_proto_py paddle_python) - add_subdirectory(api) diff --git a/doc/fluid/api/CMakeLists.txt b/doc/fluid/api/CMakeLists.txt index 48b396f0786adad1ba6cd41f72497f853e54bc38..435d6e10fb02e9b2a8147f37da33e8848cc9b98a 100644 --- a/doc/fluid/api/CMakeLists.txt +++ b/doc/fluid/api/CMakeLists.txt @@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") # HTML output director set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") +set(IMPORT_PADDLE_STRING "import paddle") +set(IMPORT_PADDLEV2_STRING "import paddle.v2") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in" "${BINARY_BUILD_DIR_EN}/conf.py" diff --git a/doc/fluid/api/clip.rst b/doc/fluid/api/clip.rst new file mode 100644 index 0000000000000000000000000000000000000000..3ba096388fc87dda3096a9030fe5749e61112c06 --- /dev/null +++ b/doc/fluid/api/clip.rst @@ -0,0 +1,47 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +==== +clip +==== + +ErrorClipByValue +---------------- + +.. autoclass:: paddle.fluid.clip.ErrorClipByValue + :members: + :noindex: + +GradientClipByValue +------------------- + +.. autoclass:: paddle.fluid.clip.GradientClipByValue + :members: + :noindex: + +GradientClipByNorm +------------------ + +.. autoclass:: paddle.fluid.clip.GradientClipByNorm + :members: + :noindex: + +GradientClipByGlobalNorm +------------------------ + +.. autoclass:: paddle.fluid.clip.GradientClipByGlobalNorm + :members: + :noindex: + +append_gradient_clip_ops +------------------------ + +.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops + :noindex: + +error_clip_callback +------------------- + +.. autofunction:: paddle.fluid.clip.error_clip_callback + :noindex: + diff --git a/doc/fluid/api/evaluator.rst b/doc/fluid/api/evaluator.rst index f80b87c7d2704a144c02028c4925530a67d11289..c0dc9a0d1d9f2f70948dc3c905dca25d7dd43742 100644 --- a/doc/fluid/api/evaluator.rst +++ b/doc/fluid/api/evaluator.rst @@ -5,24 +5,3 @@ evaluator ========= -ChunkEvaluator --------------- - -.. autoclass:: paddle.fluid.evaluator.ChunkEvaluator - :members: - :noindex: - -EditDistance --------------- - -.. autoclass:: paddle.fluid.evaluator.EditDistance - :members: - :noindex: - -DetectionMAP --------------- - -.. autoclass:: paddle.fluid.evaluator.DetectionMAP - :members: - :noindex: - diff --git a/doc/fluid/api/executor.rst b/doc/fluid/api/executor.rst index a9cdf264e49691afc4b9425b7bfe54f8157ae6c2..f67a14c49f372e67d18ec8e6f87da01109376d22 100644 --- a/doc/fluid/api/executor.rst +++ b/doc/fluid/api/executor.rst @@ -30,3 +30,9 @@ switch_scope .. autofunction:: paddle.fluid.executor.switch_scope :noindex: +fetch_var +--------- + +.. autofunction:: paddle.fluid.executor.fetch_var + :noindex: + diff --git a/doc/fluid/api/gen_doc.sh b/doc/fluid/api/gen_doc.sh index ba7b7ba8e51399deb852b0a7c8ddd3128f521e85..0f0539355559446fd91f659d61b636db214b5a40 100755 --- a/doc/fluid/api/gen_doc.sh +++ b/doc/fluid/api/gen_doc.sh @@ -1,7 +1,7 @@ #!/bin/bash python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst -for module in io data_feeder evaluator executor initializer io nets optimizer param_attr profiler regularizer +for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer do python gen_doc.py ${module} > ${module}.rst done diff --git a/doc/fluid/api/index_en.rst b/doc/fluid/api/index_en.rst index 06c686d9508635abd41571983e00be174e94743e..29cea9c68221b921939e8e09072d87f9f604e21b 100644 --- a/doc/fluid/api/index_en.rst +++ b/doc/fluid/api/index_en.rst @@ -9,8 +9,9 @@ Fluid data_feeder.rst executor.rst initializer.rst - evaluator.rst + metrics.rst nets.rst + clip.rst optimizer.rst param_attr.rst profiler.rst diff --git a/doc/fluid/api/initializer.rst b/doc/fluid/api/initializer.rst index 2f02c5de097945a45a3e053427104bd17bea1279..c49a98c744cdf907630ea8c74791ff2021d996e8 100644 --- a/doc/fluid/api/initializer.rst +++ b/doc/fluid/api/initializer.rst @@ -33,11 +33,16 @@ Xavier :members: :noindex: -MSRA ------- +force_init_on_cpu +----------------- -.. autoclass:: paddle.fluid.initializer.MSRA - :members: +.. autofunction:: paddle.fluid.initializer.force_init_on_cpu + :noindex: + +init_on_cpu +----------- + +.. autofunction:: paddle.fluid.initializer.init_on_cpu :noindex: ConstantInitializer @@ -68,9 +73,3 @@ XavierInitializer :members: :noindex: - -MSRAInitializer ------------------ -.. autoclass:: paddle.fluid.initializer.MSRAInitializer - :members: - :noindex: diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index 9ae7ffb2604250aebfd9ecd8966384c3ef05f97b..f53da4d194f8d2428b4121fa1bb31f3fc95a9f64 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -55,6 +55,13 @@ While :members: :noindex: +Switch +------ + +.. autoclass:: paddle.fluid.layers.Switch + :members: + :noindex: + lod_rank_table -------------- @@ -67,12 +74,6 @@ max_sequence_len .. autofunction:: paddle.fluid.layers.max_sequence_len :noindex: -topk ----- - -.. autofunction:: paddle.fluid.layers.topk - :noindex: - lod_tensor_to_array ------------------- @@ -109,6 +110,12 @@ less_than .. autofunction:: paddle.fluid.layers.less_than :noindex: +equal +----- + +.. autofunction:: paddle.fluid.layers.equal + :noindex: + array_read ---------- @@ -212,6 +219,42 @@ Send .. autofunction:: paddle.fluid.layers.Send :noindex: +open_recordio_file +------------------ + +.. autofunction:: paddle.fluid.layers.open_recordio_file + :noindex: + +open_files +---------- + +.. autofunction:: paddle.fluid.layers.open_files + :noindex: + +read_file +--------- + +.. autofunction:: paddle.fluid.layers.read_file + :noindex: + +shuffle +------- + +.. autofunction:: paddle.fluid.layers.shuffle + :noindex: + +batch +----- + +.. autofunction:: paddle.fluid.layers.batch + :noindex: + +double_buffer +------------- + +.. autofunction:: paddle.fluid.layers.double_buffer + :noindex: + nn == @@ -281,12 +324,6 @@ square_error_cost .. autofunction:: paddle.fluid.layers.square_error_cost :noindex: -accuracy --------- - -.. autofunction:: paddle.fluid.layers.accuracy - :noindex: - chunk_eval ---------- @@ -311,6 +348,18 @@ sequence_pool .. autofunction:: paddle.fluid.layers.sequence_pool :noindex: +sequence_softmax +---------------- + +.. autofunction:: paddle.fluid.layers.sequence_softmax + :noindex: + +softmax +------- + +.. autofunction:: paddle.fluid.layers.softmax + :noindex: + pool2d ------ @@ -323,12 +372,6 @@ batch_norm .. autofunction:: paddle.fluid.layers.batch_norm :noindex: -layer_norm ----------- - -.. autofunction:: paddle.fluid.layers.layer_norm - :noindex: - beam_search_decode ------------------ @@ -377,6 +420,12 @@ reduce_min .. autofunction:: paddle.fluid.layers.reduce_min :noindex: +reduce_prod +----------- + +.. autofunction:: paddle.fluid.layers.reduce_prod + :noindex: + sequence_first_step ------------------- @@ -425,6 +474,12 @@ matmul .. autofunction:: paddle.fluid.layers.matmul :noindex: +topk +---- + +.. autofunction:: paddle.fluid.layers.topk + :noindex: + warpctc ------- @@ -473,6 +528,60 @@ multiplex .. autofunction:: paddle.fluid.layers.multiplex :noindex: +layer_norm +---------- + +.. autofunction:: paddle.fluid.layers.layer_norm + :noindex: + +softmax_with_cross_entropy +-------------------------- + +.. autofunction:: paddle.fluid.layers.softmax_with_cross_entropy + :noindex: + +smooth_l1 +--------- + +.. autofunction:: paddle.fluid.layers.smooth_l1 + :noindex: + +one_hot +------- + +.. autofunction:: paddle.fluid.layers.one_hot + :noindex: + +autoincreased_step_counter +-------------------------- + +.. autofunction:: paddle.fluid.layers.autoincreased_step_counter + :noindex: + +reshape +------- + +.. autofunction:: paddle.fluid.layers.reshape + :noindex: + +lod_reset +--------- + +.. autofunction:: paddle.fluid.layers.lod_reset + :noindex: + +lrn +--- + +.. autofunction:: paddle.fluid.layers.lrn + :noindex: + +pad +--- + +.. autofunction:: paddle.fluid.layers.pad + :noindex: + label_smooth ------------ @@ -480,7 +589,7 @@ label_smooth :noindex: roi_pool ---------- +-------- .. autofunction:: paddle.fluid.layers.roi_pool :noindex: @@ -501,18 +610,6 @@ mul .. autofunction:: paddle.fluid.layers.mul :noindex: -reshape -------- - -.. autofunction:: paddle.fluid.layers.reshape - :noindex: - -pad ---- - -.. autofunction:: paddle.fluid.layers.pad - :noindex: - scale ----- @@ -579,10 +676,70 @@ clip_by_norm .. autofunction:: paddle.fluid.layers.clip_by_norm :noindex: -sequence_softmax ----------------- +logical_and +----------- -.. autofunction:: paddle.fluid.layers.sequence_softmax +.. autofunction:: paddle.fluid.layers.logical_and + :noindex: + +logical_or +---------- + +.. autofunction:: paddle.fluid.layers.logical_or + :noindex: + +logical_xor +----------- + +.. autofunction:: paddle.fluid.layers.logical_xor + :noindex: + +logical_not +----------- + +.. autofunction:: paddle.fluid.layers.logical_not + :noindex: + +uniform_random +-------------- + +.. autofunction:: paddle.fluid.layers.uniform_random + :noindex: + +uniform_random_batch_size_like +------------------------------ + +.. autofunction:: paddle.fluid.layers.uniform_random_batch_size_like + :noindex: + +gaussian_random +--------------- + +.. autofunction:: paddle.fluid.layers.gaussian_random + :noindex: + +gaussian_random_batch_size_like +------------------------------- + +.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like + :noindex: + +cumsum +------ + +.. autofunction:: paddle.fluid.layers.cumsum + :noindex: + +scatter +------- + +.. autofunction:: paddle.fluid.layers.scatter + :noindex: + +sum +--- + +.. autofunction:: paddle.fluid.layers.sum :noindex: sigmoid @@ -651,6 +808,18 @@ floor .. autofunction:: paddle.fluid.layers.floor :noindex: +cos +--- + +.. autofunction:: paddle.fluid.layers.cos + :noindex: + +sin +--- + +.. autofunction:: paddle.fluid.layers.sin + :noindex: + round ----- @@ -834,4 +1003,9 @@ dice_loss .. autofunction:: paddle.fluid.layers.dice_loss :noindex: +upsampling_bilinear2d +____ + +.. autofunction:: paddle.fluid.layers.upsampling_bilinear2d + :noindex: diff --git a/doc/fluid/api/metrics.rst b/doc/fluid/api/metrics.rst new file mode 100644 index 0000000000000000000000000000000000000000..ddf07775d7ea293acd421b8549d03b277ff0611d --- /dev/null +++ b/doc/fluid/api/metrics.rst @@ -0,0 +1,56 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +======= +metrics +======= + +MetricBase +---------- + +.. autoclass:: paddle.fluid.metrics.MetricBase + :members: + :noindex: + +CompositeMetric +--------------- + +.. autoclass:: paddle.fluid.metrics.CompositeMetric + :members: + :noindex: + +Accuracy +-------- + +.. autoclass:: paddle.fluid.metrics.Accuracy + :members: + :noindex: + +ChunkEvaluator +-------------- + +.. autoclass:: paddle.fluid.metrics.ChunkEvaluator + :members: + :noindex: + +EditDistance +------------ + +.. autoclass:: paddle.fluid.metrics.EditDistance + :members: + :noindex: + +DetectionMAP +------------ + +.. autoclass:: paddle.fluid.metrics.DetectionMAP + :members: + :noindex: + +Auc +--- + +.. autoclass:: paddle.fluid.metrics.Auc + :members: + :noindex: + diff --git a/doc/fluid/api/optimizer.rst b/doc/fluid/api/optimizer.rst index b90d481d9d91519d302ada7b3d22671382d71105..df2bd2eace52e78805433bea320f5de95d45bfc7 100644 --- a/doc/fluid/api/optimizer.rst +++ b/doc/fluid/api/optimizer.rst @@ -111,6 +111,7 @@ DecayedAdagradOptimizer :members: :noindex: + AdadeltaOptimizer ----------------- @@ -118,9 +119,17 @@ AdadeltaOptimizer :members: :noindex: + RMSPropOptimizer ----------------- .. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer :members: :noindex: + +Optimizer +--------- + +.. autoclass:: paddle.fluid.optimizer.Optimizer + :members: + :noindex: diff --git a/doc/fluid/api/regularizer.rst b/doc/fluid/api/regularizer.rst index 837c67111c6e98e6a3859be802addc20a1c64f2b..756bc53baa0625aef48dad0c35e7ae57421a70d0 100644 --- a/doc/fluid/api/regularizer.rst +++ b/doc/fluid/api/regularizer.rst @@ -11,6 +11,13 @@ append_regularization_ops .. autofunction:: paddle.fluid.regularizer.append_regularization_ops :noindex: +WeightDecayRegularizer +---------------------- + +.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer + :members: + :noindex: + L1Decay ------- @@ -26,15 +33,16 @@ L2Decay :noindex: L1DecayRegularizer ---------------------- +------------------ .. autoclass:: paddle.fluid.regularizer.L1DecayRegularizer :members: :noindex: L2DecayRegularizer ---------------------- +------------------ .. autoclass:: paddle.fluid.regularizer.L2DecayRegularizer :members: :noindex: + diff --git a/doc/mobile/CMakeLists.txt b/doc/mobile/CMakeLists.txt index b104a6318d474d6531670b8ac3569448774850c7..7b34ba8d0768427802b11614c6962f3c3f6ef4e3 100644 --- a/doc/mobile/CMakeLists.txt +++ b/doc/mobile/CMakeLists.txt @@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") # HTML output director set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") +set(IMPORT_PADDLE_STRING "") +set(IMPORT_PADDLEV2_STRING "") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in" "${BINARY_BUILD_DIR_EN}/conf.py" @@ -27,8 +30,6 @@ sphinx_add_target(paddle_mobile_docs ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_EN}) -add_dependencies(paddle_mobile_docs gen_proto_py paddle_python) - # configured documentation tools and intermediate build results set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") @@ -49,5 +50,3 @@ sphinx_add_target(paddle_mobile_docs_cn ${SPHINX_CACHE_DIR_CN} ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_CN}) - -add_dependencies(paddle_mobile_docs_cn gen_proto_py paddle_python) diff --git a/doc/mobile/index_cn.rst b/doc/mobile/index_cn.rst index 8297316e8fbb2b8f41954030293feadbcd81295e..56d1515005f6e40b084c6b2184c6a0b3e3a00496 100644 --- a/doc/mobile/index_cn.rst +++ b/doc/mobile/index_cn.rst @@ -1,9 +1,9 @@ 移动端 -===== +====== .. toctree:: :maxdepth: 1 cross_compiling_for_android_cn.md cross_compiling_for_ios_cn.md - cross_compiling_for_raspberry_cn.md \ No newline at end of file + cross_compiling_for_raspberry_cn.md diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 76b82fd97f1ed642696c4414676b694ebda9ad81..890f70615538af23cd05b9ffd685e870a5644cdb 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -16,8 +16,8 @@ import os, subprocess sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python')) import shlex from recommonmark import parser, transform -import paddle -import paddle.v2 +@IMPORT_PADDLE_STRING@ +@IMPORT_PADDLEV2_STRING@ MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index 5aa5c1381fa3fad4ebc181c7868da03ae0138016..5b09464cb991f96127edec40f7dbbc97a8d82582 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -16,8 +16,8 @@ import os, subprocess sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python')) import shlex from recommonmark import parser, transform -import paddle -import paddle.v2 +@IMPORT_PADDLE_STRING@ +@IMPORT_PADDLEV2_STRING@ MarkdownParser = parser.CommonMarkParser diff --git a/doc/v2/CMakeLists.txt b/doc/v2/CMakeLists.txt index be957d37b14c618e9346251b3bd3dbaf1541773f..d230a1b9217eea6740419822f350096e361a4435 100644 --- a/doc/v2/CMakeLists.txt +++ b/doc/v2/CMakeLists.txt @@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") # HTML output director set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") +set(IMPORT_PADDLE_STRING "") +set(IMPORT_PADDLEV2_STRING "") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in" "${BINARY_BUILD_DIR_EN}/conf.py" @@ -27,8 +30,6 @@ sphinx_add_target(paddle_v2_docs ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_EN}) -add_dependencies(paddle_v2_docs gen_proto_py paddle_python) - # configured documentation tools and intermediate build results set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") @@ -50,6 +51,4 @@ sphinx_add_target(paddle_v2_docs_cn ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_CN}) -add_dependencies(paddle_v2_docs_cn gen_proto_py paddle_python) - add_subdirectory(api) diff --git a/doc/v2/api/CMakeLists.txt b/doc/v2/api/CMakeLists.txt index 2670a21a227546ffcee4f10f395feef3c58df9b4..0c74522cb089b17c8419e9058f76631b0fe0df93 100644 --- a/doc/v2/api/CMakeLists.txt +++ b/doc/v2/api/CMakeLists.txt @@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") # HTML output director set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") +set(IMPORT_PADDLE_STRING "import paddle") +set(IMPORT_PADDLEV2_STRING "import paddle.v2") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in" "${BINARY_BUILD_DIR_EN}/conf.py" diff --git a/doc/v2/build_and_install/build_from_source_cn.rst b/doc/v2/build_and_install/build_from_source_cn.rst index 330e84346e28db30d16d4a95490ddcab431228a0..741c01ce5428c0046daa5a784da70d4bb492438c 100644 --- a/doc/v2/build_and_install/build_from_source_cn.rst +++ b/doc/v2/build_and_install/build_from_source_cn.rst @@ -19,8 +19,8 @@ ---------------- PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像 -可以在 `这里 `_ 找到,您也可以 -在 `这里 `_ 找到 paddle_manylinux_devel +可以在 `这里 `__ 找到,您也可以 +在 `这里 `__ 找到 paddle_manylinux_devel 镜像的编译以及使用方法。或者参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像。 如果您选择不使用Docker镜像,则需要在本机安装下面章节列出的 `编译依赖`_ 之后才能开始编译的步骤。 @@ -35,13 +35,11 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 # 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像 docker build -t paddle:dev . # 3. 执行下面的命令编译CPU-Only的二进制 - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build # 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步) - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build -注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。如果使用自行 -构建的镜像(上述第4步)会执行 :code:`Dockerfile` 描述的默认入口程序 :code:`build.sh` 可以省略步骤3中 -最后的执行脚本的命令。 +注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。 编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装: @@ -72,15 +70,15 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test 如果期望执行其中一个单元测试,(比如 :code:`test_sum_op` ): .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash - bash /paddle/paddle/scripts/docker/build.sh - cd /paddle/build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash + ./paddle/scripts/paddle_build.sh build + cd build ctest -R test_sum_op -V .. _faq_docker: @@ -116,11 +114,10 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行 - ```emacs - (global-set-key "\C-cc" 'compile) - (setq compile-command - "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") - ``` + .. code-block:: emacs + + (global-set-key "\C-cc" 'compile) + (setq compile-command "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") 就可以按 `Ctrl-C` 和 `c` 键来启动编译了。 diff --git a/doc/v2/build_and_install/build_from_source_en.rst b/doc/v2/build_and_install/build_from_source_en.rst index 0a6c33985ed65e24e507744c49cf929c9481195c..b06c43e19dcfc52ad0f074a85517a16744895a3a 100644 --- a/doc/v2/build_and_install/build_from_source_en.rst +++ b/doc/v2/build_and_install/build_from_source_en.rst @@ -23,7 +23,7 @@ You need to use Docker to build PaddlePaddle to avoid installing dependencies by yourself. We have several pre-built Docker images `here `_ , you can also find how to build and use paddle_manylinux_devel Docker image from -`here `_ +`here `__ Or you can build your own image from source as the optional step below: .. code-block:: bash @@ -34,14 +34,12 @@ Or you can build your own image from source as the optional step below: # 2. Optional: build development docker image from source docker build -t paddle:dev . # 3. Run the following command to build a CPU-Only binaries - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build # 4. Or, use your built Docker image to build PaddlePaddle (must run step 2) - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build NOTE: The above command try to mount the current working directory (root directory of source code) -into :code:`/paddle` directory inside docker container. If you are using your own image -(Step 4) it will run default entry-point :code:`build.sh` , so you could omit the last -command in step 3. +into :code:`/paddle` directory inside docker container. When the compile finishes, you can get the output whl package under build/python/dist, then you can choose to install the whl on local @@ -74,21 +72,21 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU. .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test If you wish to run only one unit test, like :code:`test_sum_op`: .. code-block:: bash - docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash - bash /paddle/paddle/scripts/docker/build.sh - cd /paddle/build + docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash + ./paddle/scripts/paddle_build.sh build + cd build ctest -R test_sum_op -V .. _faq_docker: Frequently Asked Questions ----------------- +--------------------------- - What is Docker? @@ -118,11 +116,10 @@ Frequently Asked Questions Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file: - ```emacs - (global-set-key "\C-cc" 'compile) - (setq compile-command - "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") - ``` + .. code-block:: emacs + + (global-set-key "\C-cc" 'compile) + (setq compile-command "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. @@ -145,7 +142,7 @@ Frequently Asked Questions .. _compile_deps: Appendix: Compile Dependencies ----------------- +------------------------------- PaddlePaddle need the following dependencies when compiling, other dependencies will be downloaded automatically. @@ -166,11 +163,11 @@ will be downloaded automatically. .. _build_options: Appendix: Build Options ----------------- +------------------------- Build options include whether build binaries for CPU or GPU, which BLAS library to use etc. You may pass these settings when running cmake. -For detailed cmake tutorial please refer to `here `_ 。 +For detailed cmake tutorial please refer to `here `__ 。 You can add :code:`-D` argument to pass such options, like: @@ -219,7 +216,7 @@ keep on with latest cuDNN versions. Be sure to run with the same version of cuDN you built. Pass Compile Options -++++++++++++++ +++++++++++++++++++++++ You can pass compile options to use intended BLAS/CUDA/Cudnn libraries. When running cmake command, it will search system paths like diff --git a/doc/v2/build_and_install/docker_install_cn.rst b/doc/v2/build_and_install/docker_install_cn.rst index 79d214635a069a739060e0b79424729f6ff90387..106c86bace075764c84bc2a7f7cb09d466fa8794 100644 --- a/doc/v2/build_and_install/docker_install_cn.rst +++ b/doc/v2/build_and_install/docker_install_cn.rst @@ -73,6 +73,7 @@ 当然,您也可以进入到Docker容器中,以交互式的方式执行或调试您的代码: .. code-block:: bash + docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash cd /work python train.py @@ -97,7 +98,7 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note 国内用户可以使用下面的镜像源来加速访问: - .. code-block: bash + .. code-block:: bash docker run -p 8888:8888 docker.paddlepaddlehub.com/book diff --git a/doc/v2/build_and_install/docker_install_en.rst b/doc/v2/build_and_install/docker_install_en.rst index e0e0559fb858a093db96a9b4ec1c5a45d6c71a38..25aecb8d0da9feb00006da6259b529b7011d91cb 100644 --- a/doc/v2/build_and_install/docker_install_en.rst +++ b/doc/v2/build_and_install/docker_install_en.rst @@ -80,6 +80,7 @@ Also, you can go into the container shell, run or debug your code interactively: .. code-block:: bash + docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash cd /work python train.py @@ -104,7 +105,7 @@ We provide a packaged book image, simply issue the command: For users in China, we provide a faster mirror: - .. code-block: bash + .. code-block:: bash docker run -p 8888:8888 docker.paddlepaddlehub.com/book diff --git a/doc/v2/build_and_install/index_cn.rst b/doc/v2/build_and_install/index_cn.rst index e079bb661f3a5141a09dfbc6893d1bf945697bc9..1a9305ac4b6578c14a962f223c647a71e3b8a72b 100644 --- a/doc/v2/build_and_install/index_cn.rst +++ b/doc/v2/build_and_install/index_cn.rst @@ -6,7 +6,7 @@ PaddlePaddle针对不同的用户群体提供了多种安装方式。 专注深度学习模型开发 ------------------ +-------------------- PaddlePaddle提供了多种python wheel包,可通过pip一键安装: @@ -18,7 +18,7 @@ PaddlePaddle提供了多种python wheel包,可通过pip一键安装: 这是最便捷的安装方式,请根据机器配置和系统选择对应的安装包。 关注底层框架 ----------- +------------- PaddlePaddle提供了基于Docker的安装方式,请参照以下教程: @@ -45,7 +45,7 @@ PaddlePaddle提供了基于Docker的安装方式,请参照以下教程: 常见问题汇总 ------------ +-------------- 如果在安装过程中遇到了问题,请先尝试在下面的页面寻找答案: diff --git a/doc/v2/build_and_install/index_en.rst b/doc/v2/build_and_install/index_en.rst index 5b3de0f8c3e5496060646b5ddb080d0d338a8bfa..7990bacbd6966e88e8763e9c5709e410f7e9fed4 100644 --- a/doc/v2/build_and_install/index_en.rst +++ b/doc/v2/build_and_install/index_en.rst @@ -1,12 +1,12 @@ install and Compile -========== +====================== .. _install_steps: PaddlePaddle provides various methods of installation for many different users Focus on Deep Learning Model Development ------------------ +---------------------------------------- PaddlePaddle provides lots of packages of python wheel , that pip can install: @@ -18,7 +18,7 @@ PaddlePaddle provides lots of packages of python wheel , that pip can install: This is the most convenient way of installation. Please choose the right installation package with machine configure and system. Follow the Bottom Frame ----------- +------------------------ PaddlePaddle also supports installation using Docker. Please refer to the tutorial below: diff --git a/doc/v2/build_and_install/pip_install_cn.rst b/doc/v2/build_and_install/pip_install_cn.rst index 9b84bb6425af1eeb94a4f2f5d6c2b1e28c62e3c8..853bdb21bbcf07ae1742d2196dbcfe4668828b7b 100644 --- a/doc/v2/build_and_install/pip_install_cn.rst +++ b/doc/v2/build_and_install/pip_install_cn.rst @@ -55,11 +55,11 @@ paddlepaddle-gpu==0.11.0 使用CUDA 7.5和cuDNN 5编译的0.11.0版 :header: "版本说明", "cp27-cp27mu", "cp27-cp27m" :widths: 1, 3, 3 - "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" - "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" - "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" + "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__" + "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__" + "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__" .. _pip_dependency: diff --git a/doc/v2/build_and_install/pip_install_en.rst b/doc/v2/build_and_install/pip_install_en.rst index fcac76d6a24eb4905a20f797d614db8f743342d7..fecf6d3712feac3265100a6121901ba784f7d5cc 100644 --- a/doc/v2/build_and_install/pip_install_en.rst +++ b/doc/v2/build_and_install/pip_install_en.rst @@ -58,11 +58,11 @@ If the links below shows up the login form, just click "Log in as guest" to star :header: "version", "cp27-cp27mu", "cp27-cp27m" :widths: 1, 3, 3 - "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" - "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" - "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" + "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__" + "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__" + "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `__" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `__" .. _pip_dependency: diff --git a/doc/v2/howto/capi/workflow_of_capi_cn.md b/doc/v2/howto/capi/workflow_of_capi_cn.md index 1968c1099ac5734cd68b437f2f7aa428d7b5265e..3acdbae28e9b35f8a9104a89c9a5799f8c892334 100644 --- a/doc/v2/howto/capi/workflow_of_capi_cn.md +++ b/doc/v2/howto/capi/workflow_of_capi_cn.md @@ -59,7 +59,7 @@ 代码示例如下: ```python - from paddle.utils.merge_model import merge_v2_modelss + from paddle.utils.merge_model import merge_v2_model from mnist_v2 import network net = network(is_infer=True) diff --git a/go/pserver/client/c/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt index 411dc50332672143d7a1f7bd0556ae86dc37f6f3..4500b1f288372ed0e2d9d383234df97ae976c60b 100644 --- a/go/pserver/client/c/test/CMakeLists.txt +++ b/go/pserver/client/c/test/CMakeLists.txt @@ -13,4 +13,3 @@ # limitations under the License. # cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) -add_style_check_target(test_cclient test_cclient.c) diff --git a/paddle/.gitignore b/paddle/.gitignore index 1c1c0c2c829f088d7e3f52ca007fcb8f33a16a36..01904aa6ef2057afee95ddd6e30cde064b06c52e 100644 --- a/paddle/.gitignore +++ b/paddle/.gitignore @@ -11,7 +11,6 @@ GTAGS *.pb.cc *.pb.h *_pb2.py -paddle_* output/ google/ Makefile diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index e06e9a2b363d1ffc6876b98bcb7304b0a54dbcaa..957b1a3e6b07b058a76605992da387b43657146a 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -33,9 +33,6 @@ add_library(paddle_capi STATIC ${CAPI_HEADERS} ${CAPI_PRIVATE_HEADER} target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) -add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER} - ${CAPI_PRIVATE_HEADER}) - add_dependencies(paddle_capi paddle_proto paddle_gserver) # TODO: paddle_capi_whole will be removed. diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b19256ef4533a09162edf907f6cd51146517e46 --- /dev/null +++ b/paddle/contrib/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_subdirectory(inference) diff --git a/paddle/contrib/float16/README.md b/paddle/contrib/float16/README.md index ded959c47cb81b9384abbb9815773e25969344ec..58b4a50666bfb622af8acbce29355f2a4a870a82 100644 --- a/paddle/contrib/float16/README.md +++ b/paddle/contrib/float16/README.md @@ -89,7 +89,7 @@ cd Paddle # to `FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04` and similarly for other configurations nvidia-docker build -t paddle:float16 . # After running this, different results will be written to different log files in Paddle/contrib/float16/ -nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/contrib/float16/run_float16_demo.sh +nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/paddle/contrib/float16/run_float16_demo.sh ``` #### Accuracy diff --git a/paddle/contrib/float16/run_float16_demo.sh b/paddle/contrib/float16/run_float16_demo.sh index d8a34ee67b8fab214fa6e96104304689211f84da..031225a85dabb26e5d9ea06f58909c049e7f0c08 100755 --- a/paddle/contrib/float16/run_float16_demo.sh +++ b/paddle/contrib/float16/run_float16_demo.sh @@ -3,7 +3,7 @@ BUILD_PATH=/paddle/fp16_build WHEEL_PATH=$BUILD_PATH/python/dist INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book -DEMO_PATH=/paddle/contrib/float16 +DEMO_PATH=/paddle/paddle/contrib/float16 # Use the single most powerful CUDA GPU on your machine export CUDA_VISIBLE_DEVICES=0 @@ -50,7 +50,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=imagenet \ --dirname=$DEMO_PATH/image_classification_imagenet_vgg.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_vgg.inference.model \ --repeat=$REPEAT \ @@ -68,7 +67,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_resnet \ - --data_set=imagenet \ --dirname=$DEMO_PATH/image_classification_imagenet_resnet.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_resnet.inference.model \ --repeat=$REPEAT \ @@ -86,7 +84,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=cifar10 \ --dirname=$DEMO_PATH/image_classification_cifar10_vgg.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_vgg.inference.model \ --repeat=$REPEAT \ @@ -104,7 +101,6 @@ do --repeat=1 \ $INFER_PATH/test_inference_image_classification_vgg \ - --data_set=cifar10 \ --dirname=$DEMO_PATH/image_classification_cifar10_resnet.inference.model \ --fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_resnet.inference.model \ --repeat=$REPEAT \ diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4fe10f708e5bb8b28e34b2d91b2254c346c467f --- /dev/null +++ b/paddle/contrib/inference/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST) + set(options "") + set(oneValueArgs "") + set(multiValueArgs ARGS) + cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) + set(arg_list "") + if(inference_test_ARGS) + foreach(arg ${inference_test_ARGS}) + list(APPEND arg_list "_${arg}") + endforeach() + else() + list(APPEND arg_list "_") + endif() + foreach(arg ${arg_list}) + string(REGEX REPLACE "^_$" "" arg "${arg}") + cc_test(${TARGET_NAME} + SRCS ${TEST_SRC} + DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl + ARGS --dirname=${PYTHON_TESTS_DIR}/book/) + # set_tests_properties(${TARGET_NAME} + # PROPERTIES DEPENDS ${DEP_TEST}) + endforeach() +endfunction(inference_api_test) + + +cc_library(paddle_inference_api + SRCS paddle_inference_api.cc + DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) + +cc_library(paddle_inference_api_impl + SRCS paddle_inference_api_impl.cc + DEPS paddle_inference_api paddle_fluid_api) + +cc_test(test_paddle_inference_api + SRCS test_paddle_inference_api.cc + DEPS paddle_inference_api) + +inference_api_test(test_paddle_inference_api_impl + test_paddle_inference_api_impl.cc + test_word2vec) diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc new file mode 100644 index 0000000000000000000000000000000000000000..d67e1e7667800d6dd00cb8915b0d6dc7c664970b --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api.cc @@ -0,0 +1,15 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/contrib/inference/paddle_inference_api.h" diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h index dbaa7c95b97e954537707566e5b7458e6afd14c8..9ac8ebdef8151f2a144b479fa258b8bc830fc2e9 100644 --- a/paddle/contrib/inference/paddle_inference_api.h +++ b/paddle/contrib/inference/paddle_inference_api.h @@ -12,49 +12,74 @@ See the License for the specific language governing permissions and limitations under the License. */ +/* + * This file contains the definition of a simple Inference API for Paddle. + * + * ATTENTION: It requires some C++ features, for lower version C++ or C, we + * might release another API. + */ + #pragma once +#include #include #include namespace paddle { -class Predictor { -public: - struct Attr; - Predictor() = default; +enum PaddleDType { + FLOAT32, + INT64, +}; - // Build the network before inference. - bool Init(const Attr& attr); +struct PaddleBuf { + void* data; // pointer to the data memory. + size_t length; // number of memory bytes. +}; + +struct PaddleTensor { + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; +}; + +/* +* A simple Inference API for Paddle. Currently this API might just be used by +* non-sequence scenerios. +* TODO(Superjomn) Prepare another API for NLP-related usages. +*/ +class PaddlePredictor { +public: + struct Config; + PaddlePredictor() = default; + PaddlePredictor(const PaddlePredictor&) = delete; // Predict an record. - // Arguments: - // inputs: the name of the input variables. - // outputs: the name of the output varaibles. - // input_shapes: the shape of the input variables. - // output_shapes: the shape of the output variables. - // input_data: the data of the input variables. - // output_data: the data of the output variables. - bool Run(const std::vector& inputs, - const std::vector& outputs, - const std::vector>& input_shapes, - const std::vector>& output_shapes, - const std::vector>& input_data, - std::vector>* output_data); - - // Clone a predictor that share the model weights. - Predictor* Clone(); + // The caller should be responsible for allocating and releasing the memory of + // `inputs`. `inputs` should be alive until Run returns. caller should be + // responsible for releasing the memory of `output_data`. + virtual bool Run(const std::vector& inputs, + std::vector* output_data) = 0; + + // Clone a predictor that share the model weights, the Cloned predictor should + // be thread-safe. + virtual std::unique_ptr Clone() = 0; // Destroy the Predictor. - ~Predictor(); + virtual ~PaddlePredictor() {} - struct Attr { + friend std::unique_ptr CreatePaddlePredictor( + const PaddlePredictor::Config& config); + + // The common configs for all the predictors. + struct Config { enum class EngineKind; std::string model_dir; // path to the model directory. bool enable_engine{false}; // Enable to execute (part of) the model on - // third-party engines. - EngineKind engine_kind{Attr::EngineKind::kNone}; + // third-party engines. + EngineKind engine_kind{Config::EngineKind::kNone}; enum class EngineKind { kNone = -1, // Use the native Fluid facility. @@ -66,4 +91,8 @@ public: }; }; +// A factory to help create difference predictor. +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + } // namespace paddle diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..ecca16d3f82bbeee6858883a0f9e577a479f9d06 --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api_impl.cc @@ -0,0 +1,309 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "paddle/contrib/inference/paddle_inference_api_impl.h" + +namespace paddle { +namespace { + +// Timer for timer +class Timer { +public: + double start; + double startu; + void tic() { + struct timeval tp; + gettimeofday(&tp, NULL); + start = tp.tv_sec; + startu = tp.tv_usec; + } + double toc() { + struct timeval tp; + gettimeofday(&tp, NULL); + double used_time_ms = + (tp.tv_sec - start) * 1000.0 + (tp.tv_usec - startu) / 1000.0; + return used_time_ms; + } +}; + +template +std::string num2str(T a) { + std::stringstream istr; + istr << a; + return istr.str(); +} +} // namespace + +bool PaddlePredictorImpl::Init() { + VLOG(3) << "Predictor::init()"; + + // TODO(panyx0718): Should CPU vs GPU device be decided by id? + if (config_.device >= 0) { + place_ = paddle::platform::CUDAPlace(config_.device); + } else { + place_ = paddle::platform::CPUPlace(); + } + paddle::framework::InitDevices(false); + executor_.reset(new paddle::framework::Executor(place_)); + scope_.reset(new paddle::framework::Scope()); + + // Initialize the inference program + if (!config_.model_dir.empty()) { + // Parameters are saved in separate files sited in + // the specified `dirname`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.model_dir); + } else if (!config_.prog_file.empty() && !config_.param_file.empty()) { + // All parameters are saved in a single file. + // The file names should be consistent with that used + // in Python API `fluid.io.save_inference_model`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.prog_file, config_.param_file); + } else { + LOG(ERROR) << "fail to load inference model."; + return false; + } + ctx_ = executor_->Prepare(*inference_program_, 0); + + // Create variables + // TODO(panyx0718): Why need to test share_variables here? + if (config_.share_variables) { + executor_->CreateVariables(*inference_program_, scope_.get(), 0); + } + // Get the feed_target_names and fetch_target_names + feed_target_names_ = inference_program_->GetFeedTargetNames(); + fetch_target_names_ = inference_program_->GetFetchTargetNames(); + return true; +} + +bool PaddlePredictorImpl::Run(const std::vector &inputs, + std::vector *output_data) { + VLOG(3) << "Predictor::predict"; + Timer timer; + timer.tic(); + // set feed variable + std::map feed_targets; + std::vector feeds; + if (!SetFeed(inputs, &feeds)) { + LOG(ERROR) << "fail to set feed"; + return false; + } + for (size_t i = 0; i < feed_target_names_.size(); ++i) { + feed_targets[feed_target_names_[i]] = &feeds[i]; + } + // get fetch variable + std::map fetch_targets; + std::vector fetchs; + fetchs.resize(fetch_target_names_.size()); + for (size_t i = 0; i < fetch_target_names_.size(); ++i) { + fetch_targets[fetch_target_names_[i]] = &fetchs[i]; + } + // Run the inference program + // if share variables, we need not create variables + executor_->RunPreparedContext(ctx_.get(), + scope_.get(), + &feed_targets, + &fetch_targets, + !config_.share_variables); + if (!GetFetch(fetchs, output_data)) { + LOG(ERROR) << "fail to get fetchs"; + return false; + } + VLOG(3) << "predict cost: " << timer.toc() << "ms"; + return true; +} + +std::unique_ptr PaddlePredictorImpl::Clone() { + VLOG(3) << "Predictor::clone"; + std::unique_ptr cls(new PaddlePredictorImpl(config_)); + if (!cls->InitShared(this)) { + LOG(ERROR) << "fail to call InitShared"; + return nullptr; + } + return cls; +} + +// TODO(panyx0718): Consider merge with Init()? +bool PaddlePredictorImpl::InitShared(PaddlePredictorImpl *cls) { + VLOG(3) << "Predictor::init_shared"; + // 1. Define place, executor, scope + if (this->config_.device >= 0) { + place_ = paddle::platform::CUDAPlace(); + } else { + place_ = paddle::platform::CPUPlace(); + } + this->executor_.reset(new paddle::framework::Executor(this->place_)); + this->scope_.reset(new paddle::framework::Scope()); + // Initialize the inference program + if (!this->config_.model_dir.empty()) { + // Parameters are saved in separate files sited in + // the specified `dirname`. + this->inference_program_ = paddle::inference::Load( + this->executor_.get(), this->scope_.get(), this->config_.model_dir); + } else if (!this->config_.prog_file.empty() && + !this->config_.param_file.empty()) { + // All parameters are saved in a single file. + // The file names should be consistent with that used + // in Python API `fluid.io.save_inference_model`. + this->inference_program_ = + paddle::inference::Load(this->executor_.get(), + this->scope_.get(), + this->config_.prog_file, + this->config_.param_file); + } + this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0); + // 3. create variables + // TODO(panyx0718): why test share_variables. + if (config_.share_variables) { + this->executor_->CreateVariables( + *this->inference_program_, this->scope_.get(), 0); + } + // 4. Get the feed_target_names and fetch_target_names + this->feed_target_names_ = this->inference_program_->GetFeedTargetNames(); + this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames(); + return true; +} + +bool PaddlePredictorImpl::SetFeed( + const std::vector &inputs, + std::vector *feeds) { + VLOG(3) << "Predictor::set_feed"; + if (inputs.size() != feed_target_names_.size()) { + LOG(ERROR) << "wrong feed input size."; + return false; + } + for (size_t i = 0; i < feed_target_names_.size(); ++i) { + paddle::framework::LoDTensor input; + paddle::framework::DDim ddim = + paddle::framework::make_ddim(inputs[i].shape); + void *input_ptr; + if (inputs[i].dtype == PaddleDType::INT64) { + input_ptr = + input.mutable_data(ddim, paddle::platform::CPUPlace()); + } else if (inputs[i].dtype == PaddleDType::FLOAT32) { + input_ptr = input.mutable_data(ddim, paddle::platform::CPUPlace()); + } else { + LOG(ERROR) << "unsupported feed type " << inputs[i].dtype; + return false; + } + + // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. + std::memcpy(static_cast(input_ptr), + inputs[i].data.data, + inputs[i].data.length); + feeds->push_back(input); + LOG(ERROR) << "Actual feed type " << feeds->back().type().name(); + } + return true; +} + +bool PaddlePredictorImpl::GetFetch( + const std::vector &fetchs, + std::vector *outputs) { + VLOG(3) << "Predictor::get_fetch"; + outputs->resize(fetchs.size()); + for (size_t i = 0; i < fetchs.size(); ++i) { + // TODO(panyx0718): Support fetch of other types. + if (fetchs[i].type() != typeid(float)) { + LOG(ERROR) << "only support fetching float now."; + return false; + } + std::vector shape; + auto dims_i = fetchs[i].dims(); + auto lod = fetchs[i].lod(); + const float *output_ptr = fetchs[i].data(); + // const int64_t* output_ptr = fetchs[i].data(); + auto num = fetchs[i].numel(); + std::vector data; + if (0 == lod.size()) { + std::copy(output_ptr, output_ptr + num, std::back_inserter(data)); + for (int j = 0; j < dims_i.size(); ++j) { + shape.push_back(dims_i[j]); + } + } else { + // for batch detection + // image[0] -> output[0] shape {145, 6} + // image[1] -> output[1] shape {176, 6} + // then, + // the batch output shape {321, 6} + // the lod {{0, 145, 321}} + // so we should append output[0] to {176, 6} + size_t max_dim = 0; + for (size_t j = 1; j < lod[0].size(); j++) { + max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]); + } + size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back(); + if (max_dim > 0) { + data.resize((lod[0].size() - 1) * max_dim * common_dim, 0); + } + for (size_t j = 1; j < lod[0].size(); j++) { + size_t start = lod[0][j - 1] * common_dim; + size_t end = lod[0][j] * common_dim; + if (end > start) { + std::copy(output_ptr + start, + output_ptr + end, + data.begin() + (j - 1) * max_dim * common_dim); + } + } + shape.push_back(lod[0].size() - 1); + shape.push_back(max_dim); + for (int j = 1; j < dims_i.size(); ++j) { + shape.push_back(dims_i[j]); + } + } + + outputs->at(i).shape = shape; + outputs->at(i).data.length = sizeof(float) * data.size(); + outputs->at(i).data.data = malloc(outputs->at(i).data.length); + std::memcpy( + outputs->at(i).data.data, data.data(), outputs->at(i).data.length); + outputs->at(i).dtype = PaddleDType::FLOAT32; + // TODO(panyx0718): support other types? fill tensor name? avoid a copy. + } + return true; +} + +std::unique_ptr CreatePaddlePredictorImpl( + const VisConfig &config) { + VLOG(3) << "create PaddlePredictorImpl"; + // 1. GPU memeroy + std::vector flags; + if (config.fraction_of_gpu_memory >= 0.0f || + config.fraction_of_gpu_memory <= 0.95f) { + flags.push_back("dummpy"); + std::string flag = "--fraction_of_gpu_memory_to_use=" + + num2str(config.fraction_of_gpu_memory); + flags.push_back(flag); + VLOG(3) << "set flag: " << flag; + framework::InitGflags(flags); + } + + std::unique_ptr predictor( + new PaddlePredictorImpl(config)); + if (!predictor->Init()) { + return nullptr; + } + return predictor; +} + +} // namespace paddle diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..831abce5da58f90b38e27b5638e953de5167647a --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api_impl.h @@ -0,0 +1,76 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +#include "paddle/contrib/inference/paddle_inference_api.h" + +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/init.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/profiler.h" + +namespace paddle { + +struct VisConfig : public PaddlePredictor::Config { + int device; + float fraction_of_gpu_memory; + std::string prog_file; + std::string param_file; + bool share_variables; +}; + +/* + * Do not use this, just a demo indicating how to customize a Predictor. + */ +class PaddlePredictorImpl : public PaddlePredictor { +public: + explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {} + + bool Init(); + + bool Run(const std::vector &inputs, + std::vector *output_data) override; + + std::unique_ptr Clone() override; + + ~PaddlePredictorImpl() override{}; + +private: + bool InitShared(PaddlePredictorImpl *cls); + bool SetFeed(const std::vector &input_datas, + std::vector *feeds); + bool GetFetch(const std::vector &fetchs, + std::vector *output_data); + + VisConfig config_; + paddle::platform::Place place_; + std::unique_ptr executor_; + std::unique_ptr scope_; + std::unique_ptr ctx_; + std::unique_ptr inference_program_; + std::vector feed_target_names_; + std::vector fetch_target_names_; +}; + +std::unique_ptr CreatePaddlePredictorImpl( + const VisConfig &config); + +} // namespace paddle diff --git a/paddle/contrib/inference/test_paddle_inference_api.cc b/paddle/contrib/inference/test_paddle_inference_api.cc new file mode 100644 index 0000000000000000000000000000000000000000..a19173087649e8493b8c72e758456cc5b8970e23 --- /dev/null +++ b/paddle/contrib/inference/test_paddle_inference_api.cc @@ -0,0 +1,64 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/contrib/inference/paddle_inference_api.h" + +#include +#include + +namespace paddle { + +/* + * Do not use this, just a demo indicating how to customize a config for a + * specific predictor. + */ +struct DemoConfig : public PaddlePredictor::Config { + float other_config; +}; + +/* + * Do not use this, just a demo indicating how to customize a Predictor. + */ +class DemoPredictor : public PaddlePredictor { +public: + explicit DemoPredictor(const DemoConfig &config) { + LOG(INFO) << "I get other_config " << config.other_config; + } + bool Run(const std::vector &inputs, + std::vector *output_data) override { + LOG(INFO) << "Run"; + return false; + } + + std::unique_ptr Clone() override { return nullptr; } + + ~DemoPredictor() override {} +}; + +template <> +std::unique_ptr CreatePaddlePredictor( + const DemoConfig &config) { + std::unique_ptr x(new DemoPredictor(config)); + return x; +} + +TEST(paddle_inference_api, demo) { + DemoConfig config; + config.other_config = 1.7; + auto predictor = CreatePaddlePredictor(config); + std::vector outputs; + predictor->Run({}, &outputs); +} + +} // namespace paddle diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..43b068fb42c5e5c58f2932c3e528fd0fa0502ec3 --- /dev/null +++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc @@ -0,0 +1,83 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "gflags/gflags.h" +#include "paddle/contrib/inference/paddle_inference_api_impl.h" +#include "paddle/fluid/inference/tests/test_helper.h" + +DEFINE_string(dirname, "", "Directory of the inference model."); + +namespace paddle { + +PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) { + PaddleTensor pt; + pt.data.data = t->data(); + + if (t->type() == typeid(int64_t)) { + pt.data.length = t->numel() * sizeof(int64_t); + pt.dtype = PaddleDType::INT64; + } else if (t->type() == typeid(float)) { + pt.data.length = t->numel() * sizeof(float); + pt.dtype = PaddleDType::FLOAT32; + } else { + LOG(FATAL) << "unsupported type."; + } + pt.shape = framework::vectorize2int(t->dims()); + return pt; +} + +TEST(paddle_inference_api_impl, word2vec) { + VisConfig config; + config.model_dir = FLAGS_dirname + "word2vec.inference.model"; + LOG(INFO) << "dirname " << config.model_dir; + config.fraction_of_gpu_memory = 0.85; + config.device = 0; + config.share_variables = true; + + std::unique_ptr predictor = + CreatePaddlePredictorImpl(config); + + framework::LoDTensor first_word, second_word, third_word, fourth_word; + framework::LoD lod{{0, 1}}; + int64_t dict_size = 2073; // The size of dictionary + + SetupLoDTensor(&first_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(&second_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(&third_word, lod, static_cast(0), dict_size - 1); + SetupLoDTensor(&fourth_word, lod, static_cast(0), dict_size - 1); + + std::vector cpu_feeds; + cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word)); + cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word)); + cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word)); + cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word)); + + std::vector outputs; + ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs)); + ASSERT_EQ(outputs.size(), 1); + for (size_t i = 0; i < outputs.size(); ++i) { + size_t len = outputs[i].data.length; + float* data = static_cast(outputs[i].data.data); + for (int j = 0; j < len / sizeof(float); ++j) { + ASSERT_LT(data[j], 1.0); + ASSERT_GT(data[j], -1.0); + } + free(outputs[i].data.data); + } +} + +} // namespace paddle diff --git a/paddle/cuda/CMakeLists.txt b/paddle/cuda/CMakeLists.txt index efd1b7a73e1655f95eb83a5e2f59e82cbf7eba16..9bbb8de78e09829d24faf42c360811084981578f 100755 --- a/paddle/cuda/CMakeLists.txt +++ b/paddle/cuda/CMakeLists.txt @@ -87,8 +87,3 @@ else() endif() add_dependencies(paddle_cuda paddle_proto ${external_project_dependencies}) - -add_style_check_target(paddle_cuda - ${CUDA_SOURCES} - ${CUDA_HEADERS} - ${CUDA_CXX_SOURCES}) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 9de44beafbb69b3510b97afcc43d4b489a029c35..b69de2ced03569d5e9ffe313527ab776ee798496 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -36,5 +36,5 @@ cc_test(broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_ha device_context broadcast_op_handle) cc_test(gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory device_context gather_op_handle) -cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory - device_context reduce_op_handle ) +#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory +# device_context reduce_op_handle ) diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 6b0c0a6b9fb29e641449f0c21109611cccd4e5a9..35d23d68c0dd26a05544a72316d5764129aa8d40 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -18,6 +18,7 @@ #include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" #include "paddle/fluid/framework/details/send_op_handle.h" +#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/scope.h" #ifdef PADDLE_WITH_CUDA @@ -159,25 +160,39 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( if (!is_forwarding && places_.size() > 1) { // Currently, we assume that once gradient is generated, it can be // broadcast, and each gradient is only broadcast once. - for (auto &og : op->OutputArgumentNames()) { - if (IsParameterGradientOnce(og, &og_has_been_broadcast)) { - switch (strategy_.reduce_) { - case BuildStrategy::ReduceStrategy::kReduce: - CreateReduceOp(&result, og, cur_device_id); - var_name_on_devices[cur_device_id].emplace(og); - bcast_var_name_set[cur_device_id].emplace( - og.substr(0, og.size() - strlen(kGradVarSuffix))); - cur_device_id = (cur_device_id + 1) % places_.size(); - break; - case BuildStrategy::ReduceStrategy::kAllReduce: - if (IsSparseGradient(var_types, og)) { - CreateReduceOp(&result, og, 0); - CreateBroadcastOp(&result, og, 0); - } else { - InsertNCCLAllReduceOp(&result, og); - } - break; + if (static_cast(boost::get(op->GetAttr( + OpProtoAndCheckerMaker::OpRoleAttrName())) & + static_cast(OpRole::kBackward))) { + try { + auto backward_vars = + boost::get>(op->GetNullableAttr( + OpProtoAndCheckerMaker::OpRoleVarAttrName())); + + PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0); + + for (size_t i = 0; i < backward_vars.size(); i += 2) { + auto &p_name = backward_vars[i]; + auto &g_name = backward_vars[i + 1]; + VLOG(10) << "Bcast " << g_name << " for parameter " << p_name; + + switch (strategy_.reduce_) { + case BuildStrategy::ReduceStrategy::kReduce: + CreateReduceOp(&result, g_name, cur_device_id); + var_name_on_devices[cur_device_id].emplace(g_name); + bcast_var_name_set[cur_device_id].emplace(p_name); + cur_device_id = (cur_device_id + 1) % places_.size(); + break; + case BuildStrategy::ReduceStrategy::kAllReduce: + if (IsSparseGradient(var_types, g_name)) { + CreateReduceOp(&result, g_name, 0); + CreateBroadcastOp(&result, g_name, 0); + } else { + InsertNCCLAllReduceOp(&result, g_name); + } + break; + } } + } catch (boost::bad_get e) { } } } @@ -398,11 +413,12 @@ void MultiDevSSAGraphBuilder::CreateSendOp(SSAGraph *result, } bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const { - // FIXME(yy): Do not hard code like this - return op.OutputArgumentNames().size() == 1 && - op.OutputArgumentNames()[0] == GradVarName(loss_var_name_); + return boost::get( + op.GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) == + (static_cast(OpRole::kBackward) | + static_cast(OpRole::kLoss)) && + !loss_var_name_.empty(); // If loss_var is empty. This is test mode } - } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 1c4b059cd0aeff803ca7436d3f198e97a06cd012..eea7e712f8f6e187cdceedce77cc76d1d4ca2101 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -96,10 +96,7 @@ struct OpInfoFiller { info->proto_ = new proto::OpProto; info->checker_ = new OpAttrChecker(); T maker; - maker.SetProto(info->proto_); - maker.SetChecker(info->checker_); - maker.Make(); - maker.Validate(); + maker(info->proto_, info->checker_); info->proto_->set_type(op_type); PADDLE_ENFORCE( info->proto_->IsInitialized(), diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 076c45713015797f86a3611dd333132bae40044d..09b67e5a1741c68c5f5487340e8fc86ff31e00a4 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include #include "glog/logging.h" #include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/shape_inference.h" @@ -222,6 +223,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const { return it->second; } +Attribute OpDesc::GetNullableAttr(const std::string &name) const { + auto it = attrs_.find(name); + if (it != attrs_.end()) { + return it->second; + } else { + return Attribute(); + } +} + int OpDesc::GetBlockAttr(const std::string &name) const { auto it = attrs_.find(name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); @@ -233,13 +243,8 @@ const std::unordered_map &OpDesc::GetAttrMap() const { } void OpDesc::Rename(const std::string &old_name, const std::string &new_name) { - for (auto &input : inputs_) { - std::replace(input.second.begin(), input.second.end(), old_name, new_name); - } - for (auto &output : outputs_) { - std::replace(output.second.begin(), output.second.end(), old_name, - new_name); - } + RenameInput(old_name, new_name); + RenameOutput(old_name, new_name); need_update_ = true; } @@ -249,6 +254,13 @@ void OpDesc::RenameOutput(const std::string &old_name, std::replace(output.second.begin(), output.second.end(), old_name, new_name); } + + auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName()); + if (it != attrs_.end()) { + auto &op_vars = boost::get>(it->second); + std::replace(op_vars.begin(), op_vars.end(), old_name, new_name); + } + need_update_ = true; } @@ -257,6 +269,13 @@ void OpDesc::RenameInput(const std::string &old_name, for (auto &input : inputs_) { std::replace(input.second.begin(), input.second.end(), old_name, new_name); } + + auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName()); + if (it != attrs_.end()) { + auto &op_vars = boost::get>(it->second); + std::replace(op_vars.begin(), op_vars.end(), old_name, new_name); + } + need_update_ = true; } diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 3ee36a47c156da67a9ff70852665fbbd464bea17..1a330db7cc5555a939950043ac90a321573b292d 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -78,6 +78,8 @@ class OpDesc { Attribute GetAttr(const std::string &name) const; + Attribute GetNullableAttr(const std::string &name) const; + int GetBlockAttr(const std::string &name) const; void Rename(const std::string &old_name, const std::string &new_name); diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index c479d7617cfa34cd381d84d15d5e214d57af52d0..5a4380a83a2e5bf492098032cd9de7bf274fe47e 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_proto_maker.h" #include +#include namespace paddle { namespace framework { @@ -55,5 +56,28 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() { } } +void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, + OpAttrChecker* attr_checker) { + proto_ = proto; + op_checker_ = attr_checker; + Make(); + + AddAttr(OpRoleAttrName(), "The role of this operator") + .InEnum( + {static_cast(OpRole::kForward), + static_cast(OpRole::kBackward), + static_cast(OpRole::kOptimize), + static_cast(OpRole::kLoss) | static_cast(OpRole::kForward), + static_cast(OpRole::kLoss) | + static_cast(OpRole::kBackward), + static_cast(OpRole::kNotSpecified)}) + .SetDefault(static_cast(OpRole::kNotSpecified)); + AddAttr>(OpRoleVarAttrName(), + "Optimized for variable") + .SetDefault({}); + + Validate(); +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index b01a520bba19c1be32363a1a5c381666c82e6afc..9bd6ca6ea32734707a5c37b3ecfe449436c04c8c 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -20,21 +20,31 @@ limitations under the License. */ namespace paddle { namespace framework { +enum class OpRole { + kForward = 0x0000, + kBackward = 0x0001, + kOptimize = 0x0002, + + kLoss = 0x0100, + // The default value of op's role. This should be only used for unittests and + // CreateOp inside a operator. + kNotSpecified = 0x1000, +}; + // this class not only make proto but also init attribute checkers. class OpProtoAndCheckerMaker { public: + static const char *OpRoleAttrName() { return "op_role"; } + static const char *OpRoleVarAttrName() { return "op_role_var"; } + + void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker); + virtual void Make() = 0; virtual ~OpProtoAndCheckerMaker() { CHECK(validated_) << "should call Validate after build"; } - void SetProto(proto::OpProto *proto) { proto_ = proto; } - - void SetChecker(OpAttrChecker *attr_checker) { op_checker_ = attr_checker; } - - void Validate(); - protected: struct VariableBuilder { proto::OpProto::Var *var_; @@ -76,6 +86,7 @@ class OpProtoAndCheckerMaker { private: void CheckNoDuplicatedInOutAttrs(); + void Validate(); proto::OpProto *proto_; OpAttrChecker *op_checker_; diff --git a/paddle/fluid/framework/op_proto_maker_test.cc b/paddle/fluid/framework/op_proto_maker_test.cc index 9b5badbc81f9ddf083c81f57f5355e07a8e5e4a2..a8030d377fdb4d4aef74b315e21792dad10fac96 100644 --- a/paddle/fluid/framework/op_proto_maker_test.cc +++ b/paddle/fluid/framework/op_proto_maker_test.cc @@ -28,10 +28,8 @@ TEST(ProtoMaker, DuplicatedAttr) { paddle::framework::proto::OpProto op_proto; paddle::framework::OpAttrChecker op_checker; TestAttrProtoMaker proto_maker; - proto_maker.SetProto(&op_proto); - proto_maker.SetChecker(&op_checker); - proto_maker.Make(); - ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); + ASSERT_THROW(proto_maker(&op_proto, &op_checker), + paddle::platform::EnforceNotMet); } class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { @@ -46,8 +44,6 @@ TEST(ProtoMaker, DuplicatedInOut) { paddle::framework::proto::OpProto op_proto; paddle::framework::OpAttrChecker op_checker; TestAttrProtoMaker proto_maker; - proto_maker.SetProto(&op_proto); - proto_maker.SetChecker(&op_checker); - proto_maker.Make(); - ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); + ASSERT_THROW(proto_maker(&op_proto, &op_checker), + paddle::platform::EnforceNotMet); } diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h index 46c8feec001584a872f7f62682080e0e72c06f50..5f497cafa0f75f7c23d550ef767d55274de7c900 100644 --- a/paddle/fluid/framework/shape_inference.h +++ b/paddle/fluid/framework/shape_inference.h @@ -63,6 +63,7 @@ class InferShapeContext { std::vector GetInputVarPtrs(const std::string &name); std::vector GetOutputVarPtrs(const std::string &name); + virtual InferShapeVarPtr GetVarPtr(const std::string &name) = 0; // Note: In while op, we need this to be public void SetDims(const std::vector &names, @@ -81,8 +82,6 @@ class InferShapeContext { const std::vector &names) const; virtual proto::VarType::Type GetVarType(const std::string &name) const = 0; - - virtual InferShapeVarPtr GetVarPtr(const std::string &name) = 0; }; } // namespace framework diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index b98aeed8a0aaabfd39560fad3c074a6668b4f024..cc4a725dfb3b3e7723a3a3a4008b20acdb53899d 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -1,5 +1,6 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) +# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal? cc_library(paddle_fluid_api SRCS io.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc index 15eddca1c760a44afb986796b08e2b8533695d60..51d38d6251d853fa8a02a4e22f819cfc44294453 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/ut_helper.h" diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc index 52851a9acb7f90826d520cc944d04fe1c90a22e0..f848a7d1add79c3032da7defc34a406dccf29d2e 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include diff --git a/paddle/fluid/inference/analysis/graph_traits.cc b/paddle/fluid/inference/analysis/graph_traits.cc index 272dbb799f3759a3f6e34e93bc115cf2cea6ec3b..2ea70a1d2060e03769d67060dc6f008207342b52 100644 --- a/paddle/fluid/inference/analysis/graph_traits.cc +++ b/paddle/fluid/inference/analysis/graph_traits.cc @@ -1,15 +1,15 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/fluid/inference/analysis/graph_traits.h" diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index a79e9cbda105f3bcaf100b3f6ea2c2634e0e2451..ea39ba4ddb5e8d5d6cce9b116ab968764e578c26 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -1,16 +1,16 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/fluid/inference/analysis/node_tester.cc b/paddle/fluid/inference/analysis/node_tester.cc index 47fea0fdff808c930ca73edb25f5b16fef397e9a..ea832a3a7e47758be9b6bd59a4325ddb576ec446 100644 --- a/paddle/fluid/inference/analysis/node_tester.cc +++ b/paddle/fluid/inference/analysis/node_tester.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/fluid/inference/analysis/node.h" diff --git a/paddle/fluid/inference/analysis/pass.cc b/paddle/fluid/inference/analysis/pass.cc index b48a4fd83497a068392fd941028fef542e45d763..121b72c0a0aa9a0c568b04f7ee9a5bc5c1d6f5f8 100644 --- a/paddle/fluid/inference/analysis/pass.cc +++ b/paddle/fluid/inference/analysis/pass.cc @@ -12,4 +12,4 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/pass.h" \ No newline at end of file +#include "paddle/fluid/inference/analysis/pass.h" diff --git a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc index 6f695965afc9b462963660cabf988bfd81f0ba5c..0644c0db12e3daabba76dbaad33847f5624b157a 100644 --- a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc +++ b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/fluid/inference/analysis/subgraph_splitter.h" #include "paddle/fluid/inference/analysis/ut_helper.h" diff --git a/paddle/fluid/inference/analysis/ut_helper.h b/paddle/fluid/inference/analysis/ut_helper.h index f63550dba3549ad300ab54f5eab63770848f9833..c86083d12153921672e15c172b874f77a8b46cde 100644 --- a/paddle/fluid/inference/analysis/ut_helper.h +++ b/paddle/fluid/inference/analysis/ut_helper.h @@ -1,16 +1,16 @@ /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include @@ -29,11 +29,10 @@ DEFINE_string(inference_model_dir, "", "inference test model dir"); static framework::proto::ProgramDesc LoadProgramDesc( const std::string& model_dir = FLAGS_inference_model_dir) { - // TODO(Superjomn) update latter. - auto place = paddle::platform::CPUPlace(); - auto executor = paddle::framework::Executor(place); - auto* scope = new paddle::framework::Scope(); - auto program = Load(&executor, scope, model_dir); + paddle::platform::CPUPlace place; + paddle::framework::Executor executor(place); + paddle::framework::Scope scope; + auto program = Load(&executor, &scope, model_dir); return *program->Proto(); } diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 602246d75d708db5108e5320e50a27fd9cd580f8..7cd777de27e9457260a1b2f5936dc917f0821984 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,5 +1,7 @@ nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) -nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc - DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine - SERIAL) +# This test is not stable +# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828 +#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc +# DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine +# SERIAL) nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index bc7faef8cd499e63af4d0ab2282897c39f2b7faa..f72997ca24ed837f761b52cbecdc05998424a675 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -201,9 +201,9 @@ if(WITH_DISTRIBUTE) set_source_files_properties(send_vars_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS}) set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op - listen_and_serv_op sum_op executor SERIAL) + #set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + #cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op + # listen_and_serv_op sum_op executor SERIAL) if(WITH_GPU) set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/detail/grpc_client.cc index ae60ab15325ef101feb7270a4f5d840cb2112be0..47892b1bcc073d24ea617ea1c680138a88925177 100644 --- a/paddle/fluid/operators/detail/grpc_client.cc +++ b/paddle/fluid/operators/detail/grpc_client.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/threadpool.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { @@ -196,9 +197,14 @@ bool RPCClient::Wait() { const size_t kReqCnt = req_count_; bool a[kReqCnt]; std::vector> waits(req_count_); + std::mutex mu; for (int i = 0; i < req_count_; i++) { - waits[i] = framework::AsyncIO([i, &a, this] { a[i] = Proceed(); }); + waits[i] = framework::AsyncIO([i, &a, &mu, this] { + bool ret = Proceed(); + std::lock_guard l(mu); + a[i] = ret; + }); } for (int i = 0; i < req_count_; i++) { diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/detail/grpc_server.cc index eb114a47d99541402f748bfffcf6b10fde3e78e2..58faead2bdf9a89749e08207d964836bbf5cb68e 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/detail/grpc_server.cc @@ -19,10 +19,16 @@ limitations under the License. */ using ::grpc::ServerAsyncResponseWriter; +DEFINE_int32(rpc_server_handle_send_threads, 20, + "Number of threads used to handle send at rpc server."); +DEFINE_int32(rpc_server_handle_get_threads, 20, + "Number of threads used to handle get at rpc server."); +DEFINE_int32(rpc_server_handle_prefetch_threads, 1, + "Number of threads used to handle prefetch at rpc server."); + namespace paddle { namespace operators { namespace detail { - enum CallStatus { PROCESS = 0, FINISH }; // reference: @@ -63,18 +69,20 @@ class RequestSend final : public RequestBase { explicit RequestSend(GrpcService::AsyncService* service, ::grpc::ServerCompletionQueue* cq, bool sync_mode, framework::Scope* scope, ReceivedQueue* queue, - const platform::DeviceContext* dev_ctx) + const platform::DeviceContext* dev_ctx, int req_id) : RequestBase(service, cq, sync_mode, dev_ctx), queue_(queue), - responder_(&ctx_) { + responder_(&ctx_), + req_id_(req_id) { if (sync_mode_) { request_.reset(new VariableResponse(scope, dev_ctx_, false)); } else { request_.reset(new VariableResponse(scope, dev_ctx_, true)); } int method_id = static_cast(detail::GrpcMethod::kSendVariable); - service_->RequestAsyncUnary(method_id, &ctx_, request_.get(), &responder_, - cq_, cq_, this); + service_->RequestAsyncUnary( + method_id, &ctx_, request_.get(), &responder_, cq_, cq_, + reinterpret_cast(static_cast(req_id))); } virtual ~RequestSend() {} @@ -86,15 +94,17 @@ class RequestSend final : public RequestBase { VLOG(3) << "RequestSend " << var_name; queue_->Push(std::make_pair(var_name, request_)); - sendrecv::VoidMessage reply; - responder_.Finish(reply, ::grpc::Status::OK, this); status_ = FINISH; + responder_.Finish(reply_, ::grpc::Status::OK, + reinterpret_cast(static_cast(req_id_))); } protected: + sendrecv::VoidMessage reply_; std::shared_ptr request_; ReceivedQueue* queue_; ServerAsyncResponseWriter responder_; + int req_id_; }; class RequestGet final : public RequestBase { @@ -103,14 +113,17 @@ class RequestGet final : public RequestBase { ::grpc::ServerCompletionQueue* cq, bool sync_mode, framework::Scope* scope, const platform::DeviceContext* dev_ctx, - framework::BlockingQueue* queue) + framework::BlockingQueue* queue, + int req_id) : RequestBase(service, cq, sync_mode, dev_ctx), responder_(&ctx_), scope_(scope), - queue_(queue) { + queue_(queue), + req_id_(req_id) { auto method_id = static_cast(detail::GrpcMethod::kGetVariable); - service_->RequestAsyncUnary(method_id, &ctx_, &request_, &responder_, cq_, - cq_, this); + service_->RequestAsyncUnary( + method_id, &ctx_, &request_, &responder_, cq_, cq_, + reinterpret_cast(static_cast(req_id_))); } virtual ~RequestGet() {} @@ -123,13 +136,13 @@ class RequestGet final : public RequestBase { VLOG(3) << "RequestGet " << var_name; auto* var = scope_->FindVar(var_name); - ::grpc::ByteBuffer reply; if (var_name != FETCH_BARRIER_MESSAGE) { - SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply); + SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply_); } - responder_.Finish(reply, ::grpc::Status::OK, this); status_ = FINISH; + responder_.Finish(reply_, ::grpc::Status::OK, + reinterpret_cast(static_cast(req_id_))); if (var_name == FETCH_BARRIER_MESSAGE) { sendrecv::VariableMessage msg; @@ -140,9 +153,11 @@ class RequestGet final : public RequestBase { protected: sendrecv::VariableMessage request_; + ::grpc::ByteBuffer reply_; ServerAsyncResponseWriter<::grpc::ByteBuffer> responder_; framework::Scope* scope_; framework::BlockingQueue* queue_; + int req_id_; }; class RequestPrefetch final : public RequestBase { @@ -153,21 +168,24 @@ class RequestPrefetch final : public RequestBase { const platform::DeviceContext* dev_ctx, framework::Executor* executor, framework::ProgramDesc* program, - framework::ExecutorPrepareContext* prefetch_ctx) + framework::ExecutorPrepareContext* prefetch_ctx, + int req_id) : RequestBase(service, cq, sync_mode, dev_ctx), responder_(&ctx_), scope_(scope), executor_(executor), program_(program), - prefetch_ctx_(prefetch_ctx) { + prefetch_ctx_(prefetch_ctx), + req_id_(req_id) { if (sync_mode_) { request_.reset(new VariableResponse(scope, dev_ctx_, false)); } else { request_.reset(new VariableResponse(scope, dev_ctx_, true)); } int method_id = static_cast(detail::GrpcMethod::kPrefetchVariable); - service_->RequestAsyncUnary(method_id, &ctx_, request_.get(), &responder_, - cq_, cq_, this); + service_->RequestAsyncUnary( + method_id, &ctx_, request_.get(), &responder_, cq_, cq_, + reinterpret_cast(static_cast(req_id_))); } virtual ~RequestPrefetch() {} @@ -176,7 +194,6 @@ class RequestPrefetch final : public RequestBase { virtual void Process() { // prefetch process... - ::grpc::ByteBuffer reply; std::string var_name = request_->OutVarname(); VLOG(3) << "RequestPrefetch " << var_name; @@ -186,19 +203,22 @@ class RequestPrefetch final : public RequestBase { InitializeVariable(var, var_desc->GetType()); executor_->RunPreparedContext(prefetch_ctx_, scope_); - SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply); + SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply_); - responder_.Finish(reply, ::grpc::Status::OK, this); status_ = FINISH; + responder_.Finish(reply_, ::grpc::Status::OK, + reinterpret_cast(static_cast(req_id_))); } protected: std::shared_ptr request_; + ::grpc::ByteBuffer reply_; ServerAsyncResponseWriter<::grpc::ByteBuffer> responder_; framework::Scope* scope_; framework::Executor* executor_; framework::ProgramDesc* program_; framework::ExecutorPrepareContext* prefetch_ctx_; + int req_id_; }; void AsyncGRPCServer::WaitClientGet(int count) { @@ -232,24 +252,39 @@ void AsyncGRPCServer::RunSyncUpdate() { LOG(INFO) << "Server listening on " << address_ << " selected port: " << selected_port_; - std::function send_register = - std::bind(&AsyncGRPCServer::TryToRegisterNewSendOne, this); - std::function get_register = - std::bind(&AsyncGRPCServer::TryToRegisterNewGetOne, this); - std::function prefetch_register = - std::bind(&AsyncGRPCServer::TryToRegisterNewPrefetchOne, this); - - // TODO(wuyi): Run these "HandleRequest" in thread pool - t_send_.reset( - new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, - cq_send_.get(), "cq_send", send_register))); - t_get_.reset( - new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, - cq_get_.get(), "cq_get", get_register))); - t_prefetch_.reset(new std::thread( - std::bind(&AsyncGRPCServer::HandleRequest, this, cq_prefetch_.get(), - "cq_prefetch", prefetch_register))); + std::function send_register = std::bind( + &AsyncGRPCServer::TryToRegisterNewSendOne, this, std::placeholders::_1); + std::function get_register = std::bind( + &AsyncGRPCServer::TryToRegisterNewGetOne, this, std::placeholders::_1); + std::function prefetch_register = + std::bind(&AsyncGRPCServer::TryToRegisterNewPrefetchOne, this, + std::placeholders::_1); + for (int i = 0; i < kSendReqsBufSize; ++i) { + TryToRegisterNewSendOne(i); + } + for (int i = 0; i < kGetReqsBufSize; ++i) { + TryToRegisterNewGetOne(i); + } + for (int i = 0; i < kPrefetchReqsBufSize; ++i) { + TryToRegisterNewPrefetchOne(i); + } + + for (int i = 0; i < FLAGS_rpc_server_handle_send_threads; ++i) { + t_sends_.emplace_back( + new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, + cq_send_.get(), "cq_send", send_register))); + } + for (int i = 0; i < FLAGS_rpc_server_handle_get_threads; ++i) { + t_gets_.emplace_back( + new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, + cq_get_.get(), "cq_get", get_register))); + } + for (int i = 0; i < FLAGS_rpc_server_handle_prefetch_threads; ++i) { + t_prefetchs_.emplace_back(new std::thread( + std::bind(&AsyncGRPCServer::HandleRequest, this, cq_prefetch_.get(), + "cq_prefetch", prefetch_register))); + } { std::lock_guard lock(this->mutex_ready_); ready_ = 1; @@ -257,9 +292,15 @@ void AsyncGRPCServer::RunSyncUpdate() { condition_ready_.notify_all(); // wait server server_->Wait(); - t_send_->join(); - t_get_->join(); - t_prefetch_->join(); + for (int i = 0; i < FLAGS_rpc_server_handle_send_threads; ++i) { + t_sends_[i]->join(); + } + for (int i = 0; i < FLAGS_rpc_server_handle_get_threads; ++i) { + t_gets_[i]->join(); + } + for (int i = 0; i < FLAGS_rpc_server_handle_prefetch_threads; ++i) { + t_prefetchs_[i]->join(); + } } void AsyncGRPCServer::ShutdownQueue() { @@ -276,47 +317,48 @@ void AsyncGRPCServer::ShutDown() { server_->Shutdown(); } -void AsyncGRPCServer::TryToRegisterNewSendOne() { +void AsyncGRPCServer::TryToRegisterNewSendOne(int i) { std::unique_lock lock(cq_mutex_); if (is_shut_down_) { VLOG(3) << "shutdown, do not TryToRegisterNewSendOne"; return; } RequestSend* send = new RequestSend(&service_, cq_send_.get(), sync_mode_, - scope_, &var_recv_queue_, dev_ctx_); + scope_, &var_recv_queue_, dev_ctx_, i); + send_reqs_[i] = static_cast(send); VLOG(4) << "Create RequestSend status:" << send->Status(); } -void AsyncGRPCServer::TryToRegisterNewGetOne() { +void AsyncGRPCServer::TryToRegisterNewGetOne(int req_id) { std::unique_lock lock(cq_mutex_); if (is_shut_down_) { VLOG(3) << "shutdown, do not TryToRegisterNewGetOne"; return; } RequestGet* get = new RequestGet(&service_, cq_get_.get(), sync_mode_, scope_, - dev_ctx_, &var_get_queue_); + dev_ctx_, &var_get_queue_, req_id); + get_reqs_[req_id] = static_cast(get); VLOG(4) << "Create RequestGet status:" << get->Status(); } -void AsyncGRPCServer::TryToRegisterNewPrefetchOne() { +void AsyncGRPCServer::TryToRegisterNewPrefetchOne(int req_id) { std::unique_lock lock(cq_mutex_); if (is_shut_down_) { VLOG(3) << "shutdown, do not TryToRegisterNewPrefetchOne"; return; } - RequestPrefetch* prefetch = - new RequestPrefetch(&service_, cq_prefetch_.get(), sync_mode_, scope_, - dev_ctx_, executor_, program_, prefetch_ctx_.get()); + RequestPrefetch* prefetch = new RequestPrefetch( + &service_, cq_prefetch_.get(), sync_mode_, scope_, dev_ctx_, executor_, + program_, prefetch_ctx_.get(), req_id); + prefetch_reqs_[req_id] = static_cast(prefetch); VLOG(4) << "Create RequestPrefetch status:" << prefetch->Status(); } // FIXME(typhoonzero): change cq_name to enum. -void AsyncGRPCServer::HandleRequest(::grpc::ServerCompletionQueue* cq, - const std::string& cq_name, - std::function TryToRegisterNewOne) { - TryToRegisterNewOne(); - +void AsyncGRPCServer::HandleRequest( + ::grpc::ServerCompletionQueue* cq, const std::string& cq_name, + std::function TryToRegisterNewOne) { void* tag = NULL; bool ok = false; @@ -327,8 +369,7 @@ void AsyncGRPCServer::HandleRequest(::grpc::ServerCompletionQueue* cq, break; } VLOG(3) << "HandleRequest for " << cq_name << " get Next"; - - PADDLE_ENFORCE(tag); + int req_id = static_cast(reinterpret_cast(tag)); if (sync_mode_) { // FIXME(typhoonzero): de-couple the barriers with recv_op @@ -337,7 +378,17 @@ void AsyncGRPCServer::HandleRequest(::grpc::ServerCompletionQueue* cq, VLOG(3) << "HandleRequest for " << cq_name << " after WaitCond"; } - RequestBase* base = reinterpret_cast(tag); + RequestBase* base = nullptr; + { + std::lock_guard l(cq_mutex_); + if (cq_name == "cq_get") { + base = get_reqs_[req_id]; + } else if (cq_name == "cq_send") { + base = send_reqs_[req_id]; + } else if (cq_name == "cq_prefetch") { + base = prefetch_reqs_[req_id]; + } + } // reference: // https://github.com/tensorflow/tensorflow/issues/5596 // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM @@ -345,19 +396,19 @@ void AsyncGRPCServer::HandleRequest(::grpc::ServerCompletionQueue* cq, if (!ok) { LOG(WARNING) << cq_name << " recv no regular event:argument name[" << base->GetReqName() << "]"; - TryToRegisterNewOne(); + TryToRegisterNewOne(req_id); delete base; continue; } switch (base->Status()) { case PROCESS: { - TryToRegisterNewOne(); base->Process(); VLOG(4) << cq_name << " PROCESS status:" << base->Status(); break; } case FINISH: { + TryToRegisterNewOne(req_id); VLOG(4) << cq_name << " FINISH status:" << base->Status(); delete base; break; diff --git a/paddle/fluid/operators/detail/grpc_server.h b/paddle/fluid/operators/detail/grpc_server.h index 238aaa29634a7eff65429c27aa3538a185723eb2..bdff9801a928699f8391bfb68c1c7bd2d75aa642 100644 --- a/paddle/fluid/operators/detail/grpc_server.h +++ b/paddle/fluid/operators/detail/grpc_server.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include // NOLINT #include +#include #include "grpc++/grpc++.h" #include "paddle/fluid/framework/blocking_queue.h" @@ -30,6 +31,7 @@ limitations under the License. */ #include "paddle/fluid/operators/detail/send_recv.grpc.pb.h" #include "paddle/fluid/operators/detail/send_recv.pb.h" #include "paddle/fluid/operators/detail/sendrecvop_utils.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { @@ -82,19 +84,27 @@ class AsyncGRPCServer final { protected: void HandleRequest(::grpc::ServerCompletionQueue *cq, const std::string &cq_name, - std::function TryToRegisterNewOne); - void TryToRegisterNewSendOne(); - void TryToRegisterNewGetOne(); - void TryToRegisterNewPrefetchOne(); + std::function TryToRegisterNewOne); + void TryToRegisterNewSendOne(int req_id); + void TryToRegisterNewGetOne(int req_id); + void TryToRegisterNewPrefetchOne(int req_id); void ShutdownQueue(); private: + static const int kSendReqsBufSize = 100; + static const int kGetReqsBufSize = 100; + static const int kPrefetchReqsBufSize = 10; + std::mutex cq_mutex_; volatile bool is_shut_down_ = false; std::unique_ptr<::grpc::ServerCompletionQueue> cq_send_; std::unique_ptr<::grpc::ServerCompletionQueue> cq_get_; std::unique_ptr<::grpc::ServerCompletionQueue> cq_prefetch_; + RequestBase *send_reqs_[kSendReqsBufSize]; + RequestBase *get_reqs_[kGetReqsBufSize]; + RequestBase *prefetch_reqs_[kPrefetchReqsBufSize]; + GrpcService::AsyncService service_; std::unique_ptr<::grpc::Server> server_; @@ -113,8 +123,10 @@ class AsyncGRPCServer final { mutable int barrier_cond_step_; std::condition_variable barrier_condition_; - std::unique_ptr t_send_; - std::unique_ptr t_get_; + std::vector> t_sends_; + std::vector> t_gets_; + std::vector> t_prefetchs_; + std::unique_ptr t_prefetch_; std::unique_ptr prefetch_ctx_; diff --git a/paddle/fluid/operators/detail/grpc_server_test.cc b/paddle/fluid/operators/detail/grpc_server_test.cc index b8db0ad987cdfaec1fc9236c3f26e88891376dce..73e75c9087fef756840c76db249f8996253ced64 100644 --- a/paddle/fluid/operators/detail/grpc_server_test.cc +++ b/paddle/fluid/operators/detail/grpc_server_test.cc @@ -108,7 +108,7 @@ void StartServer(const std::string& endpoint) { rpc_service_->RunSyncUpdate(); } -TEST(PREFETCH, CPU) { +TEST(PREFETCH, DISABLED_CPU) { // start up a server instance backend std::thread server_thread(StartServer, "127.0.0.1:8889"); sleep(2); diff --git a/paddle/fluid/operators/detail/grpc_service.h b/paddle/fluid/operators/detail/grpc_service.h index e6dab2f5a3a4280f3979417c3ca2d884a0b8ff2f..e0505c2b9d0903837713d7e0032b01ab091c2e04 100644 --- a/paddle/fluid/operators/detail/grpc_service.h +++ b/paddle/fluid/operators/detail/grpc_service.h @@ -25,6 +25,8 @@ #include #include "paddle/fluid/operators/detail/variable_response.h" +#include "paddle/fluid/platform/profiler.h" + // NOTE: This method was originally created by tensorflow // (https://github.com/tensorflow/tensorflow/) we borrow this // method and did some modifications so that we can parse gRPC diff --git a/paddle/fluid/operators/detail/send_recv.proto b/paddle/fluid/operators/detail/send_recv.proto index 9478c5702bcbf99fc88207b8c4843dbccf8a5925..a244afc46f3247c7e6e8481b09b5c729a2a569f7 100644 --- a/paddle/fluid/operators/detail/send_recv.proto +++ b/paddle/fluid/operators/detail/send_recv.proto @@ -70,10 +70,10 @@ message VariableMessage { bytes rows = 9; // Look up table block execution output variable name. string out_varname = 10; - // If true, the ps server will start profiling, the ps + // If 1, the ps server will start profiling, the ps // server stops profiling and generates a profile to /tmp/profile_ps_* - // when profile switches from true to false. - bool profile = 11; + // when profile switches from 1 to 2. + int64 profile = 11; } message VoidMessage {} diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index e6ee598db04dd9e0075b39a50d1d4e878d73086d..507b465435609a91ebca97dd70b176c3b79bee02 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -123,7 +123,13 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, // 1 trainer returns true for ShouldSendProfileState(). It tells PS // servers the trainer's profiling state so that PS can follow the // trainer. - request.set_profile(platform::IsProfileEnabled()); + if (platform::ShouldSendProfileState()) { + if (platform::IsProfileEnabled()) { + request.set_profile(platform::kEnableProfiler); + } else { + request.set_profile(platform::kDisableProfiler); + } + } if (!out_name.empty()) { request.set_out_varname(out_name); } @@ -143,12 +149,14 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } if (platform::is_gpu_place(ctx.GetPlace())) { +#ifdef PADDLE_WITH_CUDA // GPU data is copied to CPU buffer when sending, // free the buffer when possible. destroy_callback = [](void* backing) { platform::CUDAPinnedPlace cuda_pinned; memory::Free(cuda_pinned, backing); }; +#endif } std::string header; diff --git a/paddle/fluid/operators/detail/variable_response.cc b/paddle/fluid/operators/detail/variable_response.cc index 462e303096e609c6797ca8cc16266ec3621623fc..24cb91a3bb820a0e5d51aaa49154434919080f69 100644 --- a/paddle/fluid/operators/detail/variable_response.cc +++ b/paddle/fluid/operators/detail/variable_response.cc @@ -449,8 +449,8 @@ int VariableResponse::Parse(Source* source) { break; } case sendrecv::VariableMessage::kProfileFieldNumber: { - bool profiling; - if (!input.ReadRaw(reinterpret_cast(&profiling), 1)) { + uint64_t profiling = 0; + if (!input.ReadVarint64(&profiling)) { return tag; } meta_.set_profile(profiling); @@ -458,9 +458,11 @@ int VariableResponse::Parse(Source* source) { if (listener_id <= 0) { break; } - if (profiling && !platform::IsProfileEnabled()) { + if (profiling == platform::kEnableProfiler && + !platform::IsProfileEnabled()) { platform::EnableProfiler(platform::ProfilerState::kCPU); - } else if (!profiling && platform::IsProfileEnabled()) { + } else if (profiling == platform::kDisableProfiler && + platform::IsProfileEnabled()) { // TODO(panyx0718): Should we allow to customize file dir. platform::DisableProfiler( platform::EventSortingKey::kDefault, diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index a5bb58c2f4047a3bf2f8592b605772b4fa166c57..20d960f9fee1eae42b2241fb96c163e15db5e24d 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -24,6 +24,8 @@ detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu) detection_library(target_assign_op SRCS target_assign_op.cc target_assign_op.cu) +detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc + polygon_box_transform_op.cu) # Export local libraries to parent set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE) diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..335e8dd470f851d8c5f6bdbc94cfc343da269034 --- /dev/null +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -0,0 +1,105 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class PolygonBoxTransformCPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), + "It must use CUDAPlace."); + auto* in = ctx.Input("Input"); + auto in_dims = in->dims(); + const T* in_data = in->data(); + auto* out = ctx.Output("Output"); + T* out_data = out->mutable_data(ctx.GetPlace()); + + int batch_size = in_dims[0]; + int geo_channel = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + int id = 0; + for (int id_n = 0; id_n < batch_size * geo_channel; ++id_n) { + for (int id_h = 0; id_h < height; ++id_h) { + for (int id_w = 0; id_w < width; ++id_w) { + id = id_n * height * width + width * id_h + id_w; + if (id_n % 2 == 0) { + out_data[id] = id_w - in_data[id]; + } else { + out_data[id] = id_h - in_data[id]; + } + } + } + } + } +}; + +class PolygonBoxTransformOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE( + ctx->HasInput("Input"), + "Input (Input) of polygon_box transform op should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("Output"), + "Output (Output) of polygon_box transform op should not be null."); + + auto in_dim = ctx->GetInputDim("Input"); + + PADDLE_ENFORCE_EQ(in_dim.size(), 4, "input's rank must be 4."); + PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0, + "input's second dimension must be even."); + + ctx->SetOutputDim("Output", in_dim); + } +}; + +class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput( + "Input", + "The input with shape [batch_size, geometry_channels, height, width]"); + AddOutput("Output", "The output with the same shape as input"); + + AddComment(R"DOC( +PolygonBoxTransform Operator. +The input is the final geometry output in detection network. +We use 2*n numbers to denote the coordinate shift from n corner vertices of +the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi), +the geometry output contains 2*n channels. +PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(polygon_box_transform, ops::PolygonBoxTransformOp, + ops::PolygonBoxTransformOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + polygon_box_transform, + ops::PolygonBoxTransformCPUKernel, + ops::PolygonBoxTransformCPUKernel); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..6187ac6622c65d2bbc525c3fe2cb397cf74ac612 --- /dev/null +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -0,0 +1,76 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_info.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using platform::PADDLE_CUDA_NUM_THREADS; +#define CUDA_BLOCK_SIZE 16 + +template +__global__ void PolygonBoxTransformKernel(const int n, const int h, const int w, + const T* input, T* output) { + int id_n = threadIdx.x + blockDim.x * blockIdx.x; + int id_h = threadIdx.y + blockDim.y * blockIdx.y; + int id_w = threadIdx.z + blockDim.z * blockIdx.z; + if (id_n < n && id_h < h && id_w < w) { + int id = id_n * h * w + w * id_h + id_w; + if (id_n % 2 == 0) { + output[id] = id_w - input[id]; + } else { + output[id] = id_h - input[id]; + } + } +} + +template +class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "It must use CUDAPlace."); + auto* in = ctx.Input("Input"); + auto in_dims = in->dims(); + const T* in_data = in->data(); + auto* out = ctx.Output("Output"); + T* out_data = out->mutable_data(ctx.GetPlace()); + + int batch_size = in_dims[0]; + int geo_channels = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + dim3 threadsPerBlock( + PADDLE_CUDA_NUM_THREADS / (CUDA_BLOCK_SIZE * CUDA_BLOCK_SIZE), + CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE); + dim3 numBlocks((batch_size * geo_channels) / threadsPerBlock.x, + (height + threadsPerBlock.y - 1) / threadsPerBlock.y, + (width + threadsPerBlock.z - 1) / threadsPerBlock.z); + auto stream = ctx.cuda_device_context().stream(); + PolygonBoxTransformKernel<<>>( + batch_size * geo_channels, height, width, in_data, out_data); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_CUDA_KERNEL( + polygon_box_transform, + paddle::operators::PolygonBoxTransformOpCUDAKernel, + paddle::operators::PolygonBoxTransformOpCUDAKernel); diff --git a/paddle/fluid/operators/elementwise_add_op.h b/paddle/fluid/operators/elementwise_add_op.h index 253964562c8d34e0fda3b4760761206895f749aa..baf04c30b17cb333fc8a6544afd6c479442f835b 100644 --- a/paddle/fluid/operators/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise_add_op.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once +#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/elementwise_op_function.h" +#include "paddle/fluid/operators/math/blas.h" namespace paddle { namespace operators { @@ -24,19 +26,57 @@ struct AddFunctor { inline HOSTDEVICE T operator()(T a, T b) const { return a + b; } }; +template +void default_elementwise_add(const framework::ExecutionContext& ctx, + const framework::Tensor* x, + const framework::Tensor* y, framework::Tensor* z) { + int axis = ctx.Attr("axis"); + ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, + AddFunctor(), z); +} + +template +typename std::enable_if< + std::is_floating_point::value && + std::is_same::value>::type +elementwise_add(const framework::ExecutionContext& ctx, + const framework::Tensor* x, const framework::Tensor* y, + framework::Tensor* z) { + auto eigen_x = framework::EigenVector::Flatten(*x); + auto eigen_y = framework::EigenVector::Flatten(*y); + auto eigen_z = framework::EigenVector::Flatten(*z); + + auto blas = math::GetBlas(ctx); + blas.VADD(x->numel(), eigen_x.data(), eigen_y.data(), eigen_z.data()); +} + +template +typename std::enable_if< + !std::is_floating_point::value || + !std::is_same::value>::type +elementwise_add(const framework::ExecutionContext& ctx, + const framework::Tensor* x, const framework::Tensor* y, + framework::Tensor* z) { + default_elementwise_add(ctx, x, y, z); +} + template class ElementwiseAddKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { using Tensor = framework::Tensor; - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* z = ctx.Output("Out"); + const auto x = ctx.Input("X"); + const auto y = ctx.Input("Y"); + auto z = ctx.Output("Out"); z->mutable_data(ctx.GetPlace()); - int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - AddFunctor(), z); + + auto dims_equal = x->dims() == y->dims(); + if (dims_equal) { + elementwise_add(ctx, x, y, z); + } else { + default_elementwise_add(ctx, x, y, z); + } } }; @@ -45,6 +85,55 @@ struct IdentityGrad { HOSTDEVICE T operator()(T x, T y, T out, T dout) const { return dout; } }; +template +void default_elementwise_add_grad(const framework::ExecutionContext& ctx, + const framework::Tensor* x, + const framework::Tensor* y, + const framework::Tensor* out, + const framework::Tensor* dout, + framework::Tensor* dx, + framework::Tensor* dy) { + int axis = ctx.Attr("axis"); + + ElemwiseGradCompute, IdentityGrad>( + ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad(), + IdentityGrad()); +} + +template +typename std::enable_if< + std::is_floating_point::value && + std::is_same::value>::type +elementwise_add_grad(const framework::ExecutionContext& ctx, + const framework::Tensor* x, const framework::Tensor* y, + const framework::Tensor* out, + const framework::Tensor* dout, framework::Tensor* dx, + framework::Tensor* dy) { + auto blas = math::GetBlas(ctx); + + if (dx) { + blas.VCOPY(dout->numel(), dout->data(), + dx->mutable_data(ctx.GetPlace())); + } + + if (dy) { + blas.VCOPY(dout->numel(), dout->data(), + dy->mutable_data(ctx.GetPlace())); + } +} + +template +typename std::enable_if< + !std::is_floating_point::value || + !std::is_same::value>::type +elementwise_add_grad(const framework::ExecutionContext& ctx, + const framework::Tensor* x, const framework::Tensor* y, + const framework::Tensor* out, + const framework::Tensor* dout, framework::Tensor* dx, + framework::Tensor* dy) { + default_elementwise_add_grad(ctx, x, y, out, dout, dx, dy); +} + template class ElementwiseAddGradKernel : public framework::OpKernel { public: @@ -57,10 +146,13 @@ class ElementwiseAddGradKernel : public framework::OpKernel { auto* dout = ctx.Input(framework::GradVarName("Out")); auto* dx = ctx.Output(framework::GradVarName("X")); auto* dy = ctx.Output(framework::GradVarName("Y")); - int axis = ctx.Attr("axis"); - ElemwiseGradCompute, IdentityGrad>( - ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad(), - IdentityGrad()); + + if (platform::is_cpu_place(ctx.GetPlace()) && (x->dims() == y->dims())) { + elementwise_add_grad(ctx, x, y, out, dout, dx, dy); + } else { + default_elementwise_add_grad(ctx, x, y, out, dout, dx, + dy); + } } }; diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index d5b57cc2524efcdee112b2ce41cdcd4697fb79e6..f4cec8ad971abebe8d6dff1a384c8414269148a5 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -46,9 +46,11 @@ class ElementwiseOpInferVarType : public framework::VarTypeInference { public: void operator()(const framework::OpDesc& op_desc, framework::BlockDesc* block) const override { - auto x_var = op_desc.Input("X")[0]; - auto out_var = op_desc.Output("Out")[0]; - block->Var(out_var)->SetType(block->Var(x_var)->GetType()); + auto x_name = op_desc.Input("X")[0]; + auto out_name = op_desc.Output("Out")[0]; + auto& x = block->FindRecursiveOrCreateVar(x_name); + auto& out = block->FindRecursiveOrCreateVar(out_name); + out.SetType(x.GetType()); } }; diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h index 2a7df149a9f4b03676f172da980c927d7fa5e8a4..63ea60678f80708f5a8340edd22588553b9ec139 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h @@ -24,6 +24,14 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* out = ctx.Output("Out"); + auto* in = ctx.Input("Input"); + if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { + // set the correct batch size for the LoDTensor. + auto odims = out->dims(); + int output_dim_idx = ctx.Attr("output_dim_idx"); + odims[output_dim_idx] = static_cast(in->lod().back().size()) - 1; + out->mutable_data(odims, ctx.GetPlace()); + } out->mutable_data(ctx.GetPlace()); auto value = ctx.Attr("value"); diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index dabde43850db770d286b13cacd32bee181328d5c..1a37cb39d56066b8380338b9710a441e41518c39 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -125,6 +125,12 @@ class Blas { template void AXPY(int n, T alpha, const T* x, T* y) const; + template + void VADD(int n, const T* x, const T* y, T* z) const; + + template + void VCOPY(int n, const T* x, T* y) const; + template void GEMV(bool trans_a, int M, int N, T alpha, const T* A, const T* B, T beta, T* C) const; @@ -163,6 +169,16 @@ class BlasT : private Blas { Base()->template AXPY(args...); } + template + void VADD(ARGS... args) const { + Base()->template VADD(args...); + } + + template + void VCOPY(ARGS... args) const { + Base()->template VCOPY(args...); + } + template void GEMV(ARGS... args) const { Base()->template GEMV(args...); diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 14b3624b420cb883b36268c0a5a9e8692dbb5b43..ae20406bc21d5e08359be8295cd98495dda7813b 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -34,6 +34,18 @@ struct CBlas { cblas_saxpy(args...); } +#ifdef PADDLE_WITH_MKLML + template + static void VADD(ARGS... args) { + vsAdd(args...); + } +#endif + + template + static void VCOPY(ARGS... args) { + cblas_scopy(args...); + } + template static void GEMV(ARGS... args) { cblas_sgemv(args...); @@ -59,6 +71,18 @@ struct CBlas { cblas_daxpy(args...); } +#ifdef PADDLE_WITH_MKLML + template + static void VADD(ARGS... args) { + vdAdd(args...); + } +#endif + + template + static void VCOPY(ARGS... args) { + cblas_dcopy(args...); + } + template static void GEMV(ARGS... args) { cblas_dgemv(args...); @@ -139,6 +163,24 @@ void Blas::AXPY(int n, T alpha, const T *x, CBlas::AXPY(n, alpha, x, 1, y, 1); } +template <> +template +void Blas::VCOPY(int n, const T *x, T *y) const { + CBlas::VCOPY(n, x, 1, y, 1); +} + +template <> +template +void Blas::VADD(int n, const T *x, const T *y, + T *z) const { +#ifdef PADDLE_WITH_MKLML + CBlas::VADD(n, x, y, z); +#else + this->template VCOPY(n, y, z); + this->template AXPY(n, 1., x, z); +#endif +} + template <> template void Blas::GEMV(bool trans_a, int M, int N, T alpha, diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt index 3106978eb0149b14849dfd1aaad8bbe76791f2f6..62532036f86bfb82465ccd9e0ec526299489932a 100644 --- a/paddle/fluid/operators/reader/CMakeLists.txt +++ b/paddle/fluid/operators/reader/CMakeLists.txt @@ -23,6 +23,7 @@ reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_o reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc) reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc) reader_library(create_threaded_reader_op SRCS create_threaded_reader_op.cc) +reader_library(create_custom_reader_op SRCS create_custom_reader_op.cc) cc_test(reader_blocking_queue_test SRCS reader_blocking_queue_test.cc) # Export local libraries to parent diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..4ecbf8ed4f0473a552b778fd6c64c92b946cd458 --- /dev/null +++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc @@ -0,0 +1,187 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/operators/detail/safe_ref.h" +#include "paddle/fluid/operators/reader/reader_op_registry.h" + +namespace paddle { +namespace operators { +namespace reader { + +class CustomReader : public framework::DecoratedReader { + public: + CustomReader(ReaderBase* reader, const framework::BlockDesc& sub_block, + const platform::Place& dev_place, + const std::vector& source_var_names, + const std::vector& sink_var_names) + : DecoratedReader(reader), + program_(*sub_block.Program()), + sub_block_id_(sub_block.ID()), + exe_(framework::Executor(dev_place)), + source_var_names_(source_var_names), + sink_var_names_(sink_var_names) {} + + void ReadNext(std::vector* out) override; + + private: + const framework::ProgramDesc program_; + int sub_block_id_; + framework::Executor exe_; + + std::vector source_var_names_; + std::vector sink_var_names_; +}; + +class CreateCustomReaderOp : public framework::OperatorBase { + public: + using framework::OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { + auto* out = scope.FindVar(Output("Out")) + ->template GetMutable(); + auto* sub_block = Attr("sub_block"); + if (out->Get() != nullptr) { + return; + } + const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) + ->Get(); + out->Reset( + new CustomReader(underlying_reader.Get(), *sub_block, dev_place, + Attr>("source_var_names"), + Attr>("sink_var_names"))); + } +}; + +class CreateCustomReaderOpMaker : public DecoratedReaderMakerBase { + protected: + void Apply() override { + AddAttr( + "sub_block", "The block to hold all preprocessing operators."); + AddAttr>( + "source_var_names", + "Source variables are starting points of data preprocessing. They hold " + "preprocessing's input tensors. Each source variable corresponds to " + "one of underlying reader's output datas."); + AddAttr>( + "sink_var_names", + "Sink variables are ending points of data preprocessing. They hold " + "preprocessing's output tensors. Each sink variable corresponds to " + "one of custom reader's output datas."); + AddComment(R"DOC( + CreateCustomReader Operator + + A custom reader can be used for input data preprocessing. + A custom reader holds its own sub-block, which will be executed in its + 'ReadNext()' function. Users can configurate their own preprocessing + pipelines by inserting operators into custom reader's sub-block. + )DOC"); + } +}; + +class CustomReaderInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(!ctx->IsRuntime(), + "'CustomReaderInferShape' should only be invoked during " + "compile time."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "The output decorated reader should not be null."); + const auto* sub_block = + ctx->Attrs().Get("sub_block"); + const auto sink_var_names = + ctx->Attrs().Get>("sink_var_names"); + std::vector> res_dims; + std::vector res_lod_levels; + for (const std::string& var_name : sink_var_names) { + auto* sink_var = sub_block->FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(sink_var); + res_dims.emplace_back(sink_var->GetShape()); + res_lod_levels.push_back(sink_var->GetLoDLevel()); + } + auto* out_reader = + boost::get(ctx->GetOutputVarPtrs("Out")[0]); + out_reader->SetShapes(res_dims); + out_reader->SetLoDLevels(res_lod_levels); + } +}; + +class CustomReaderInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { + framework::VarDesc* out_reader = block->FindVar(op_desc.Output("Out")[0]); + PADDLE_ENFORCE_NOT_NULL(out_reader); + out_reader->SetType(framework::proto::VarType::READER); + + auto sink_var_names = + boost::get>(op_desc.GetAttr("sink_var_names")); + const auto* sub_block = + boost::get(op_desc.GetAttr("sub_block")); + std::vector res_data_types; + for (const std::string& var_name : sink_var_names) { + framework::VarDesc* var = sub_block->FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(var); + res_data_types.emplace_back(var->GetDataType()); + } + out_reader->SetDataTypes(res_data_types); + } +}; + +void CustomReader::ReadNext(std::vector* out) { + out->clear(); + std::vector underlying_outs; + reader_->ReadNext(&underlying_outs); + if (underlying_outs.empty()) { + // There is not next data. + return; + } + PADDLE_ENFORCE(source_var_names_.size() == underlying_outs.size(), + "The size of source_var_names(%d) and the size of " + "underlying_outs(%d) are not consistent. Each feeding element " + "must have its own source variable.", + source_var_names_.size(), underlying_outs.size()); + // The scope for CustomReader's sub-block should be independent and shouldn't + // be any other computation scope's child. Otherwise, data preprocessing and + // compution cannot be concurrent. + framework::Scope scope; + // 1. Copy LoDTensors from underlying reader's output to source variables. + for (size_t i = 0; i < source_var_names_.size(); ++i) { + framework::Variable* var = scope.Var(source_var_names_[i]); + framework::LoDTensor* tensor = var->GetMutable(); + tensor->ShareDataWith(underlying_outs[i]); + tensor->set_lod(underlying_outs[i].lod()); + } + // 2. Run the sub-block. + exe_.Run(program_, &scope, sub_block_id_, false, true); + // 3. Copy LoDTensors from sink variables to out. + out->resize(sink_var_names_.size()); + for (size_t i = 0; i < sink_var_names_.size(); ++i) { + const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i])) + .Get(); + framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]); + } +} + +} // namespace reader +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators::reader; +REGISTER_OPERATOR(create_custom_reader, ops::CreateCustomReaderOp, + ops::CreateCustomReaderOpMaker, ops::CustomReaderInferShape, + ops::CustomReaderInferVarType, + paddle::framework::EmptyGradOpMaker) diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index 11f1ddebc48134158315ea70a2d2b9e07f2e2469..612e1f5eca3a4836db1fd167fc6bb63400d20177 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -115,6 +115,7 @@ void DecoratedReaderInferShape::operator()( boost::get(ctx->GetOutputVarPtrs("Out")[0]); out_reader->SetLoDLevels(in_reader->GetLoDLevels()); } + void DecoratedReaderInferVarType::operator()( const framework::OpDesc& op_desc, framework::BlockDesc* block) const { std::string in_reader_name = op_desc.Input("UnderlyingReader")[0]; diff --git a/paddle/fluid/operators/test_send_nccl_id.cc b/paddle/fluid/operators/test_send_nccl_id.cc index bbae1d54aa3524fd45cb8ab13c86df8d54b8e643..719f039a0f5fcd7445bf1589a683f122e6d62ba0 100644 --- a/paddle/fluid/operators/test_send_nccl_id.cc +++ b/paddle/fluid/operators/test_send_nccl_id.cc @@ -63,7 +63,7 @@ void StartServer(std::atomic* initialized) { server_thread.join(); } -TEST(SendNcclId, Normal) { +TEST(SendNcclId, DISABLED_Normal) { std::atomic initialized{false}; std::thread server_thread(StartServer, &initialized); while (!initialized) { diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc index c9e10631680a6ea3876f555a3a6e6c12f79b39d5..1a9be044e024e4b1dda5ef7d515c65f3a7513710 100644 --- a/paddle/fluid/platform/device_tracer.cc +++ b/paddle/fluid/platform/device_tracer.cc @@ -245,7 +245,6 @@ class DeviceTracerImpl : public DeviceTracer { void Enable() { std::lock_guard l(trace_mu_); if (enabled_) { - fprintf(stderr, "DeviceTracer already enabled\n"); return; } EnableActivity(); diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index 643bb6183d144ec11a4890d9ea1ca970acb08b4c..bf43925373a12cd9ff2155d68c42d0266ba4df60 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -116,6 +116,8 @@ void ResetProfiler(); void DisableProfiler(EventSortingKey sorted_key, const std::string& profile_path); +const int kEnableProfiler = 1; +const int kDisableProfiler = 2; // Test if the profiler is currently enabled. bool IsProfileEnabled(); // Whether the trainer should send profiling state to PS. diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc index 3f28e616494ad1322708ad6403aaf50b22d724e6..9111abca5aac97e9d5c7b00ce5173f08e49cda12 100644 --- a/paddle/fluid/pybind/const_value.cc +++ b/paddle/fluid/pybind/const_value.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/pybind/const_value.h" +#include #include "paddle/fluid/framework/operator.h" namespace paddle { @@ -23,6 +24,21 @@ void BindConstValue(pybind11::module* m) { m->def("kTempVarName", [] { return framework::kTempVarName; }); m->def("kGradVarSuffix", [] { return framework::kGradVarSuffix; }); m->def("kZeroVarSuffix", [] { return framework::kZeroVarSuffix; }); + + auto op_proto_and_checker_maker = + m->def_submodule("op_proto_and_checker_maker"); + + pybind11::enum_(op_proto_and_checker_maker, "OpRole") + .value("Forward", framework::OpRole::kForward) + .value("Backward", framework::OpRole::kBackward) + .value("Optimize", framework::OpRole::kOptimize) + .value("Loss", framework::OpRole::kLoss); + + op_proto_and_checker_maker.def( + "kOpRoleAttrName", framework::OpProtoAndCheckerMaker::OpRoleAttrName); + op_proto_and_checker_maker.def( + "kOpRoleVarAttrName", + framework::OpProtoAndCheckerMaker::OpRoleVarAttrName); } } // namespace pybind diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 9b2779b42cad324253dadf27dbff20fd8e8c8e16..29b4ac098e21ee315d5c9b2f2499521d1aa1c322 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -52,9 +52,3 @@ add_simple_unittest(Im2ColTest) add_simple_unittest(GemmConvOpTest) add_simple_unittest(DepthwiseConvOpTest) endif() - -add_style_check_target(paddle_function ${h_files}) -add_style_check_target(paddle_function ${cpp_files}) -if(WITH_GPU) - add_style_check_target(paddle_function ${cu_files}) -endif() diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index bac4659e62b107dd80ef95dd0907b3da4becffbc..8e9dbbd7a154095a7298bb2f59a82d13a60f9bd3 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/function/EigenThreadDevice.h" namespace paddle { @@ -70,25 +70,26 @@ struct EigenBlasGemm { dims[0].first = transA ? 0 : 1; dims[0].second = transB ? 1 : 0; - Eigen::DefaultDevice device; + auto* device = EigenDeviceWarpper::device(); if (N == ldc) { if (alpha == T(1) && beta == T(0)) { - c.device(device) = a.contract(b, dims); + c.device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.device(device) += a.contract(b, dims); + c.device(*device) += a.contract(b, dims); } else { - c.device(device) = alpha * a.contract(b, dims) + beta * c; + c.device(*device) = alpha * a.contract(b, dims) + beta * c; } } else { if (alpha == T(1) && beta == T(0)) { - c.slice(offsetC, extentC).device(device) = a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.slice(offsetC, extentC).device(device) += a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) += a.contract(b, dims); } else { - c.slice(offsetC, extentC).device(device) = + c.slice(offsetC, extentC).device(*device) = alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC); } } + EigenDeviceWarpper::free_device(device); } }; diff --git a/paddle/function/EigenThreadDevice.h b/paddle/function/EigenThreadDevice.h new file mode 100644 index 0000000000000000000000000000000000000000..74269aa664a711c905e12a61958c9ab01e2340c0 --- /dev/null +++ b/paddle/function/EigenThreadDevice.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#if defined(__OSX__) || defined(__APPLE__) +#include +#include +#endif +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { + +#if defined(__ANDROID__) +int GetCpuCount() { + FILE* fp = fopen("/sys/devices/system/cpu/possible", "r"); + if (!fp) { + return 1; + } + int rank0, rank1; + int num = fscanf(fp, "%d-%d", &rank0, &rank1); + fclose(fp); + if (num < 2) return 1; + return rank1 + 1; +} +#elif defined(__OSX__) || defined(__APPLE__) +int GetCpuCount() { + int count = 0; + size_t len = sizeof(int); + sysctlbyname("hw.ncpu", &count, &len, NULL, 0); + return count > 0 ? count : 1; +} +#else +int GetCpuCount() { return 1; } +#endif + +class EigenDeviceWarpper { +public: // NOLINT +#if EIGEN_USE_THREADS + static Eigen::ThreadPoolDevice* device() { + const int num_cpus = GetCpuCount(); + const int num_threads = (num_cpus > 2) ? 2 : num_cpus; + static Eigen::ThreadPool tp(num_threads); + static Eigen::ThreadPoolDevice* device = + new Eigen::ThreadPoolDevice(&tp, num_threads); + return device; + } + + static void free_device(Eigen::ThreadPoolDevice* device) { + // do nothing + } +#else + static Eigen::DefaultDevice* device() { + Eigen::DefaultDevice* device = new Eigen::DefaultDevice; + return device; + } + + static void free_device(Eigen::DefaultDevice* device) { delete device; } +#endif +}; + +} // namespace paddle diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 3d6ced713f00bd72622d8aeed3967642b6774ffe..6dc877dd90ee2ae3d99406299a9244eb3e3d7b53 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -146,8 +146,6 @@ else() ${GSERVER_SOURCES}) endif() -add_style_check_target(paddle_gserver ${GSERVER_SOURCES}) -add_style_check_target(paddle_gserver ${GSERVER_HEADER}) add_dependencies(paddle_gserver paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 922fb5172273da24f9c48786961a6d850b1ed7c5..3c897b5f3e09cd53ddd5b767333ce4759250da71 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -51,10 +51,6 @@ else() endif() - -add_style_check_target(paddle_math ${MATH_SOURCES}) -add_style_check_target(paddle_math ${MATH_HEADERS}) - add_dependencies(paddle_math paddle_proto ${external_project_dependencies}) # depends if(WITH_TESTING) add_subdirectory(tests) diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index 25fc35311fc63988c64a445d72fc6255e49e8d4b..7c80faa48ce960a3a7eb7d88eda4f2b09756410e 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -7,6 +7,10 @@ set(OPITMIZER_SRCS sgd_optimizer.cc ) -cc_library(paddle_optimizer STATIC SRCS ${OPITMIZER_SRCS} DEPS paddle_proto glog) -cc_test(serialization_test SRCS serialization_test.cc DEPS paddle_proto) -cc_test(parameter_optimizer_test SRCS parameter_optimizer_test.cc DEPS paddle_optimizer) +add_library(paddle_optimizer ${OPITMIZER_SRCS}) +target_link_libraries(paddle_optimizer paddle_proto glog) + +if (WITH_TESTING) + add_unittest(serialization_test serialization_test.cc) + add_unittest(parameter_optimizer_test parameter_optimizer_test.cc) +endif() diff --git a/paddle/parameter/CMakeLists.txt b/paddle/parameter/CMakeLists.txt index d2ae1c16c6b7316f1a6facdef4b933693d6ba818..19ae07e077e2b8f55ce4050566c9cf6aaa0efa0a 100644 --- a/paddle/parameter/CMakeLists.txt +++ b/paddle/parameter/CMakeLists.txt @@ -5,8 +5,6 @@ file(GLOB PARAMETERS_SOURCES . *.cpp) add_library(paddle_parameter STATIC ${PARAMETERS_SOURCES}) -add_style_check_target(paddle_parameter ${PARAMETERS_SOURCES}) -add_style_check_target(paddle_parameter ${PARAMETERS_HEADERS}) add_dependencies(paddle_parameter paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) diff --git a/paddle/pserver/CMakeLists.txt b/paddle/pserver/CMakeLists.txt index f75475a88f7224ee3889827795088c8aa920b63b..0ae9c6ef6afc6ec5a99a685b08883def0db51cf1 100644 --- a/paddle/pserver/CMakeLists.txt +++ b/paddle/pserver/CMakeLists.txt @@ -14,9 +14,6 @@ set(NETWORK_HEADERS add_library(paddle_network STATIC ${NETWORK_SOURCES}) -add_style_check_target(paddle_network ${NETWORK_SOURCES}) -add_style_check_target(paddle_network ${NETWORK_HEADERS}) - add_dependencies(paddle_network paddle_proto ${external_project_dependencies}) ################### paddle_pserver ###################### @@ -37,9 +34,6 @@ set(PSERVER_HEADERS add_library(paddle_pserver STATIC ${PSERVER_SOURCES}) -add_style_check_target(paddle_pserver ${PSERVER_SOURCES}) -add_style_check_target(paddle_pserver ${PSERVER_HEADERS}) - add_dependencies(paddle_pserver paddle_proto ${external_project_dependencies}) set(PSERVER_MAIN_SOURCES diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh deleted file mode 100755 index 92b8b90880bc78dbc281a959a7472c2822f76fc3..0000000000000000000000000000000000000000 --- a/paddle/scripts/docker/build.sh +++ /dev/null @@ -1,259 +0,0 @@ -#!/bin/bash - -function cmake_gen() { - mkdir -p /paddle/build - cd /paddle/build - - # build script will not fail if *.deb does not exist - rm *.deb 2>/dev/null || true - # delete previous built whl packages - rm -rf /paddle/paddle/dist 2>/dev/null || true - - # Support build for all python versions, currently - # including cp27-cp27m and cp27-cp27mu. - PYTHON_FLAGS="" - if [ "$1" != "" ]; then - echo "using python abi: $1" - if [ "$1" == "cp27-cp27m" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} - export PATH=/opt/python/cp27-cp27m/bin/:${PATH} - PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python - -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" - elif [ "$1" == "cp27-cp27mu" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} - export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} - PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python - -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" - fi - fi - - cat < /paddle/build/Dockerfile < - ENV HOME /root -EOF - - if [[ ${WITH_GPU} == "ON" ]]; then - NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&" - else - NCCL_DEPS="" - fi - - if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then - PADDLE_VERSION="paddle version" - CMD='"paddle", "version"' - else - PADDLE_VERSION="true" - CMD='"true"' - fi - - cat >> /paddle/build/Dockerfile <> /paddle/build/Dockerfile <> /paddle/build/Dockerfile <= 21." - ANDROID_API=21 - fi -else # armeabi, armeabi-v7a - ANDROID_ARCH=arm -fi - -ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API - -cat <&2 - echo "Please use pre-commit to check what is wrong." 1>&2 - exit 1 -} - -trap 'abort' 0 -set -e - -# install glide -curl https://glide.sh/get | bash -eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - -# set up go environment for running gometalinter -mkdir -p $GOPATH/src/github.com/PaddlePaddle/ -ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle -cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd - - -go get github.com/alecthomas/gometalinter -gometalinter --install - -cd $TRAVIS_BUILD_DIR -export PATH=/usr/bin:$PATH -pre-commit install -clang-format --version - - - -if ! pre-commit run -a ; then - git diff - exit 1 -fi - -trap : 0 diff --git a/paddle/scripts/travis/deploy_key.enc b/paddle/scripts/travis/deploy_key.enc deleted file mode 100644 index b0aa45c5ac626c735735fd8541a43bf8b099d0a0..0000000000000000000000000000000000000000 Binary files a/paddle/scripts/travis/deploy_key.enc and /dev/null differ diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index 72911695bd4959d73d783897b0c5e674454c30bc..6192de4388c8c3f5165fb88b443d372748f7a17e 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -36,17 +36,12 @@ endif() add_library(paddle_trainer_lib STATIC ${TRAINER_SOURCES}) -add_style_check_target(paddle_trainer_lib - ${TRAINER_SOURCES}) -add_style_check_target(paddle_trainer_lib - ${TRAINER_HEADERS}) add_dependencies(paddle_trainer_lib paddle_proto ${external_project_dependencies}) macro(add_paddle_exe TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) - add_style_check_target(${TARGET_NAME} ${ARGN}) link_paddle_exe(${TARGET_NAME}) endmacro() diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index 6292e7fa52cd86c71724d9fe84ea622e98ff1e08..b42b2bae968a10c581c594054f853347eb5d5445 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -14,9 +14,6 @@ add_library(paddle_utils STATIC ${UTIL_SOURCES} ${UTIL_ARCH_SOURCES} ${UTIL_RES}) -add_style_check_target(paddle_utils ${UTIL_HEADERS}) -add_style_check_target(paddle_utils ${UTIL_SOURCES} - ${UTIL_ARCH_SOURCES}) add_dependencies(paddle_utils paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 67aa5ec9979dbe3fcdb037e38ad94329d294cdcc..859605d005328c030980a49a349742772de1cb6d 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -48,6 +48,7 @@ from transpiler import DistributeTranspiler, SimpleDistributeTranspiler, \ InferenceTranspiler, memory_optimize, release_memory from concurrency import (Go, make_channel, channel_send, channel_recv, channel_close, Select) +from lod_tensor import create_lod_tensor, create_random_int_lodtensor import clip import profiler import unique_name @@ -59,7 +60,7 @@ Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \ trainer.__all__ + inferencer.__all__ + transpiler.__all__ + \ - parallel_executor.__all__ + [ + parallel_executor.__all__ + lod_tensor.__all__ + [ 'io', 'initializer', 'layers', diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 32b1b65bd97ef1e512a5880843509611b606f52d..4f9622d04dc98f41b503ceb780802d2a4e4c58a0 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -51,6 +51,12 @@ def _create_op_desc_(op_type, inputs, outputs, attrs): op_desc.set_input(para, args) for para, args in outputs.iteritems(): op_desc.set_output(para, args) + + op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() + + if op_role_attr_name not in attrs: + attrs[ + op_role_attr_name] = core.op_proto_and_checker_maker.OpRole.Backward for name, val in attrs.iteritems(): if isinstance(val, framework.Block): op_desc.set_block_attr(name, val.desc) @@ -141,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs): else: if len(renamed_vars[var_name]) == 1: new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) + str(var_rename_count[var_name]) var_rename_count[var_name] += 1 # rename original var_name renamed_vars[var_name][0] = new_name @@ -149,7 +155,7 @@ def _addup_repetitive_outputs_(op_descs): _rename_arg_(pending_sum_ops, var_name, new_name) new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) + str(var_rename_count[var_name]) var_rename_count[var_name] += 1 op_desc.rename_output(var_name, new_name) renamed_vars[var_name].append(new_name) @@ -335,9 +341,12 @@ def _append_backward_ops_(block, no_grad_dict[block.idx]) # append op_desc in grad_op_descs to target_block + op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() + backward = core.op_proto_and_checker_maker.OpRole.Backward for op_desc in grad_op_descs: new_op_desc = target_block.desc.append_op() new_op_desc.copy_from(op_desc) + new_op_desc.set_attr(op_role_attr_name, backward) grad_to_var["__current_op_desc__"] = new_op_desc if callbacks is not None: assert (isinstance(callbacks, list)) @@ -439,6 +448,22 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, (list[(Variable,Variable)]): list of (parameter, gradient) pair. """ assert isinstance(loss, framework.Variable) + + if loss.op is None: + # the loss is from a cloned program. Find loss op manually. + for op in reversed(loss.block.ops): + assert isinstance(op, framework.Operator) + if len(op.output_arg_names) == 1 and op.output_arg_names[ + 0] == loss.name: + loss.op = op + break + if loss.op is None: + raise ValueError("loss.op is None. Should not happend") + + loss.op.set_attr(core.op_proto_and_checker_maker.kOpRoleAttrName(), + int(core.op_proto_and_checker_maker.OpRole.Forward) | + int(core.op_proto_and_checker_maker.OpRole.Loss)) + if callbacks is not None: isinstance(callbacks, list) @@ -456,12 +481,16 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, current_block_idx = program.current_block_idx grad_to_var = dict() - op_desc = _create_op_desc_("fill_constant", {}, { - "Out": [_append_grad_suffix_(loss.name)] - }, {"shape": [1], - "value": 1.0, - "dtype": loss.dtype, - "force_cpu": False}) + op_desc = _create_op_desc_( + "fill_constant", {}, {"Out": [_append_grad_suffix_(loss.name)]}, { + "shape": [1], + "value": 1.0, + "dtype": loss.dtype, + "force_cpu": False, + core.op_proto_and_checker_maker.kOpRoleAttrName(): + int(core.op_proto_and_checker_maker.OpRole.Backward) | + int(core.op_proto_and_checker_maker.OpRole.Loss), + }) root_block.desc.append_op().copy_from(op_desc) block_no_grad_set = set(map(_strip_grad_suffix_, no_grad_dict[0])) @@ -505,6 +534,24 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, params_and_grads.append((param_var, grad_var)) else: params_and_grads.append((param_var, None)) + + op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName() + for p, g in params_and_grads: + if g is None: + continue + for op in reversed(program.global_block().ops): + assert isinstance(op, framework.Operator) + if g.name in op.output_arg_names: + g.op = op + break + + if g.op is None: + raise ValueError("Unexpected branch") + attr_val = [p.name, g.name] + if g.op.has_attr(op_role_var_attr_name): + attr_val.extend(g.op.attr(op_role_var_attr_name)) + g.op.set_attr(op_role_var_attr_name, attr_val) + return params_and_grads diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 12add9e686910c3936cf17fe87a5d0b78443b270..66c3fc6b66d61bc9578f84594409ad0f24c99910 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -214,21 +214,24 @@ def set_gradient_clip(clip, param_list=None, program=None): def append_gradient_clip_ops(param_grad): context = dict() - create_op_callbacks = [] for p, g in param_grad: - clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr()) - if clip_attr is None: - clip_attr = NullGradientClipAttr() - if not isinstance(clip_attr, BaseGradientClipAttr): - raise TypeError( - "clip attribute should be an instance of BaseGradientClipAttr") + with p.block.program.optimized_guard(p): + clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr()) + if clip_attr is None: + clip_attr = NullGradientClipAttr() + if not isinstance(clip_attr, BaseGradientClipAttr): + raise TypeError( + "clip attribute should be an instance of BaseGradientClipAttr" + ) - clip_attr.process_context(context=context, param=p, grad=g) - create_op_callbacks.append( - functools.partial( - clip_attr.create_operators, param=p, grad=g)) + clip_attr.process_context(context=context, param=p, grad=g) + + res = [] + for p, g in param_grad: + with p.block.program.optimized_guard(p): + res.append(clip_attr.create_operators(param=p, grad=g)) - return [each_callback() for each_callback in create_op_callbacks] + return res ClipByValue = GradientClipByValue diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 161ea55586bbb6bde2cbb0084bb67b184f91460e..08b756d95b9b72db5d978afbe437bbfcb52025b0 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -404,6 +404,23 @@ class Operator(object): self.block = block self.desc = desc self.attrs = attrs + if self.attrs is None: + self.attrs = dict() + del attrs + + op_maker = core.op_proto_and_checker_maker + + if op_maker.kOpRoleAttrName() not in self.attrs: + self.attrs[op_maker.kOpRoleAttrName()] = self.block.program.op_role + + role_var_name = op_maker.kOpRoleVarAttrName() + if len(self.block.program. + op_role_var) != 0 and role_var_name not in self.attrs: + self.attrs[role_var_name] = self.block.program.op_role_var + + if role_var_name in self.attrs and len(self.attrs[role_var_name]) == 0: + del self.attrs[role_var_name] + if len(self.desc.type()) != 0: return if type is None: @@ -469,22 +486,23 @@ class Operator(object): arg.op = self self.desc.set_output(out_proto.name, out_arg_names) - if attrs is not None: - if not isinstance(attrs, dict): + if self.attrs is not None: + if not isinstance(self.attrs, dict): raise TypeError("'attrs' should be a dict.") for attr in proto.attrs: attr_name = attr.name - if (attr_name not in attrs) or (attrs[attr_name] is None): + if (attr_name not in self.attrs) or ( + self.attrs[attr_name] is None): continue - if isinstance(attrs[attr_name], Block): - self.desc.set_block_attr(attr_name, attrs[attr_name].desc) - elif isinstance(attrs[attr_name], core.BlockDesc) or \ - isinstance(attrs[attr_name], core.ProgramDesc): + if isinstance(self.attrs[attr_name], Block): + self.desc.set_block_attr(attr_name, + self.attrs[attr_name].desc) + elif isinstance(self.attrs[attr_name], core.BlockDesc) or \ + isinstance(self.attrs[attr_name], core.ProgramDesc): self.desc.set_serialized_attr( - attr_name, attrs[attr_name].serialize_to_string()) + attr_name, self.attrs[attr_name].serialize_to_string()) else: - self.desc.set_attr(attr_name, attrs[attr_name]) - + self.desc.set_attr(attr_name, self.attrs[attr_name]) self.desc.check_attrs() no_kernel_op_set = { 'feed', 'fetch', 'save', 'load', 'recurrent', 'go', @@ -612,6 +630,10 @@ class Operator(object): """ return self.desc.attr_type(name) + def set_attr(self, name, val): + self.attrs[name] = val + self.desc.set_attr(name, val) + @property def attr_names(self): """ @@ -1002,6 +1024,33 @@ class Program(object): self.blocks = [Block(self, 0)] self.current_block_idx = 0 self._seed = 0 + self._current_role = core.op_proto_and_checker_maker.OpRole.Forward + self._op_role_var = [] + + @property + def op_role(self): + return self._current_role + + @op_role.setter + def set_op_role(self, role): + self._current_role = role + + @property + def op_role_var(self): + return self._op_role_var + + @op_role_var.setter + def set_op_role_var(self, var_name): + self._op_role_var = [var_name] + + @contextlib.contextmanager + def optimized_guard(self, var): + OpRole = core.op_proto_and_checker_maker.OpRole + self._current_role = OpRole.Optimize + self._op_role_var = [var.name if isinstance(var, Variable) else var] + yield + self._op_role_var = [] + self._current_role = OpRole.Forward def __str__(self): return self.to_string(True) diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index 894f6dbfadcaf532556c439daf2c3b4ca24ffeb4..9f242cf29a56573349f192307a68e135a409a4be 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -56,7 +56,7 @@ class Inferencer(object): else: self.exe = executor.Executor(self.place) - def infer(self, inputs): + def infer(self, inputs, return_numpy=True): """ :param inputs: a map of {"input_name": input_var} that will be feed into the inference program to get the predict value @@ -66,9 +66,11 @@ class Inferencer(object): raise ValueError( "inputs should be a map of {'input_name': input_var}") - with self._prog_and_scope_guard(): - results = self.exe.run(feed=inputs, - fetch_list=[self.predict_var.name]) + with executor.scope_guard(self.scope): + results = self.exe.run(self.inference_program, + feed=inputs, + fetch_list=[self.predict_var], + return_numpy=return_numpy) return results diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 1470f8c2e50004abb08e75980decd9485c22dece..03d4602f7a99dc335260cffdcdc30a839f3988cd 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import contextlib from .. import core from ..framework import convert_np_dtype_to_dtype_, default_main_program, default_startup_program, Program @@ -21,7 +22,8 @@ from ..executor import global_scope __all__ = [ 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file', - 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer' + 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', + 'random_data_generator', 'Preprocessor' ] @@ -535,8 +537,6 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None): inputs={'UnderlyingReader': reader}, outputs={'Out': [new_reader]}, attrs=attrs) - new_reader.persistable = True - new_reader.stop_gradient = True return monkey_patch_reader_methods(new_reader) @@ -581,3 +581,82 @@ def read_file(file_obj): return out[0] else: return out + + +class Preprocessor(object): + BEFORE_SUB_BLOCK = 0 + IN_SUB_BLOCK = 1 + AFTER_SUB_BLOCK = 2 + + def __init__(self, reader, name=None): + self.underlying_reader = reader + new_reader_name = name if name is not None else unique_name( + "create_custom_reader") + self.main_prog = default_main_program() + self.reader = self.main_prog.current_block().create_var( + name=new_reader_name) + self.sub_block = None + self.source_var_names = None + self.sink_var_names = None + self.status = Preprocessor.BEFORE_SUB_BLOCK + + def is_completed(self): + return self.sub_block and self.source_var_names and self.sink_var_names + + @contextlib.contextmanager + def block(self): + self.status = Preprocessor.IN_SUB_BLOCK + self.sub_block = self.main_prog.create_block() + yield + self.main_prog.rollback() + self.status = Preprocessor.AFTER_SUB_BLOCK + if not self.is_completed(): + raise RuntimeError( + "The definition of preprocessor is incompleted! " + "Please make sure that you have set input and output " + "variables by invoking 'inputs' and 'outputs' in " + "Preprocessor's sub-block.") + + def inputs(self): + if self.status != Preprocessor.IN_SUB_BLOCK: + raise RuntimeError( + "Preprocessor.inputs() can only be invoked inside the sub-block." + ) + + source_shapes = self.underlying_reader.desc.shapes() + source_dtypes = self.underlying_reader.desc.dtypes() + source_lod_levels = self.underlying_reader.desc.lod_levels() + self.source_var_names = [ + unique_name("preprocessor_source") + for _ in xrange(len(source_shapes)) + ] + source_vars = [] + for var_name, shape, dtype, lod_level in zip( + self.source_var_names, source_shapes, source_dtypes, + source_lod_levels): + source_vars.append(self.main_prog.current_block().create_var( + name=var_name, shape=shape, dtype=dtype, lod_level=lod_level)) + return source_vars + + def outputs(self, *outs): + if self.status != Preprocessor.IN_SUB_BLOCK: + raise RuntimeError( + "Preprocessor.outputs() can only be invoked inside the sub-block." + ) + self.sink_var_names = [var.name for var in outs] + + def __call__(self, *args, **kwargs): + if self.status != Preprocessor.AFTER_SUB_BLOCK: + raise RuntimeError( + "Preprocessor output can only be retrieved after rnn block.") + + self.main_prog.current_block().append_op( + type="create_custom_reader", + inputs={'UnderlyingReader': self.underlying_reader}, + outputs={'Out': [self.reader]}, + attrs={ + "sub_block": self.sub_block, + "source_var_names": self.source_var_names, + "sink_var_names": self.sink_var_names + }) + return monkey_patch_reader_methods(self.reader) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index dd360c2b98414d1cf2c0da5f7c8d5c6ca461a22a..b6c47aa9a65b9145983513715233784d77e3d904 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -81,6 +81,7 @@ __all__ = [ 'label_smooth', 'roi_pool', 'dice_loss', + 'upsampling_bilinear2d', ] @@ -3852,6 +3853,8 @@ def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): (num_rois, channels, pooled_h, pooled_w). Examples: + .. code-block:: python + pool_out = fluid.layers.roi_pool(input=x, rois=rois, 7, 7, 1.0) """ helper = LayerHelper('roi_pool', **locals()) @@ -3899,6 +3902,8 @@ def dice_loss(input, label, epsilon=0.00001): dice_loss (Variable): The dice loss with shape [1]. Examples: + .. code-block:: python + predictions = fluid.layers.softmax(x) loss = fluid.layers.dice_loss(input=predictions, label=label, 2) """ @@ -3910,3 +3915,66 @@ def dice_loss(input, label, epsilon=0.00001): label, dim=reduce_dim) dice_score = 1 - inse * 2 / (dice_denominator + epsilon) return reduce_mean(dice_score) + + +def upsampling_bilinear2d(input, out_shape=None, scale=None, name=None): + """ + The mathematical meaning of upsampling_bilinear2d is also called + Bilinear interpolation. + Bilinear interpolation is an extension of linear interpolation for + interpolating functions of two variables (e.g. H-direction and + W-direction in this layer) on a rectilinear 2D grid. + + For details, please refer to Wikipedia: + https://en.wikipedia.org/wiki/Bilinear_interpolation + + Args: + input (Variable): The input tensor of bilinear interpolation, + This is a 4-D tensor of the shape + (num_batches, channels, in_h, in_w). + out_shape(list|tuple|None): Output shape of bilinear interpolation + layer, the shape is (out_h, out_w). + Default: None + scale(int|None): The multiplier for the input height or width. + At least one of out_shape or scale must be set. + And out_shape has a higher priority than scale. + Default: None + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + out (Variable): The output is a 4-D tensor of the shape + (num_batches, channls, out_h, out_w). + + Examples: + .. code-block:: python + + out = fluid.layers.bilinear_interp(input, out_shape=[12, 12]) + """ + if out_shape is None and scale is None: + raise ValueError("One of out_shape and scale must not be None") + helper = LayerHelper('bilinear_interp', **locals()) + dtype = helper.input_dtype() + + def _is_list_or_turple_(data): + return (isinstance(data, list) or isinstance(data, tuple)) + + if out_shape is not None: + if not (_is_list_or_turple_(out_shape) and len(out_shape) == 2): + raise ValueError('out_shape should be a list or tuple ', + 'with length 2, (out_h, out_w).') + out_shape = list(map(int, out_shape)) + out_h = out_shape[0] + out_w = out_shape[1] + else: + out_h = int(input.shape[2] * scale) + out_w = int(input.shape[3] * scale) + + out = helper.create_tmp_variable(dtype) + helper.append_op( + type="bilinear_interp", + inputs={"X": input}, + outputs={"Out": out}, + attrs={"out_h": out_h, + "out_w": out_w}) + return out diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..9946d0a4ff33b2f5040f6d2e31aa20fcf9c609a7 --- /dev/null +++ b/python/paddle/fluid/lod_tensor.py @@ -0,0 +1,189 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import core +import numpy as np + +__all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] + + +def _validate_lod(lod, tensor_height=-1): + """Check whether the input length-based lod info is valid. + + There are several things to check: + 1. lod should be a list of lists. Empty list is fine. + 2. The length of each sublist (a lod level) should be at least one. + 3. Each element in each lod level should be an integer greater than 0. + 4. The sum of one lod level should be equal to the length of the next lod level. + 5. The sum of the last lod level should be equal to the tensor height. + Bypass this check if user does not provide tensor_height as input. + + Args: + lod: the length-based lod info, e.g., [[2, 3], [2, 1, 2, 3, 4]]. + tensor_height: the outermost dimension of the tensor with which the input + lod is associated with. + + Returns: + A boolean indicating whether the input lod is valid or not. + """ + assert isinstance(lod, list), "lod should be a list" + # Empty lod is fine + if len(lod) == 0: + return True + + lod_sum = [] + for level in lod: + assert isinstance(level, list), "each item in lod should be a list" + # Each level of lod should have at least one length info + if len(level) < 1: + return False + level_sum = 0 + for lod_len in level: + # Each length in a level should be > 0 + if lod_len <= 0: + return False + level_sum += lod_len + lod_sum.append(level_sum) + + for idx, val in enumerate(lod_sum[:-1]): + # Each level's sum should be equal to + # the number of items in the next level + if val != len(lod[idx + 1]): + return False + + if tensor_height == -1: + return True + else: + # Last level's sum should be equal to the tensor height + return lod_sum[-1] == tensor_height + + +def _convert_lod(lod): + """Convert a length-based lod to a offset-based lod. + + If the length-based lod is [[2, 3], [2, 1, 2, 3, 4]], + then the offset-based lod is [[0, 2, 5], [0, 2, 3, 5, 8, 12]]. + + Args: + lod: a length-based lod info. + + Returns: + A list of lists as the offset-based lod converted to from the input lod. + """ + new_lod = [] + for level in lod: + cur_len = 0 + new_level = [cur_len] + for lod_len in level: + cur_len += lod_len + new_level.append(cur_len) + new_lod.append(new_level) + return new_lod + + +def create_lod_tensor(data, lod, place): + """Create a lod tensor from a numpy array, a list, or an existing lod tensor. + + Create a lod tensor by doing the following: + 1. Check that the length-based input lod is valid. + 2. Convert the length-based lod to a offset-based LoD. + 3. Copy the data from a numpy array, a list or a existing lod tensor to + CPU or GPU device (based on input place). + 4. Set the level of detail (LoD) using the offset-based LoD. + + Use example: + Suppose we want LoDTensor to hold data for sequences of word, where each word is + represented by an integer. If we want to create a LoDTensor to represent two + sentences, one of 2 words, and one of 3 words. + + Then 'data' can be a numpy array of integers with shape (5, 1). + 'lod' will be [[2, 3]], indicating the length(# of words) in each sentence. + This length-based input lod [[2, 3]] will be converted to offset-based lod [[0, 2, 5]] + inside the function call. + + Please refer to + github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md + for more details regarding LoD. + + Args: + data: a numpy array or a LoDTensor or a list holding the data to be copied. + lod: a list of lists indicating the length-based LoD info specified by the user. + place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. + + Returns: + A fluid LoDTensor object with tensor data and lod info. + """ + if isinstance(data, core.LoDTensor): + return create_lod_tensor(np.array(data), lod, place) + elif isinstance(data, list): + # When input data is a list, it only deal with the case where the base element + # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated + # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number + # of words or other indexes in the sequence. + new_lod = [] + for seq in data: + new_lod.append(len(seq)) + assert [new_lod] == lod, "data and lod do not match" + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + return create_lod_tensor(flattened_data, lod, place) + elif isinstance(data, np.ndarray): + assert _validate_lod(lod, + data.shape[0]), "the provided lod info is invalid" + tensor = core.LoDTensor() + tensor.set(data, place) + tensor.set_lod(_convert_lod(lod)) + return tensor + else: + raise TypeError( + "data should be either a LoDTensor, a Numpy array or a list") + + +def create_random_int_lodtensor(lod, base_shape, place, low, high): + """Create a LoDTensor containing random integers. + + This function is frequently used in the book examples. So we revised it based on + the new create_lod_tensor API and put it here in the lod_tensor module to simplify + the code. + + The function does the following: + 1. Calculate the overall shape of the LoDTensor based on the length-based 'lod' input + and the shape of the basic element in 'base_shape'. + 2. Create a numpy array of this shape. + 3. Create the LoDTensor using create_lod_tensor API. + + Suppose we want LoDTensor to hold data for sequences of word, where each word is + represented by an integer. If we want to create a LoDTensor to represent two + sentences, one of 2 words, and one of 3 words. Then 'base_shape' is [1], input + length-based 'lod' is [[2, 3]]. Then the overall shape of the LoDTensor would be + [5, 1], holding 5 words for two sentences. + + Args: + data: a numpy array or a LoDTensor holding the data to be copied. + lod: a list of lists indicating the length-based LoD info specified by the user. + base_shape: the shape of the basic element to be held by the LoDTensor. + place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. + low: the lower bound of the random integers. + high: the upper bound of the random integers. + + Returns: + A fluid LoDTensor object with tensor data and lod info. + """ + assert isinstance(base_shape, list), "base_shape should be a list" + converted_lod = _convert_lod(lod) + # append the total number of basic elements to the front of its shape + overall_shape = [converted_lod[-1][-1]] + base_shape + # the range of integer data elements is [low, high] + data = np.random.random_integers(low, high, overall_shape).astype("int64") + return create_lod_tensor(data, lod, place) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index a0deaca78eed75e9faee7bb38493afc181eb212f..115362c6bf33018342699a442c688e7356f3c206 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -213,11 +213,13 @@ class Optimizer(object): optimize_ops = [] for param_and_grad in parameters_and_grads: - if param_and_grad[0].trainable is True and param_and_grad[ - 1] is not None: - optimize_op = self._append_optimize_op(loss.block, - param_and_grad) - optimize_ops.append(optimize_op) + with param_and_grad[0].block.program.optimized_guard( + param_and_grad[0]): + if param_and_grad[0].trainable is True and param_and_grad[ + 1] is not None: + optimize_op = self._append_optimize_op(loss.block, + param_and_grad) + optimize_ops.append(optimize_op) # Get custom finish ops for subclasses # FIXME: Need to fix this once we figure out how to handle dependencies diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index c006bd9a66ddb422b7d80d2ca87aa7f56a6485db..c4d6829599616cb3ea7791a189e7070974de6ae3 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -43,31 +43,32 @@ def append_regularization_ops(parameters_and_grads, regularization=None): """ params_and_grads = [] for param, grad in parameters_and_grads: - # If no gradient then we don't need to do anything - if grad is None: + with param.block.program.optimized_guard(param): + # If no gradient then we don't need to do anything + if grad is None: + params_and_grads.append((param, grad)) + continue + + regularization_term = None + if param.regularizer is not None: + # Add variable for regularization term in grad block + regularization_term = param.regularizer(param, grad, grad.block) + elif regularization is not None: + regularization_term = regularization(param, grad, grad.block) + + # If no regularization specified, then we don't need to do anything + if regularization_term is None: + params_and_grads.append((param, grad)) + continue + + assert grad.shape == regularization_term.shape + + grad.block.append_op( + type='elementwise_add', + inputs={"X": grad, + "Y": regularization_term}, + outputs={"Out": grad}) params_and_grads.append((param, grad)) - continue - - regularization_term = None - if param.regularizer is not None: - # Add variable for regularization term in grad block - regularization_term = param.regularizer(param, grad, grad.block) - elif regularization is not None: - regularization_term = regularization(param, grad, grad.block) - - # If no regularization specified, then we don't need to do anything - if regularization_term is None: - params_and_grads.append((param, grad)) - continue - - assert grad.shape == regularization_term.shape - - grad.block.append_op( - type='elementwise_add', - inputs={"X": grad, - "Y": regularization_term}, - outputs={"Out": grad}) - params_and_grads.append((param, grad)) return params_and_grads diff --git a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt index 182e30a6a9b4249a895d15cfd65c403bb6813d0d..efa5ee2d06af3d31e7d84122dd7eea37d6dcf3a3 100644 --- a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt @@ -10,3 +10,7 @@ add_subdirectory(fit_a_line) add_subdirectory(recognize_digits) add_subdirectory(image_classification) add_subdirectory(understand_sentiment) +add_subdirectory(label_semantic_roles) +add_subdirectory(word2vec) +add_subdirectory(recommender_system) +add_subdirectory(machine_translation) diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index 4c8505acf322a8ee33799c009b523cd70bd01db3..de3906fc6a005181b0ab04a846eb2e7ce14004c2 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -48,7 +48,7 @@ def linear(): return avg_loss -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() trainer = fluid.Trainer( @@ -68,8 +68,8 @@ def train(use_cuda, train_program, save_dirname): ['15.343549569447836'] ... ''' - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) trainer.stop() trainer.train( @@ -80,19 +80,19 @@ def train(use_cuda, train_program, save_dirname): # infer -def infer(use_cuda, inference_program, save_dirname=None): - if save_dirname is None: +def infer(use_cuda, inference_program, params_dirname=None): + if params_dirname is None: return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 10 tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") results = inferencer.infer({'x': tensor_x}) - print("infer results: ", numpy.array(results[0])) + print("infer results: ", results[0]) def main(use_cuda): @@ -100,10 +100,10 @@ def main(use_cuda): return # Directory for saving the trained model - save_dirname = "fit_a_line.inference.model" + params_dirname = "fit_a_line.inference.model" - train(use_cuda, linear, save_dirname) - infer(use_cuda, inference_program, save_dirname) + train(use_cuda, linear, params_dirname) + infer(use_cuda, inference_program, params_dirname) class TestFitALine(unittest.TestCase): diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index 1160e500dbd6db784eeb81b72968386347fec59a..63dc1b6ce30974ede22a3f7772b76bf207bbae39 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -85,7 +85,7 @@ def train_network(): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): BATCH_SIZE = 128 EPOCH_NUM = 1 @@ -105,8 +105,8 @@ def train(use_cuda, train_program, save_dirname): print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) if accuracy > 0.01: # Low threshold for speeding up CI - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -122,10 +122,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['pixel', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -142,12 +142,14 @@ def main(use_cuda): save_path = "image_classification_resnet.inference.model" train( - use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + use_cuda=use_cuda, + train_program=train_network, + params_dirname=save_path) infer( use_cuda=use_cuda, inference_program=inference_network, - save_dirname=save_path) + params_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index 1e3e955ba0299f2cc0fcc02d79ae6fd8ff4c1171..0bf8f265a1c1b11364ecfa11061af183ce20d51e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -64,7 +64,7 @@ def train_network(): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): BATCH_SIZE = 128 train_reader = paddle.batch( paddle.reader.shuffle( @@ -82,8 +82,8 @@ def train(use_cuda, train_program, save_dirname): print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) if accuracy > 0.01: # Low threshold for speeding up CI - if save_dirname is not None: - trainer.save_params(save_dirname) + if params_dirname is not None: + trainer.save_params(params_dirname) return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -99,10 +99,10 @@ def train(use_cuda, train_program, save_dirname): feed_order=['pixel', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -119,12 +119,14 @@ def main(use_cuda): save_path = "image_classification_vgg.inference.model" train( - use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + use_cuda=use_cuda, + train_program=train_network, + params_dirname=save_path) infer( use_cuda=use_cuda, inference_program=inference_network, - save_dirname=save_path) + params_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..673c965b662a022739f8d489c331f4de9455a926 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py deleted file mode 100755 index fe36e55bb5380975ae322eccbcd8ad41e1e6748a..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import paddle -import paddle.fluid as fluid -import numpy - -WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict() -WORD_DICT_LEN = len(WORD_DICT) -LABEL_DICT_LEN = len(LABEL_DICT) -PRED_DICT_LEN = len(VERB_DICT) -MARK_DICT_LEN = 2 - - -def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark): - WORD_DIM = 32 - MARK_DIM = 5 - HIDDEN_DIM = 512 - DEPTH = 8 - EMBEDDING_NAME = 'emb' - - # Data definitions - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) - ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) - - # 8 features - predicate_embedding = fluid.layers.embedding( - input=predicate, - size=[PRED_DICT_LEN, WORD_DIM], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='vemb') - - mark_embedding = fluid.layers.embedding( - input=mark, - size=[MARK_DICT_LEN, MARK_DIM], - dtype='float32', - is_sparse=IS_SPARSE) - - word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] - emb_layers = [ - fluid.layers.embedding( - size=[WORD_DICT_LEN, WORD_DIM], - input=x, - param_attr=fluid.ParamAttr( - name=EMBEDDING_NAME, trainable=False)) for x in word_input - ] - emb_layers.append(predicate_embedding) - emb_layers.append(mark_embedding) - - hidden_0_layers = [ - fluid.layers.fc(input=emb, size=HIDDEN_DIM, act='tanh') - for emb in emb_layers - ] - - hidden_0 = fluid.layers.sums(input=hidden_0_layers) - - lstm_0 = fluid.layers.dynamic_lstm( - input=hidden_0, - size=HIDDEN_DIM, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid') - - # stack L-LSTM and R-LSTM with direct edges - input_tmp = [hidden_0, lstm_0] - - for i in range(1, DEPTH): - mix_hidden = fluid.layers.sums(input=[ - fluid.layers.fc(input=input_tmp[0], size=HIDDEN_DIM, act='tanh'), - fluid.layers.fc(input=input_tmp[1], size=HIDDEN_DIM, act='tanh') - ]) - - lstm = fluid.layers.dynamic_lstm( - input=mix_hidden, - size=HIDDEN_DIM, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - is_reverse=((i % 2) == 1)) - - input_tmp = [mix_hidden, lstm] - - feature_out = fluid.layers.sums(input=[ - fluid.layers.fc(input=input_tmp[0], size=LABEL_DICT_LEN, act='tanh'), - fluid.layers.fc(input=input_tmp[1], size=LABEL_DICT_LEN, act='tanh') - ]) - - return feature_out - - -def inference_network(): - predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, - mark) - - crf_decode = fluid.layers.crf_decoding( - input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) - - return crf_decode - - -def train_network(): - MIX_HIDDEN_LR = 1e-3 - - predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, - mark) - target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1) - crf_cost = fluid.layers.linear_chain_crf( - input=predict, - label=target, - param_attr=fluid.ParamAttr( - name='crfw', learning_rate=MIX_HIDDEN_LR)) - avg_cost = fluid.layers.mean(crf_cost) - - return avg_cost - - -def train(use_cuda, save_path): - BATCH_SIZE = 128 - EPOCH_NUM = 1 - - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.conll05.train(), buf_size=8192), - batch_size=BATCH_SIZE) - test_reader = paddle.batch( - paddle.dataset.conll05.test(), batch_size=BATCH_SIZE) - - def event_handler(event): - if isinstance(event, fluid.EndIteration): - if (event.batch_id % 10) == 0: - avg_cost = trainer.test(reader=test_reader) - - print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1, - avg_cost)) - - if avg_cost > 0.01: # Low threshold for speeding up CI - trainer.save_params(save_path) - return - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=0.01, - decay_steps=100000, - decay_rate=0.5, - staircase=True)) - trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place) - trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler) - - -def infer(use_cuda, save_path): - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( - inference_program, param_path=save_path, place=place) - - def create_random_lodtensor(lod, place, low, high): - data = np.random.random_integers(low, high, - [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - # Create an input example - lod = [0, 4, 10] - word = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1) - pred = create_random_lodtensor(lod, place, low=0, high=PRED_DICT_LEN - 1) - ctx_n2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1) - ctx_n1 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1) - ctx_0 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1) - ctx_p1 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1) - ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1) - mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1) - - results = inferencer.infer({ - 'word_data': word, - 'verb_data': pred, - 'ctx_n2_data': ctx_n2, - 'ctx_n1_data': ctx_n1, - 'ctx_0_data': ctx_0, - 'ctx_p1_data': ctx_p1, - 'ctx_p2_data': ctx_p2, - 'mark_data': mark - }) - - print("infer results: ", results) - - -def main(use_cuda): - if use_cuda and not fluid.core.is_compiled_with_cuda(): - return - save_path = "label_semantic_roles.inference.model" - train(use_cuda, save_path) - infer(use_cuda, save_path) - - -if __name__ == '__main__': - for use_cuda in (False, True): - main(use_cuda=use_cuda) diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py new file mode 100755 index 0000000000000000000000000000000000000000..9464df59797c0b8c35611ee56de6bf362ac7a4a5 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -0,0 +1,261 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import numpy as np + +WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict() +WORD_DICT_LEN = len(WORD_DICT) +LABEL_DICT_LEN = len(LABEL_DICT) +PRED_DICT_LEN = len(VERB_DICT) +MARK_DICT_LEN = 2 +IS_SPARSE = True +BATCH_SIZE = 10 +EMBEDDING_NAME = 'emb' + + +def lstm_net(): + WORD_DIM = 32 + MARK_DIM = 5 + HIDDEN_DIM = 512 + DEPTH = 8 + + # Data definitions + word = fluid.layers.data( + name='word_data', shape=[1], dtype='int64', lod_level=1) + predicate = fluid.layers.data( + name='verb_data', shape=[1], dtype='int64', lod_level=1) + ctx_n2 = fluid.layers.data( + name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) + ctx_n1 = fluid.layers.data( + name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) + ctx_0 = fluid.layers.data( + name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) + ctx_p1 = fluid.layers.data( + name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) + ctx_p2 = fluid.layers.data( + name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) + mark = fluid.layers.data( + name='mark_data', shape=[1], dtype='int64', lod_level=1) + + # 8 features + predicate_embedding = fluid.layers.embedding( + input=predicate, + size=[PRED_DICT_LEN, WORD_DIM], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='vemb') + + mark_embedding = fluid.layers.embedding( + input=mark, + size=[MARK_DICT_LEN, MARK_DIM], + dtype='float32', + is_sparse=IS_SPARSE) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + fluid.layers.embedding( + size=[WORD_DICT_LEN, WORD_DIM], + input=x, + param_attr=fluid.ParamAttr(name=EMBEDDING_NAME)) + for x in word_input + #name=EMBEDDING_NAME, trainable=False)) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0_layers = [ + fluid.layers.fc(input=emb, size=HIDDEN_DIM, act='tanh') + for emb in emb_layers + ] + + hidden_0 = fluid.layers.sums(input=hidden_0_layers) + + lstm_0 = fluid.layers.dynamic_lstm( + input=hidden_0, + size=HIDDEN_DIM, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') + + # stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, DEPTH): + mix_hidden = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=HIDDEN_DIM, act='tanh'), + fluid.layers.fc(input=input_tmp[1], size=HIDDEN_DIM, act='tanh') + ]) + + lstm = fluid.layers.dynamic_lstm( + input=mix_hidden, + size=HIDDEN_DIM, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) + + input_tmp = [mix_hidden, lstm] + + feature_out = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=LABEL_DICT_LEN, act='tanh'), + fluid.layers.fc(input=input_tmp[1], size=LABEL_DICT_LEN, act='tanh') + ]) + + return feature_out + + +def inference_program(): + predict = lstm_net() + + return predict + + +def train_program(): + MIX_HIDDEN_LR = 1e-3 + + predict = lstm_net() + target = fluid.layers.data( + name='target', shape=[1], dtype='int64', lod_level=1) + crf_cost = fluid.layers.linear_chain_crf( + input=predict, + label=target, + param_attr=fluid.ParamAttr( + name='crfw', learning_rate=MIX_HIDDEN_LR)) + avg_cost = fluid.layers.mean(crf_cost) + + return [avg_cost] + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + optimizer = fluid.optimizer.SGD(learning_rate=0.01) + + trainer = fluid.Trainer( + train_func=train_program, place=place, optimizer=optimizer) + + feed_order = [ + 'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data', + 'ctx_p2_data', 'verb_data', 'mark_data', 'target' + ] + + #embedding_param = fluid.global_scope().find_var( + # EMBEDDING_NAME).get_tensor() + #embedding_param.set( + # load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM), + # place) + + def event_handler(event): + if isinstance(event, fluid.EndEpochEvent): + test_reader = paddle.batch( + paddle.dataset.conll05.test(), batch_size=BATCH_SIZE) + avg_cost_set = trainer.test( + reader=test_reader, feed_order=feed_order) + + # get avg cost + avg_cost = np.array(avg_cost_set).mean() + + print("avg_cost: %s" % avg_cost) + + if float(avg_cost) < 100.0: # Large value to increase CI speed + trainer.save_params(params_dirname) + else: + print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1, + float(avg_cost))) + if math.isnan(float(avg_cost)): + sys.exit("got NaN loss, training failed.") + + elif isinstance(event, fluid.EndStepEvent): + print("Step {0}, Epoch {1} Metrics {2}".format( + event.step, event.epoch, map(np.array, event.metrics))) + if event.step == 1: # Run 2 iterations to speed CI + trainer.save_params(params_dirname) + trainer.stop() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=BATCH_SIZE) + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=feed_order) + + +def infer(use_cuda, inference_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + inferencer = fluid.Inferencer( + inference_program, param_path=params_dirname, place=place) + + # Setup inputs by creating LoDTensors to represent sequences of words. + # Here each word is the basic element of these LoDTensors and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensors will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + pred = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=PRED_DICT_LEN - 1) + ctx_n2 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + ctx_n1 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + ctx_0 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + ctx_p1 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + ctx_p2 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + mark = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=MARK_DICT_LEN - 1) + + results = inferencer.infer( + { + 'word_data': word, + 'verb_data': pred, + 'ctx_n2_data': ctx_n2, + 'ctx_n1_data': ctx_n1, + 'ctx_0_data': ctx_0, + 'ctx_p1_data': ctx_p1, + 'ctx_p2_data': ctx_p2, + 'mark_data': mark + }, + return_numpy=False) + + print("infer results: ", np.array(results[0])) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "label_semantic_roles.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) + + +if __name__ == '__main__': + for use_cuda in (False, True): + main(use_cuda=use_cuda) diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/machine_translation/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..673c965b662a022739f8d489c331f4de9455a926 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/machine_translation/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py new file mode 100644 index 0000000000000000000000000000000000000000..7204c7b3c7648a24de89d41e205db5b18ed2a5fc --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py @@ -0,0 +1,319 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import contextlib + +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.framework as framework +import paddle.fluid.layers as pd +from paddle.fluid.executor import Executor +from functools import partial +import unittest +import os + +dict_size = 30000 +source_dict_dim = target_dict_dim = dict_size +hidden_dim = 32 +word_dim = 16 +batch_size = 2 +max_length = 8 +topk_size = 50 +trg_dic_size = 10000 +beam_size = 2 + +decoder_size = hidden_dim + + +def encoder(is_sparse): + # encoder + src_word_id = pd.data( + name="src_word_id", shape=[1], dtype='int64', lod_level=1) + src_embedding = pd.embedding( + input=src_word_id, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=is_sparse, + param_attr=fluid.ParamAttr(name='vemb')) + + fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') + lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4) + encoder_out = pd.sequence_last_step(input=lstm_hidden0) + return encoder_out + + +def decoder_train(context, is_sparse): + # decoder + trg_language_word = pd.data( + name="target_language_word", shape=[1], dtype='int64', lod_level=1) + trg_embedding = pd.embedding( + input=trg_language_word, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=is_sparse, + param_attr=fluid.ParamAttr(name='vemb')) + + rnn = pd.DynamicRNN() + with rnn.block(): + current_word = rnn.step_input(trg_embedding) + pre_state = rnn.memory(init=context) + current_state = pd.fc(input=[current_word, pre_state], + size=decoder_size, + act='tanh') + + current_score = pd.fc(input=current_state, + size=target_dict_dim, + act='softmax') + rnn.update_memory(pre_state, current_state) + rnn.output(current_score) + + return rnn() + + +def decoder_decode(context, is_sparse): + init_state = context + array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length) + counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True) + + # fill the first element with init_state + state_array = pd.create_array('float32') + pd.array_write(init_state, array=state_array, i=counter) + + # ids, scores as memory + ids_array = pd.create_array('int64') + scores_array = pd.create_array('float32') + + init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2) + init_scores = pd.data( + name="init_scores", shape=[1], dtype="float32", lod_level=2) + + pd.array_write(init_ids, array=ids_array, i=counter) + pd.array_write(init_scores, array=scores_array, i=counter) + + cond = pd.less_than(x=counter, y=array_len) + + while_op = pd.While(cond=cond) + with while_op.block(): + pre_ids = pd.array_read(array=ids_array, i=counter) + pre_state = pd.array_read(array=state_array, i=counter) + pre_score = pd.array_read(array=scores_array, i=counter) + + # expand the lod of pre_state to be the same with pre_score + pre_state_expanded = pd.sequence_expand(pre_state, pre_score) + + pre_ids_emb = pd.embedding( + input=pre_ids, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=is_sparse) + + # use rnn unit to update rnn + current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb], + size=decoder_size, + act='tanh') + current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score) + # use score to do beam search + current_score = pd.fc(input=current_state_with_lod, + size=target_dict_dim, + act='softmax') + topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + selected_ids, selected_scores = pd.beam_search( + pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + + pd.increment(x=counter, value=1, in_place=True) + + # update the memories + pd.array_write(current_state, array=state_array, i=counter) + pd.array_write(selected_ids, array=ids_array, i=counter) + pd.array_write(selected_scores, array=scores_array, i=counter) + + pd.less_than(x=counter, y=array_len, cond=cond) + + translation_ids, translation_scores = pd.beam_search_decode( + ids=ids_array, scores=scores_array) + + # return init_ids, init_scores + + return translation_ids, translation_scores + + +def set_init_lod(data, lod, place): + res = fluid.LoDTensor() + res.set(data, place) + res.set_lod(lod) + return res + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = fluid.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def train_program(is_sparse): + context = encoder(is_sparse) + rnn_out = decoder_train(context, is_sparse) + label = pd.data( + name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) + cost = pd.cross_entropy(input=rnn_out, label=label) + avg_cost = pd.mean(cost) + return avg_cost + + +def train(use_cuda, is_sparse, is_local=True): + EPOCH_NUM = 1 + + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) + + feed_order = [ + 'src_word_id', 'target_language_word', 'target_language_next_word' + ] + + def event_handler(event): + if isinstance(event, fluid.EndStepEvent): + print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step)) + if event.step == 10: + trainer.stop() + + trainer = fluid.Trainer( + train_func=partial(train_program, is_sparse), + optimizer=fluid.optimizer.Adagrad( + learning_rate=1e-4, + regularization=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.1)), + place=place) + + trainer.train( + reader=train_reader, + num_epochs=EPOCH_NUM, + event_handler=event_handler, + feed_order=feed_order) + + +def decode_main(use_cuda, is_sparse): + + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + context = encoder(is_sparse) + translation_ids, translation_scores = decoder_decode(context, is_sparse) + + exe = Executor(place) + exe.run(framework.default_startup_program()) + + init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64') + init_scores_data = np.array( + [1. for _ in range(batch_size)], dtype='float32') + init_ids_data = init_ids_data.reshape((batch_size, 1)) + init_scores_data = init_scores_data.reshape((batch_size, 1)) + init_lod = [i for i in range(batch_size)] + [batch_size] + init_lod = [init_lod, init_lod] + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) + for _, data in enumerate(train_data()): + init_ids = set_init_lod(init_ids_data, init_lod, place) + init_scores = set_init_lod(init_scores_data, init_lod, place) + + src_word_data = to_lodtensor(map(lambda x: x[0], data), place) + + result_ids, result_scores = exe.run( + framework.default_main_program(), + feed={ + 'src_word_id': src_word_data, + 'init_ids': init_ids, + 'init_scores': init_scores + }, + fetch_list=[translation_ids, translation_scores], + return_numpy=False) + print result_ids.lod() + break + + +class TestMachineTranslation(unittest.TestCase): + pass + + +@contextlib.contextmanager +def scope_prog_guard(): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield + + +def inject_test_train(use_cuda, is_sparse): + f_name = 'test_{0}_{1}_train'.format('cuda' if use_cuda else 'cpu', 'sparse' + if is_sparse else 'dense') + + def f(*args): + with scope_prog_guard(): + train(use_cuda, is_sparse) + + setattr(TestMachineTranslation, f_name, f) + + +def inject_test_decode(use_cuda, is_sparse, decorator=None): + f_name = 'test_{0}_{1}_decode'.format('cuda' + if use_cuda else 'cpu', 'sparse' + if is_sparse else 'dense') + + def f(*args): + with scope_prog_guard(): + decode_main(use_cuda, is_sparse) + + if decorator is not None: + f = decorator(f) + + setattr(TestMachineTranslation, f_name, f) + + +for _use_cuda_ in (False, True): + for _is_sparse_ in (False, True): + inject_test_train(_use_cuda_, _is_sparse_) + +for _use_cuda_ in (False, True): + for _is_sparse_ in (False, True): + + _decorator_ = None + if _use_cuda_: + _decorator_ = unittest.skip( + reason='Beam Search does not support CUDA!') + + inject_test_decode( + is_sparse=_is_sparse_, use_cuda=_use_cuda_, decorator=_decorator_) + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index 2128d4c5b87434ebe30930dc0e338b3b50d921c2..03439cbd37671b4727879bf3d0793f016f55247a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -57,7 +57,7 @@ def train_program(): return [avg_cost, acc] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adam(learning_rate=0.001) @@ -78,7 +78,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( event.epoch + 1, avg_cost, acc)) @@ -100,11 +100,11 @@ def train(use_cuda, train_program, save_dirname): feed_order=['img', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 1 tensor_img = numpy.random.uniform(-1.0, 1.0, @@ -112,21 +112,21 @@ def infer(use_cuda, inference_program, save_dirname=None): results = inferencer.infer({'img': tensor_img}) - print("infer results: ", numpy.array(results[0])) + print("infer results: ", results[0]) def main(use_cuda): - save_dirname = "recognize_digits_conv.inference.model" + params_dirname = "recognize_digits_conv.inference.model" # call train() with is_local argument to run distributed train train( use_cuda=use_cuda, train_program=train_program, - save_dirname=save_dirname) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_dirname=save_dirname) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index 041c8d778e5c03aa68dad6ef450934f09c8d2a52..89bbd21bea7d64a8dd6fc32829b6addb680da62e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -44,7 +44,7 @@ def train_program(): return [avg_cost, acc] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adam(learning_rate=0.001) @@ -62,7 +62,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) else: print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format( event.epoch + 1, avg_cost, acc)) @@ -81,11 +81,11 @@ def train(use_cuda, train_program, save_dirname): feed_order=['img', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) + infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 1 tensor_img = numpy.random.uniform(-1.0, 1.0, @@ -93,21 +93,21 @@ def infer(use_cuda, inference_program, save_dirname=None): results = inferencer.infer({'img': tensor_img}) - print("infer results: ", numpy.array(results[0])) + print("infer results: ", results[0]) def main(use_cuda): - save_dirname = "recognize_digits_mlp.inference.model" + params_dirname = "recognize_digits_mlp.inference.model" # call train() with is_local argument to run distributed train train( use_cuda=use_cuda, train_program=train_program, - save_dirname=save_dirname) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=inference_program, - save_dirname=save_dirname) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/recommender_system/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..673c965b662a022739f8d489c331f4de9455a926 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py new file mode 100644 index 0000000000000000000000000000000000000000..dfc7325acf23176c05fe42761b9997b98d23372a --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -0,0 +1,255 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import sys +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +import paddle.fluid.nets as nets + +IS_SPARSE = True +USE_GPU = False +BATCH_SIZE = 256 + + +def get_usr_combined_features(): + # FIXME(dzh) : old API integer_value(10) may have range check. + # currently we don't have user configurated check. + + USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 + + uid = layers.data(name='user_id', shape=[1], dtype='int64') + + usr_emb = layers.embedding( + input=uid, + dtype='float32', + size=[USR_DICT_SIZE, 32], + param_attr='user_table', + is_sparse=IS_SPARSE) + + usr_fc = layers.fc(input=usr_emb, size=32) + + USR_GENDER_DICT_SIZE = 2 + + usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') + + usr_gender_emb = layers.embedding( + input=usr_gender_id, + size=[USR_GENDER_DICT_SIZE, 16], + param_attr='gender_table', + is_sparse=IS_SPARSE) + + usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) + + USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) + usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") + + usr_age_emb = layers.embedding( + input=usr_age_id, + size=[USR_AGE_DICT_SIZE, 16], + is_sparse=IS_SPARSE, + param_attr='age_table') + + usr_age_fc = layers.fc(input=usr_age_emb, size=16) + + USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 + usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") + + usr_job_emb = layers.embedding( + input=usr_job_id, + size=[USR_JOB_DICT_SIZE, 16], + param_attr='job_table', + is_sparse=IS_SPARSE) + + usr_job_fc = layers.fc(input=usr_job_emb, size=16) + + concat_embed = layers.concat( + input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1) + + usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") + + return usr_combined_features + + +def get_mov_combined_features(): + + MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 + + mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') + + mov_emb = layers.embedding( + input=mov_id, + dtype='float32', + size=[MOV_DICT_SIZE, 32], + param_attr='movie_table', + is_sparse=IS_SPARSE) + + mov_fc = layers.fc(input=mov_emb, size=32) + + CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) + + category_id = layers.data( + name='category_id', shape=[1], dtype='int64', lod_level=1) + + mov_categories_emb = layers.embedding( + input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) + + mov_categories_hidden = layers.sequence_pool( + input=mov_categories_emb, pool_type="sum") + + MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) + + mov_title_id = layers.data( + name='movie_title', shape=[1], dtype='int64', lod_level=1) + + mov_title_emb = layers.embedding( + input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) + + mov_title_conv = nets.sequence_conv_pool( + input=mov_title_emb, + num_filters=32, + filter_size=3, + act="tanh", + pool_type="sum") + + concat_embed = layers.concat( + input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) + + # FIXME(dzh) : need tanh operator + mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") + + return mov_combined_features + + +def inference_program(): + usr_combined_features = get_usr_combined_features() + mov_combined_features = get_mov_combined_features() + + inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) + scale_infer = layers.scale(x=inference, scale=5.0) + + return scale_infer + + +def train_program(): + + scale_infer = inference_program() + + label = layers.data(name='score', shape=[1], dtype='float32') + square_cost = layers.square_error_cost(input=scale_infer, label=label) + avg_cost = layers.mean(square_cost) + + return [avg_cost, scale_infer] + + +def train(use_cuda, train_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + optimizer = fluid.optimizer.SGD(learning_rate=0.2) + + trainer = fluid.Trainer( + train_func=train_program, place=place, optimizer=optimizer) + + feed_order = [ + 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id', + 'movie_title', 'score' + ] + + def event_handler(event): + if isinstance(event, fluid.EndStepEvent): + test_reader = paddle.batch( + paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) + avg_cost_set = trainer.test( + reader=test_reader, feed_order=feed_order) + + # get avg cost + avg_cost = np.array(avg_cost_set).mean() + + print("avg_cost: %s" % avg_cost) + + if float(avg_cost) < 4: # Smaller value to increase CI speed + trainer.save_params(params_dirname) + trainer.stop() + else: + print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1, + float(avg_cost))) + if math.isnan(float(avg_cost)): + sys.exit("got NaN loss, training failed.") + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.movielens.train(), buf_size=8192), + batch_size=BATCH_SIZE) + + trainer.train( + num_epochs=1, + event_handler=event_handler, + reader=train_reader, + feed_order=feed_order) + + +def infer(use_cuda, inference_program, params_dirname): + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + inferencer = fluid.Inferencer( + inference_program, param_path=params_dirname, place=place) + + # Use the first data from paddle.dataset.movielens.test() as input. + # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor, + # where `data` is a list of sequences of index numbers, `lod` is + # the level of detail (lod) info associated with `data`. + # For example, data = [[10, 2, 3], [2, 3]] means that it contains + # two sequences of indexes, of length 3 and 2, respectively. + # Correspondingly, lod = [[3, 2]] contains one level of detail info, + # indicating that `data` consists of two sequences of length 3 and 2. + user_id = fluid.create_lod_tensor([[1]], [[1]], place) + gender_id = fluid.create_lod_tensor([[1]], [[1]], place) + age_id = fluid.create_lod_tensor([[0]], [[1]], place) + job_id = fluid.create_lod_tensor([[10]], [[1]], place) + movie_id = fluid.create_lod_tensor([[783]], [[1]], place) + category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) + movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]], + place) + + results = inferencer.infer( + { + 'user_id': user_id, + 'gender_id': gender_id, + 'age_id': age_id, + 'job_id': job_id, + 'movie_id': movie_id, + 'category_id': category_id, + 'movie_title': movie_title + }, + return_numpy=False) + + print("infer results: ", np.array(results[0])) + + +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + params_dirname = "recommender_system.inference.model" + train( + use_cuda=use_cuda, + train_program=train_program, + params_dirname=params_dirname) + infer( + use_cuda=use_cuda, + inference_program=inference_program, + params_dirname=params_dirname) + + +if __name__ == '__main__': + main(USE_GPU) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt index 673c965b662a022739f8d489c331f4de9455a926..d71147a85e77ea6dc5b6391aa169abd9b02a0aa1 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt @@ -1,6 +1,11 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") +# This test is buggy +# py_test(test_understand_sentiment_dynamic_rnn SRCS +# test_understand_sentiment_dynamic_rnn.py SERIAL) +LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn) + # default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 89179fc586cde99318a17bab287441c0f2d6c369..11e9fd1bec1450f6753dbe38c7014090d6e136b6 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -64,7 +64,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -85,7 +85,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -112,26 +112,30 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) - def create_random_lodtensor(lod, place, low, high): - data = np.random.random_integers(low, high, - [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - lod = [0, 4, 10] - tensor_words = create_random_lodtensor( - lod, place, low=0, high=len(word_dict) - 1) + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) @@ -139,9 +143,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_conv.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index 7db097b3b377c763ceed9fa909672088effe50cf..90757d54f99715163518ce5a094e6ba3a67efed3 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -79,7 +79,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -100,7 +100,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -112,7 +112,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -127,26 +127,30 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) - def create_random_lodtensor(lod, place, low, high): - data = np.random.random_integers(low, high, - [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - lod = [0, 4, 10] - tensor_words = create_random_lodtensor( - lod, place, low=0, high=len(word_dict) - 1) + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) @@ -154,9 +158,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_conv.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_conv.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index 0d7cbe3874cbc0c2def9d0032737f81e662296d6..52b7d4a83779d01936afb3d9d1e4864b05d55b5a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -71,7 +71,7 @@ def train_program(word_dict): return [avg_cost, accuracy] -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) @@ -92,7 +92,7 @@ def train(use_cuda, train_program, save_dirname): print("acc : %s" % acc) if acc > 0.2: # Smaller value to increase CI speed - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() else: @@ -104,7 +104,7 @@ def train(use_cuda, train_program, save_dirname): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, map(np.array, event.metrics))) if event.step == 1: # Run 2 iterations to speed CI - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() train_reader = paddle.batch( @@ -119,26 +119,30 @@ def train(use_cuda, train_program, save_dirname): feed_order=['words', 'label']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() inferencer = fluid.Inferencer( infer_func=partial(inference_program, word_dict), - param_path=save_dirname, + param_path=params_dirname, place=place) - def create_random_lodtensor(lod, place, low, high): - data = np.random.random_integers(low, high, - [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - lod = [0, 4, 10] - tensor_words = create_random_lodtensor( - lod, place, low=0, high=len(word_dict) - 1) + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) @@ -146,9 +150,9 @@ def infer(use_cuda, inference_program, save_dirname=None): def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "understand_sentiment_stacked_lstm.inference.model" - train(use_cuda, train_program, save_path) - infer(use_cuda, inference_program, save_path) + params_dirname = "understand_sentiment_stacked_lstm.inference.model" + train(use_cuda, train_program, params_dirname) + infer(use_cuda, inference_program, params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/word2vec/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..673c965b662a022739f8d489c331f4de9455a926 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index bf86cd9acf8da940fcc2fb5b594e33f9b6965acb..eeb8e67087334ea96aab9cdb6272e34e2eb99939 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -25,16 +25,6 @@ HIDDEN_SIZE = 256 N = 5 BATCH_SIZE = 32 - -def create_random_lodtensor(lod, place, low, high): - # The range of data elements is [low, high] - data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) @@ -90,7 +80,7 @@ def train_program(is_sparse): return avg_cost -def train(use_cuda, train_program, save_dirname): +def train(use_cuda, train_program, params_dirname): train_reader = paddle.batch( paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) test_reader = paddle.batch( @@ -107,7 +97,7 @@ def train(use_cuda, train_program, save_dirname): print("loss= ", avg_cost) if avg_cost < 10.0: - trainer.save_params(save_dirname) + trainer.save_params(params_dirname) trainer.stop() if math.isnan(avg_cost): @@ -125,16 +115,28 @@ def train(use_cuda, train_program, save_dirname): feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) -def infer(use_cuda, inference_program, save_dirname=None): +def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_dirname, place=place) - - lod = [0, 1] - first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) - second_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) - third_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) - fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) + infer_func=inference_program, param_path=params_dirname, place=place) + + # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word + # is simply an index to look up for the corresponding word vector and hence + # the shape of word (base_shape) should be [1]. The length-based level of + # detail (lod) info of each LoDtensor should be [[1]] meaning there is only + # one lod_level and there is only one sequence of one word on this level. + # Note that lod info should be a list of lists. + lod = [[1]] + base_shape = [1] + # The range of random integers is [low, high] + first_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) + second_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) + third_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) + fourth_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) result = inferencer.infer( { @@ -151,17 +153,17 @@ def main(use_cuda, is_sparse): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "word2vec.inference.model" + params_dirname = "word2vec.inference.model" train( use_cuda=use_cuda, train_program=partial(train_program, is_sparse), - save_dirname=save_path) + params_dirname=params_dirname) infer( use_cuda=use_cuda, inference_program=partial(inference_program, is_sparse), - save_dirname=save_path) + params_dirname=params_dirname) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 792ed7368d646cd9dff9255eb402b6a9b84f69a6..c6687e8ad7fcc45c82d6dcb2256e9055a81cc61c 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -125,14 +125,6 @@ def stacked_lstm_net(data, return avg_cost, accuracy, prediction -def create_random_lodtensor(lod, place, low, high): - data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - def train(word_dict, net_method, use_cuda, @@ -242,9 +234,21 @@ def infer(word_dict, use_cuda, save_dirname=None): word_dict_len = len(word_dict) - lod = [0, 4, 10] - tensor_words = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensor will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + tensor_words = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index f1ee5dfd99e1c8b26280c010c1aaca05a004a5b6..bc8a1aafc82d62501cecfa71be0cc3851c75eae2 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -116,29 +116,6 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, return feature_out -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def create_random_lodtensor(lod, place, low, high): - data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - def train(use_cuda, save_dirname=None, is_local=True): # define network topology word = fluid.layers.data( @@ -271,23 +248,35 @@ def infer(use_cuda, save_dirname=None): [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) - lod = [0, 4, 10] - word = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) - pred = create_random_lodtensor( - lod, place, low=0, high=pred_dict_len - 1) - ctx_n2 = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) - ctx_n1 = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) - ctx_0 = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) - ctx_p1 = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) - ctx_p2 = create_random_lodtensor( - lod, place, low=0, high=word_dict_len - 1) - mark = create_random_lodtensor( - lod, place, low=0, high=mark_dict_len - 1) + # Setup inputs by creating LoDTensors to represent sequences of words. + # Here each word is the basic element of these LoDTensors and the shape of + # each word (base_shape) should be [1] since it is simply an index to + # look up for the corresponding word vector. + # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], + # which has only one lod level. Then the created LoDTensors will have only + # one higher level structure (sequence of words, or sentence) than the basic + # element (word). Hence the LoDTensor will hold data for three sentences of + # length 3, 4 and 2, respectively. + # Note that lod info should be a list of lists. + lod = [[3, 4, 2]] + base_shape = [1] + # The range of random integers is [low, high] + word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) + pred = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=pred_dict_len - 1) + ctx_n2 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) + ctx_n1 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) + ctx_0 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) + ctx_p1 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) + ctx_p2 = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=word_dict_len - 1) + mark = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=mark_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 7be924f762ddeb045dda890dbfdcd96a65449553..65d6552acc9b3d31a97a45290e4613a633fffa3c 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True): test_reader = paddle.batch( paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) - feeding = { - 'user_id': 0, - 'gender_id': 1, - 'age_id': 2, - 'job_id': 3, - 'movie_id': 4, - 'category_id': 5, - 'movie_title': 6, - 'score': 7 - } - - def func_feed(feeding, data): - feed_tensors = {} - for (key, idx) in feeding.iteritems(): - tensor = fluid.LoDTensor() - if key != "category_id" and key != "movie_title": - if key == "score": - numpy_data = np.array(map(lambda x: x[idx], data)).astype( - "float32") - else: - numpy_data = np.array(map(lambda x: x[idx], data)).astype( - "int64") - else: - numpy_data = map(lambda x: np.array(x[idx]).astype("int64"), - data) - lod_info = [len(item) for item in numpy_data] - offset = 0 - lod = [offset] - for item in lod_info: - offset += item - lod.append(offset) - numpy_data = np.concatenate(numpy_data, axis=0) - tensor.set_lod([lod]) - - numpy_data = numpy_data.reshape([numpy_data.shape[0], 1]) - tensor.set(numpy_data, place) - feed_tensors[key] = tensor - return feed_tensors + feed_order = [ + 'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id', + 'movie_title', 'score' + ] def train_loop(main_program): exe.run(framework.default_startup_program()) + feed_list = [ + main_program.global_block().var(var_name) for var_name in feed_order + ] + feeder = fluid.DataFeeder(feed_list, place) + PASS_NUM = 100 for pass_id in range(PASS_NUM): for batch_id, data in enumerate(train_reader()): # train a mini-batch outs = exe.run(program=main_program, - feed=func_feed(feeding, data), + feed=feeder.feed(data), fetch_list=[avg_cost]) out = np.array(outs[0]) if (batch_id + 1) % 10 == 0: avg_cost_set = [] for test_data in test_reader(): - avg_cost_np = exe.run( - program=test_program, - feed=func_feed(feeding, test_data), - fetch_list=[avg_cost]) + avg_cost_np = exe.run(program=test_program, + feed=feeder.feed(test_data), + fetch_list=[avg_cost]) avg_cost_set.append(avg_cost_np[0]) break # test only 1 segment for speeding up CI @@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) - def create_lod_tensor(data, lod=None): - tensor = fluid.LoDTensor() - if lod is None: - # Tensor, the shape is [batch_size, 1] - index = 0 - lod_0 = [index] - for l in range(len(data)): - index += 1 - lod_0.append(index) - lod = [lod_0] - tensor.set_lod(lod) - - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - tensor.set(flattened_data, place) - return tensor - inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, @@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None): # Use the first data from paddle.dataset.movielens.test() as input assert feed_target_names[0] == "user_id" - user_id = create_lod_tensor([[1]]) + # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor + # where `data` is a list of sequences of index numbers, `lod` is + # the level of detail (lod) info associated with `data`. + # For example, data = [[10, 2, 3], [2, 3]] means that it contains + # two sequences of indexes, of length 3 and 2, respectively. + # Correspondingly, lod = [[3, 2]] contains one level of detail info, + # indicating that `data` consists of two sequences of length 3 and 2. + user_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[1] == "gender_id" - gender_id = create_lod_tensor([[1]]) + gender_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[2] == "age_id" - age_id = create_lod_tensor([[0]]) + age_id = fluid.create_lod_tensor([[0]], [[1]], place) assert feed_target_names[3] == "job_id" - job_id = create_lod_tensor([[10]]) + job_id = fluid.create_lod_tensor([[10]], [[1]], place) assert feed_target_names[4] == "movie_id" - movie_id = create_lod_tensor([[783]]) + movie_id = fluid.create_lod_tensor([[783]], [[1]], place) assert feed_target_names[5] == "category_id" - category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]]) + category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) assert feed_target_names[6] == "movie_title" - movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]], - [[0, 5]]) + movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], + [[5]], place) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 30e1a5040cc92b02bbbf90dac97001812ec90134..3118d88701e5f64ae50f7ee774ea8174aa7758eb 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -21,15 +21,6 @@ import math import sys -def create_random_lodtensor(lod, place, low, high): - # The range of data elements is [low, high] - data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64") - res = fluid.LoDTensor() - res.set(data, place) - res.set_lod([lod]) - return res - - def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): PASS_NUM = 100 EMBED_SIZE = 32 @@ -175,16 +166,23 @@ def infer(use_cuda, save_dirname=None): word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) - # Setup inputs, by creating 4 words, the lod of which should be [0, 1] - lod = [0, 1] - first_word = create_random_lodtensor( - lod, place, low=0, high=dict_size - 1) - second_word = create_random_lodtensor( - lod, place, low=0, high=dict_size - 1) - third_word = create_random_lodtensor( - lod, place, low=0, high=dict_size - 1) - fourth_word = create_random_lodtensor( - lod, place, low=0, high=dict_size - 1) + # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word + # is simply an index to look up for the corresponding word vector and hence + # the shape of word (base_shape) should be [1]. The length-based level of + # detail (lod) info of each LoDtensor should be [[1]] meaning there is only + # one lod_level and there is only one sequence of one word on this level. + # Note that lod info should be a list of lists. + lod = [[1]] + base_shape = [1] + # The range of random integers is [low, high] + first_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) + second_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) + third_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) + fourth_word = fluid.create_random_int_lodtensor( + lod, base_shape, place, low=0, high=dict_size - 1) assert feed_target_names[0] == 'firstw' assert feed_target_names[1] == 'secondw' diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..013d72f418cf7ac11eb31fd221052039e896e203 --- /dev/null +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -0,0 +1,91 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor, _validate_lod, _convert_lod +import numpy +import unittest + + +class TestLoDTensor(unittest.TestCase): + def test_validate_lod(self): + lod = (1, 2, 1) + self.assertRaises(AssertionError, _validate_lod, lod, -1) + lod = [[1, 2], (2, 3)] + self.assertRaises(AssertionError, _validate_lod, lod, -1) + lod = [1, 2, 3] + self.assertRaises(AssertionError, _validate_lod, lod, -1) + + lod = [] + self.assertTrue(_validate_lod(lod, -1)) + lod = [[], [1], [3]] + self.assertFalse(_validate_lod(lod, -1)) + lod = [[0], [-1], [3]] + self.assertFalse(_validate_lod(lod, -1)) + + # Each level's sum should be equal to the number of items in the next level + # Moreover, last level's sum should be equal to the tensor height + lod = [[2, 3], [1, 3, 1, 2, 1]] + self.assertTrue(_validate_lod(lod, tensor_height=8)) + lod = [[1, 3], [2, 1, 3]] + self.assertFalse(_validate_lod(lod, tensor_height=6)) + lod = [[1, 3], [2, 1, 3, 4]] + self.assertFalse(_validate_lod(lod, tensor_height=5)) + + def test_convert_lod(self): + lod = [[1, 2, 3]] + converted_lod = [[0, 1, 3, 6]] + self.assertEqual(_convert_lod(lod), converted_lod) + + lod = [[2, 3], [1, 3, 1, 2, 1]] + converted_lod = [[0, 2, 5], [0, 1, 4, 5, 7, 8]] + self.assertEqual(_convert_lod(lod), converted_lod) + + def test_create_lod_tensor(self): + # Create LoDTensor from a list + data = [[1, 2, 3], [3, 4]] + wrong_lod = [[2, 2]] + correct_lod = [[3, 2]] + self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, + fluid.CPUPlace()) + tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) + self.assertEqual(tensor.lod(), [[0, 3, 5]]) + + # Create LoDTensor from numpy array + data = numpy.random.random([10, 1]) + lod = [[2, 1], [3, 3, 4]] + tensor = create_lod_tensor(data, lod, fluid.CPUPlace()) + self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) + + # Create LoDTensor from another LoDTensor, they are differnt instances + new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]] + new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace()) + self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) + self.assertEqual(new_tensor.lod(), [[0, 2, 4, 5], [0, 1, 3, 5, 8, 10]]) + + def test_create_random_int_lodtensor(self): + # The shape of a word, commonly used in speech and NLP problem, is [1] + shape = [1] + lod = [[2, 3, 5]] + dict_size = 10000 + low = 0 + high = dict_size - 1 + tensor = create_random_int_lodtensor(lod, shape, + fluid.CPUPlace(), low, high) + self.assertEqual(tensor.lod(), [[0, 2, 5, 10]]) + self.assertEqual(tensor.shape(), [10, 1]) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index be704a7be7d2c9f3c95ad81ca906eeaf73b35beb..eed1412ba4f2b8f2209c0573359bea1e4b20d8d5 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -17,7 +17,7 @@ endif(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184 list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 -list(REMOVE_ITEM TEST_OPS test_nce) # IXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778 +list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778 list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152 list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957 @@ -39,74 +39,12 @@ function(py_test_modules TARGET_NAME) endif() endif() endfunction() - -list(REMOVE_ITEM TEST_OPS test_sequence_expand) - -# test time consuming OPs in a separate process for expliot parallism -list(REMOVE_ITEM TEST_OPS test_parallel_executor) list(REMOVE_ITEM TEST_OPS test_warpctc_op) -list(REMOVE_ITEM TEST_OPS test_dyn_rnn) -list(REMOVE_ITEM TEST_OPS test_mul_op) - -# tests that need to be run in separate process. -list(REMOVE_ITEM TEST_OPS test_multihead_attention) -list(REMOVE_ITEM TEST_OPS test_calc_gradient) -list(REMOVE_ITEM TEST_OPS test_while_op) -list(REMOVE_ITEM TEST_OPS test_lod_array_length_op) -list(REMOVE_ITEM TEST_OPS test_reorder_lod_tensor) -list(REMOVE_ITEM TEST_OPS test_profiler) -list(REMOVE_ITEM TEST_OPS test_nvprof) -list(REMOVE_ITEM TEST_OPS test_normalization_wrapper) -list(REMOVE_ITEM TEST_OPS test_executor_and_mul) -list(REMOVE_ITEM TEST_OPS test_assign_value_op) -list(REMOVE_ITEM TEST_OPS test_array_read_write_op) -list(REMOVE_ITEM TEST_OPS test_lod_rank_table) -list(REMOVE_ITEM TEST_OPS test_weight_normalization) -list(REMOVE_ITEM TEST_OPS test_conditional_block) -list(REMOVE_ITEM TEST_OPS test_parameter) -list(REMOVE_ITEM TEST_OPS test_registry) -list(REMOVE_ITEM TEST_OPS test_fetch_var) -list(REMOVE_ITEM TEST_OPS test_parallel_op) -list(REMOVE_ITEM TEST_OPS test_dynrnn_static_input) list(REMOVE_ITEM TEST_OPS test_dist_train) -list(REMOVE_ITEM TEST_OPS test_network_with_dtype) - -# tests that can be bundled together in one python process for speed. -if(WITH_FAST_BUNDLE_TEST) - py_test_modules("test_all_ops" MODULES ${TEST_OPS}) -else() - foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - endforeach(TEST_OP) -endif(WITH_FAST_BUNDLE_TEST) - -# -py_test_modules(test_sequence_expand MODULES test_sequence_expand) -# tests with high overhead -py_test_modules(test_parallel_executor MODULES test_parallel_executor) +list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) +list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) +foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) +endforeach(TEST_OP) py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL) -py_test_modules(test_train_dyn_rnn MODULES test_dyn_rnn) -py_test_modules(test_mul_op MODULES test_mul_op) -py_test_modules(test_network_with_dtype MODULES test_network_with_dtype) - -# tests that need to be run in separate process. -py_test_modules(test_multihead_attention MODULES test_multihead_attention) -py_test_modules(test_calc_gradient MODULES test_calc_gradient) -py_test_modules(test_while_op MODULES test_while_op) -py_test_modules(test_lod_array_length_op MODULES test_lod_array_length_op) -py_test_modules(test_reorder_lod_tensor MODULES test_reorder_lod_tensor) -py_test_modules(test_profiler MODULES test_profiler) -py_test_modules(test_nvprof MODULES test_nvprof) -py_test_modules(test_normalization_wrapper MODULES test_normalization_wrapper) -py_test_modules(test_executor_and_mul MODULES test_executor_and_mul) -py_test_modules(test_assign_value_op MODULES test_assign_value_op) -py_test_modules(test_array_read_write_op MODULES test_array_read_write_op) -py_test_modules(test_lod_rank_table MODULES test_lod_rank_table) -py_test_modules(test_weight_normalization MODULES test_weight_normalization) -py_test_modules(test_conditional_block MODULES test_conditional_block) -py_test_modules(test_parameter MODULES test_parameter) -py_test_modules(test_registry MODULES test_registry) -py_test_modules(test_fetch_var MODULES test_fetch_var) -py_test_modules(test_dynrnn_static_input MODULES test_dynrnn_static_input) -py_test_modules(test_parallel_op MODULES test_parallel_op) py_test_modules(test_dist_train MODULES test_dist_train SERIAL) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 299ab8e51f017e1980a8b40e3830fc42b1ff7ccc..b611470fa1ff326df960c349b71006f52d586d8e 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -36,6 +36,12 @@ def randomize_probability(batch_size, class_num, dtype='float32'): def create_op(scope, op_type, inputs, outputs, attrs): kwargs = dict() + op_maker = core.op_proto_and_checker_maker + op_role_attr_name = op_maker.kOpRoleAttrName() + + if op_role_attr_name not in attrs: + attrs[op_role_attr_name] = int(op_maker.OpRole.Forward) + def __create_var__(name, var_name): scope.var(var_name).get_tensor() kwargs[name].append(var_name) @@ -473,9 +479,9 @@ class OpTest(unittest.TestCase): def np_dtype_to_fluid_dtype(input): """Change the dtype of float16 numpy array - numpy float16 is binded to paddle::platform::float16 + numpy float16 is binded to paddle::platform::float16 in tensor_py.h via the help of uint16 data type since - the internal memory representation of float16 is + the internal memory representation of float16 is uint16_t in paddle and np.uint16 in numpy, which are themselves binded together by pybind. @@ -483,9 +489,9 @@ class OpTest(unittest.TestCase): input: input numpy array Returns: - input: The dtype of input will be changed to np.uint16 if + input: The dtype of input will be changed to np.uint16 if it is originally np.float16, such that the internal memory - of input will be reinterpreted as of dtype np.uint16. + of input will be reinterpreted as of dtype np.uint16. """ if input.dtype == np.float16: input.dtype = np.uint16 diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c3c648717814c28c39a401487925824e885946 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -0,0 +1,96 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid +import time +import numpy as np + +__all__ = ['TestParallelExecutorBase'] + + +class TestParallelExecutorBase(unittest.TestCase): + def check_network_convergence(self, + method, + memory_opt=True, + iter=50, + batch_size=None, + allow_op_delay=False, + feed_dict=None, + seed=None, + use_parallel_executor=True, + balance_parameter_opt_between_cards=False): + def run_executor(exe, feed, fetch_list, program=None): + if isinstance(exe, fluid.ParallelExecutor): + res = exe.run(fetch_list=fetch_list, feed=feed) + elif isinstance(exe, fluid.Executor): + if program is None: + program = fluid.default_main_program() + res = exe.run(program=program, feed=feed, fetch_list=fetch_list) + else: + raise ValueError('Unkown type exe') + return res + + main = fluid.Program() + startup = fluid.Program() + startup.random_seed = 1 # Fix random seed + with fluid.program_guard(main, startup): + if seed is not None: + startup.random_seed = seed + loss = method(use_feed=feed_dict is not None) + adam = fluid.optimizer.Adam() + adam.minimize(loss) + if memory_opt: + fluid.memory_optimize(main) + place = fluid.CUDAPlace(0) + startup_exe = fluid.Executor(place) + startup_exe.run(startup) + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.allow_op_delay = allow_op_delay + + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if balance_parameter_opt_between_cards else fluid.BuildStrategy.ReduceStrategy.AllReduce + + if use_parallel_executor: + exe = fluid.ParallelExecutor( + True, + loss_name=loss.name, + exec_strategy=exec_strategy, + build_strategy=build_strategy) + else: + exe = fluid.Executor(place=place) + + if batch_size is not None: + batch_size *= fluid.core.get_cuda_device_count() + begin = time.time() + first_loss, = run_executor( + exe=exe, feed=feed_dict, fetch_list=[loss.name]) + first_loss = np.array(first_loss) + + for i in xrange(iter): + run_executor(exe=exe, feed=feed_dict, fetch_list=[]) + + last_loss, = run_executor( + exe=exe, feed=feed_dict, fetch_list=[loss.name]) + end = time.time() + + if batch_size is not None: + print "%.4f Instance per second" % ( + (batch_size * iter + 2) / (end - begin)) + + last_loss = np.array(last_loss) + + print first_loss, last_loss + # self.assertGreater(first_loss[0], last_loss[0]) + return first_loss, last_loss diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 08479202f637a2102d163f306e3e17bcfa873482..2314bb2ed8a4eeb34752fd5d040f8a8476798aa6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -12,19 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import time import unittest +from multiprocessing import Process + +import numpy import paddle.fluid as fluid -import paddle.fluid.core as core import paddle.fluid.layers as layers -import numpy -from multiprocessing import Process -from threading import Thread -import os, sys -import time class TestSendOp(unittest.TestCase): + @unittest.skip( + "This test is buggy. We cannot use time.sleep to sync processes, the connection may fail in unittest." + ) def test_send(self): # Run init_serv in a thread place = fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py index 66e3e2d51d118756d4881955b4df8eb4d2bbc094..533d8ccfac82a2e298af16181ab16bf7aa3db282 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py @@ -50,5 +50,27 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest): self.check_output() +class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest): + def setUp(self): + self.op_type = "fill_constant_batch_size_like" + self.inputs = { + 'Input': (np.random.random((31, 28)).astype("float32"), + [[0, 9, 23, 31]]) + } + self.attrs = { + 'value': 3.5, + 'shape': [-1, 16], + 'input_dim_idx': 0, + 'output_dim_idx': 0 + } + + out = np.random.random((3, 16)).astype("float32") + out.fill(3.5) + self.outputs = {'Out': out} + + def test_check_output(self): + self.check_output() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index c5414abf0fee6b686dccf7c97e9c6d5408ecf62a..60dc1f83fc32e2551eb2a04ef35f1c8a0ffec769 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -369,6 +369,16 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) + def test_upsampling_bilinear2d(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[3, 9, 6], dtype="float32") + output = layers.upsampling_bilinear2d(x, out_shape=[12, 12]) + self.assertIsNotNone(output) + output = layers.upsampling_bilinear2d(x, scale=3) + self.assertIsNotNone(output) + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index 779ae388f04496a7be9a6d5aa4e39b8245022925..8b15aa6822aee7bb4d53dcf1d87565fae5504821 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -63,7 +63,10 @@ class TestOperator(unittest.TestCase): self.assertEqual(mul_op.output("Out"), ["mul.out"]) self.assertEqual( set(mul_op.attr_names), - set(["x_num_col_dims", "y_num_col_dims", "use_mkldnn"])) + set([ + "x_num_col_dims", "y_num_col_dims", "use_mkldnn", "op_role", + "op_role_var" + ])) self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) self.assertEqual(mul_op.attr("x_num_col_dims"), 1) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py deleted file mode 100644 index 056f9e1781997aa1586d972874b652d5b725fe3f..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ /dev/null @@ -1,902 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import unittest - -import paddle.fluid as fluid -import paddle -import paddle.dataset.mnist as mnist -import paddle.dataset.wmt16 as wmt16 - - -def simple_fc_net(use_feed): - if use_feed: - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - else: - reader = fluid.layers.open_files( - filenames=['./mnist.recordio'], - shapes=[[-1, 784], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64'], - thread_num=1, - for_parallel=True) - reader = fluid.layers.io.double_buffer(reader) - img, label = fluid.layers.read_file(reader) - hidden = img - for _ in xrange(4): - hidden = fluid.layers.fc( - hidden, - size=200, - act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) - prediction = fluid.layers.fc(hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.mean(loss) - return loss - - -def fc_with_batchnorm(use_feed): - if use_feed: - img = fluid.layers.data(name='image', shape=[784], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - else: - reader = fluid.layers.open_files( - filenames=['mnist.recordio'], - shapes=[[-1, 784], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64'], - thread_num=1, - for_parallel=True) - reader = fluid.layers.io.double_buffer(reader) - img, label = fluid.layers.read_file(reader) - - hidden = img - for _ in xrange(1): - hidden = fluid.layers.fc( - hidden, - size=200, - act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) - - hidden = fluid.layers.batch_norm(input=hidden) - - prediction = fluid.layers.fc(hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.mean(loss) - return loss - - -def squeeze_excitation(input, num_channels, reduction_ratio): - # pool = fluid.layers.pool2d( - # input=input, pool_size=0, pool_type='avg', global_pooling=True) - conv = input - shape = conv.shape - reshape = fluid.layers.reshape( - x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) - pool = fluid.layers.reduce_mean(input=reshape, dim=2) - - squeeze = fluid.layers.fc(input=pool, - size=num_channels / reduction_ratio, - act='relu') - excitation = fluid.layers.fc(input=squeeze, - size=num_channels, - act='sigmoid') - scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) - return scale - - -def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, - act=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) / 2, - groups=groups, - act=None, - bias_attr=False) - return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1) - - -def shortcut(input, ch_out, stride): - ch_in = input.shape[1] - if ch_in != ch_out: - if stride == 1: - filter_size = 1 - else: - filter_size = 3 - return conv_bn_layer(input, ch_out, filter_size, stride) - else: - return input - - -def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): - # The number of first 1x1 convolutional channels for each bottleneck build block - # was halved to reduce the compution cost. - conv0 = conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = conv_bn_layer( - input=conv0, - num_filters=num_filters * 2, - filter_size=3, - stride=stride, - groups=cardinality, - act='relu') - conv2 = conv_bn_layer( - input=conv1, num_filters=num_filters * 2, filter_size=1, act=None) - scale = squeeze_excitation( - input=conv2, - num_channels=num_filters * 2, - reduction_ratio=reduction_ratio) - - short = shortcut(input, num_filters * 2, stride) - - return fluid.layers.elementwise_add(x=short, y=scale, act='relu') - - -def SE_ResNeXt50Small(batch_size=2, use_feed=False): - assert not use_feed, "SE_ResNeXt doesn't support feed yet" - - img = fluid.layers.fill_constant( - shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0) - label = fluid.layers.fill_constant( - shape=[batch_size, 1], dtype='int64', value=0.0) - - conv = conv_bn_layer( - input=img, num_filters=16, filter_size=3, stride=2, act='relu') - conv = conv_bn_layer( - input=conv, num_filters=16, filter_size=3, stride=1, act='relu') - conv = conv_bn_layer( - input=conv, num_filters=16, filter_size=3, stride=1, act='relu') - conv = fluid.layers.pool2d( - input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - cardinality = 32 - reduction_ratio = 16 - depth = [3, 4, 6, 3] - num_filters = [128, 256, 512, 1024] - - for block in range(len(depth)): - for i in range(depth[block]): - conv = bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - cardinality=cardinality, - reduction_ratio=reduction_ratio) - - shape = conv.shape - reshape = fluid.layers.reshape( - x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) - pool = fluid.layers.reduce_mean(input=reshape, dim=2) - dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2) - # Classifier layer: - prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.mean(loss) - return loss - - -import time - - -class TestParallelExecutorBase(unittest.TestCase): - def check_network_convergence(self, - method, - memory_opt=True, - iter=50, - batch_size=None, - allow_op_delay=False, - feed_dict=None, - seed=None, - use_parallel_executor=True, - balance_parameter_opt_between_cards=False): - def run_executor(exe, feed, fetch_list, program=None): - if isinstance(exe, fluid.ParallelExecutor): - res = exe.run(fetch_list=fetch_list, feed=feed) - elif isinstance(exe, fluid.Executor): - if program is None: - program = fluid.default_main_program() - res = exe.run(program=program, feed=feed, fetch_list=fetch_list) - else: - raise ValueError('Unkown type exe') - return res - - main = fluid.Program() - startup = fluid.Program() - startup.random_seed = 1 # Fix random seed - with fluid.program_guard(main, startup): - if seed is not None: - startup.random_seed = seed - loss = method(use_feed=feed_dict is not None) - adam = fluid.optimizer.Adam() - adam.minimize(loss) - if memory_opt: - fluid.memory_optimize(main) - place = fluid.CUDAPlace(0) - startup_exe = fluid.Executor(place) - startup_exe.run(startup) - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.allow_op_delay = allow_op_delay - - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if balance_parameter_opt_between_cards else fluid.BuildStrategy.ReduceStrategy.AllReduce - - if use_parallel_executor: - exe = fluid.ParallelExecutor( - True, - loss_name=loss.name, - exec_strategy=exec_strategy, - build_strategy=build_strategy) - else: - exe = fluid.Executor(place=place) - - if batch_size is not None: - batch_size *= fluid.core.get_cuda_device_count() - begin = time.time() - first_loss, = run_executor( - exe=exe, feed=feed_dict, fetch_list=[loss.name]) - first_loss = np.array(first_loss) - - for i in xrange(iter): - run_executor(exe=exe, feed=feed_dict, fetch_list=[]) - - last_loss, = run_executor( - exe=exe, feed=feed_dict, fetch_list=[loss.name]) - end = time.time() - - if batch_size is not None: - print "%.4f Instance per second" % ( - (batch_size * iter + 2) / (end - begin)) - - last_loss = np.array(last_loss) - - print first_loss, last_loss - # self.assertGreater(first_loss[0], last_loss[0]) - return first_loss, last_loss - - -class TestMNIST(TestParallelExecutorBase): - @classmethod - def setUpClass(cls): - # Convert mnist to recordio file - with fluid.program_guard(fluid.Program(), fluid.Program()): - reader = paddle.batch(mnist.train(), batch_size=4) - feeder = fluid.DataFeeder( - feed_list=[ # order is image and label - fluid.layers.data( - name='image', shape=[784]), - fluid.layers.data( - name='label', shape=[1], dtype='int64'), - ], - place=fluid.CPUPlace()) - fluid.recordio_writer.convert_reader_to_recordio_file( - './mnist.recordio', reader, feeder) - - def check_simple_fc_convergence(self, balance_parameter_opt_between_cards): - self.check_network_convergence(simple_fc_net) - self.check_network_convergence(simple_fc_net, allow_op_delay=True) - - img = np.zeros(shape=[32, 784], dtype='float32') - label = np.ones(shape=[32, 1], dtype='int64') - self.check_network_convergence( - simple_fc_net, - feed_dict={"image": img, - "label": label}, - balance_parameter_opt_between_cards=balance_parameter_opt_between_cards - ) - - def test_simple_fc(self): - self.check_simple_fc_convergence(False) - - def test_simple_fc_with_new_strategy(self): - self.check_simple_fc_convergence(True) - - def check_simple_fc_parallel_accuracy(self, - balance_parameter_opt_between_cards): - img = np.zeros(shape=[32, 784], dtype='float32') - label = np.ones(shape=[32, 1], dtype='int64') - single_first_loss, single_last_loss = self.check_network_convergence( - method=simple_fc_net, - seed=1000, - feed_dict={"image": img, - "label": label}, - use_parallel_executor=False) - parallel_first_loss, parallel_last_loss = self.check_network_convergence( - method=simple_fc_net, - seed=1000, - feed_dict={"image": img, - "label": label}, - use_parallel_executor=True, - balance_parameter_opt_between_cards=balance_parameter_opt_between_cards - ) - - for p_f in parallel_first_loss: - self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6) - for p_l in parallel_last_loss: - self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6) - - def test_simple_fc_parallel_accuracy(self): - self.check_simple_fc_parallel_accuracy(False) - - def test_simple_fc_parallel_accuracy_with_new_strategy(self): - self.check_simple_fc_parallel_accuracy(True) - - def check_batchnorm_fc_convergence(self, - balance_parameter_opt_between_cards): - self.check_network_convergence(fc_with_batchnorm) - img = np.zeros(shape=[32, 784], dtype='float32') - label = np.ones(shape=[32, 1], dtype='int64') - self.check_network_convergence( - fc_with_batchnorm, - feed_dict={"image": img, - "label": label}, - balance_parameter_opt_between_cards=balance_parameter_opt_between_cards - ) - - def test_batchnorm_fc(self): - self.check_batchnorm_fc_convergence(False) - - def test_batchnorm_fc_with_new_strategy(self): - self.check_batchnorm_fc_convergence(True) - - -class TestResnet(TestParallelExecutorBase): - # @classmethod - # def setUpClass(cls): - # # import os - # # if os.path.exists('./flowers.recordio'): - # # return - # with fluid.program_guard(fluid.Program(), fluid.Program()): - # reader = paddle.batch(flowers.train(), batch_size=4) - # feeder = fluid.DataFeeder( - # feed_list=[ - # fluid.layers.data( - # name='image', shape=[3, 224, 224]), - # fluid.layers.data( - # name='label', shape=[1], dtype='int64'), - # ], - # place=fluid.CPUPlace()) - # fluid.recordio_writer.convert_reader_to_recordio_file( - # "./flowers.recordio", reader, feeder, compressor=fluid.core.RecordIOWriter.Compressor.NoCompress) - - def check_resnet_convergence(self, balance_parameter_opt_between_cards): - import functools - batch_size = 2 - self.check_network_convergence( - functools.partial( - SE_ResNeXt50Small, batch_size=batch_size), - iter=20, - batch_size=batch_size, - balance_parameter_opt_between_cards=balance_parameter_opt_between_cards - ) - - def test_resnet(self): - self.check_resnet_convergence(False) - - def test_resnet_with_new_strategy(self): - self.check_resnet_convergence(True) - - -class ModelHyperParams(object): - # Dictionary size for source and target language. This model directly uses - # paddle.dataset.wmt16 in which , and token has - # alreay been added, but the token is not added. Transformer requires - # sequences in a mini-batch are padded to have the same length. A token is - # added into the original dictionary in paddle.dateset.wmt16. - - # size of source word dictionary. - src_vocab_size = 10000 - # index for token in source language. - src_pad_idx = src_vocab_size - - # size of target word dictionay - trg_vocab_size = 10000 - # index for token in target language. - trg_pad_idx = trg_vocab_size - - # position value corresponding to the token. - pos_pad_idx = 0 - - # max length of sequences. It should plus 1 to include position - # padding token for position encoding. - max_length = 50 - - # the dimension for word embeddings, which is also the last dimension of - # the input and output of multi-head attention, position-wise feed-forward - # networks, encoder and decoder. - - d_model = 512 - # size of the hidden layer in position-wise feed-forward networks. - d_inner_hid = 1024 - # the dimension that keys are projected to for dot-product attention. - d_key = 64 - # the dimension that values are projected to for dot-product attention. - d_value = 64 - # number of head used in multi-head attention. - n_head = 8 - # number of sub-layers to be stacked in the encoder and decoder. - n_layer = 6 - # dropout rate used by all dropout layers. - dropout = 0.1 - - -def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head): - """ - Pad the instances to the max sequence length in batch, and generate the - corresponding position data and attention bias. Then, convert the numpy - data to tensors and return a dict mapping names to tensors. - """ - - def __pad_batch_data(insts, - pad_idx, - is_target=False, - return_pos=True, - return_attn_bias=True, - return_max_len=True): - """ - Pad the instances to the max sequence length in batch, and generate the - corresponding position data and attention bias. - """ - return_list = [] - max_len = max(len(inst) for inst in insts) - inst_data = np.array( - [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]) - return_list += [inst_data.astype("int64").reshape([-1, 1])] - if return_pos: - inst_pos = np.array([[ - pos_i + 1 if w_i != pad_idx else 0 - for pos_i, w_i in enumerate(inst) - ] for inst in inst_data]) - - return_list += [inst_pos.astype("int64").reshape([-1, 1])] - if return_attn_bias: - if is_target: - # This is used to avoid attention on paddings and subsequent - # words. - slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, - max_len)) - slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape( - [-1, 1, max_len, max_len]) - slf_attn_bias_data = np.tile(slf_attn_bias_data, - [1, n_head, 1, 1]) * [-1e9] - else: - # This is used to avoid attention on paddings. - slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * - (max_len - len(inst)) - for inst in insts]) - slf_attn_bias_data = np.tile( - slf_attn_bias_data.reshape([-1, 1, 1, max_len]), - [1, n_head, max_len, 1]) - return_list += [slf_attn_bias_data.astype("float32")] - if return_max_len: - return_list += [max_len] - return return_list if len(return_list) > 1 else return_list[0] - - def data_to_tensor(data_list, name_list, input_dict, place): - assert len(data_list) == len(name_list) - for i in range(len(name_list)): - tensor = fluid.LoDTensor() - tensor.set(data_list[i], place) - input_dict[name_list[i]] = tensor - - src_word, src_pos, src_slf_attn_bias, src_max_len = __pad_batch_data( - [inst[0] for inst in insts], src_pad_idx, is_target=False) - trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = __pad_batch_data( - [inst[1] for inst in insts], trg_pad_idx, is_target=True) - trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :], - [1, 1, trg_max_len, 1]).astype("float32") - lbl_word = __pad_batch_data([inst[2] for inst in insts], trg_pad_idx, False, - False, False, False) - lbl_weight = (lbl_word != trg_pad_idx).astype("float32").reshape([-1, 1]) - - return [ - src_word, src_pos, trg_word, trg_pos, src_slf_attn_bias, - trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight - ] - - -import transformer_model - - -def transformer(use_feed): - assert not use_feed, "transfomer doesn't support feed yet" - return transformer_model.transformer( - ModelHyperParams.src_vocab_size + 1, - ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1, - ModelHyperParams.n_layer, ModelHyperParams.n_head, - ModelHyperParams.d_key, ModelHyperParams.d_value, - ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, - ModelHyperParams.dropout, ModelHyperParams.src_pad_idx, - ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx) - - -class TestTransformer(TestParallelExecutorBase): - @classmethod - def setUpClass(cls): - reader = paddle.batch( - wmt16.train(ModelHyperParams.src_vocab_size, - ModelHyperParams.trg_vocab_size), - batch_size=transformer_model.batch_size) - - with fluid.recordio_writer.create_recordio_writer( - "./wmt16.recordio") as writer: - for batch in reader(): - for tensor in prepare_batch_input( - batch, ModelHyperParams.src_pad_idx, - ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head): - t = fluid.LoDTensor() - t.set(tensor, fluid.CPUPlace()) - writer.append_tensor(t) - writer.complete_append_tensor() - - @unittest.skip("transformer is buggy in multi gpu") - def test_main(self): - self.check_network_convergence(transformer) - - -class ParallelExecutorTestingDuringTraining(unittest.TestCase): - def check_network_convergence(self, build_strategy=None): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - loss = simple_fc_net(True) - test_program = main.clone(for_test=True) - - opt = fluid.optimizer.SGD(learning_rate=0.001) - opt.minimize(loss) - - batch_size = 32 - image = np.random.normal(size=(batch_size, 784)).astype('float32') - label = np.random.randint(0, 10, (batch_size, 1), dtype="int64") - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(startup) - feed_dict = {'image': image, 'label': label} - - train_exe = fluid.ParallelExecutor( - use_cuda=True, - loss_name=loss.name, - main_program=main, - build_strategy=build_strategy) - - test_exe = fluid.ParallelExecutor( - use_cuda=True, - main_program=test_program, - share_vars_from=train_exe, - build_strategy=build_strategy) - - for i in xrange(5): - test_loss, = test_exe.run([loss.name], feed=feed_dict) - test_loss = np.array(test_loss) - - train_loss, = train_exe.run([loss.name], feed=feed_dict) - train_loss = np.array(train_loss) - self.assertTrue( - np.allclose( - train_loss, test_loss, atol=1e-8), - "Train loss: " + str(train_loss) + "\n Test loss:" + - str(test_loss)) - - def test_parallel_testing(self): - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce - self.check_network_convergence(build_strategy) - - def test_parallel_testing_with_new_strategy(self): - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce - self.check_network_convergence(build_strategy) - - -import paddle.dataset.conll05 as conll05 -import paddle.fluid as fluid - -word_dict, verb_dict, label_dict = conll05.get_dict() -word_dict_len = len(word_dict) -label_dict_len = len(label_dict) -pred_dict_len = len(verb_dict) -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 -mix_hidden_lr = 1e-3 -embedding_name = 'emb' - - -def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, - is_sparse, **ignored): - # 8 features - predicate_embedding = fluid.layers.embedding( - input=predicate, - is_sparse=is_sparse, - size=[pred_dict_len, word_dim], - dtype='float32', - param_attr='vemb') - - mark_embedding = fluid.layers.embedding( - input=mark, - is_sparse=is_sparse, - size=[mark_dict_len, mark_dim], - dtype='float32') - - word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] - emb_layers = [ - fluid.layers.embedding( - size=[word_dict_len, word_dim], - is_sparse=is_sparse, - input=x, - param_attr=fluid.ParamAttr( - name=embedding_name, trainable=False)) for x in word_input - ] - emb_layers.append(predicate_embedding) - emb_layers.append(mark_embedding) - - hidden_0_layers = [ - fluid.layers.fc(input=emb, size=hidden_dim, act='tanh') - for emb in emb_layers - ] - - hidden_0 = fluid.layers.sums(input=hidden_0_layers) - - lstm_0 = fluid.layers.dynamic_lstm( - input=hidden_0, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid') - - # stack L-LSTM and R-LSTM with direct edges - input_tmp = [hidden_0, lstm_0] - - for i in range(1, depth): - mix_hidden = fluid.layers.sums(input=[ - fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'), - fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh') - ]) - - lstm = fluid.layers.dynamic_lstm( - input=mix_hidden, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - is_reverse=((i % 2) == 1)) - - input_tmp = [mix_hidden, lstm] - - feature_out = fluid.layers.sums(input=[ - fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'), - fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh') - ]) - - return feature_out - - -class TestCRFModel(unittest.TestCase): - def check_network_convergence(self, is_sparse, build_strategy=None): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) - ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1) - - feature_out = db_lstm(**locals()) - target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1) - crf_cost = fluid.layers.linear_chain_crf( - input=feature_out, - label=target, - param_attr=fluid.ParamAttr( - name='crfw', learning_rate=1e-1)) - avg_cost = fluid.layers.mean(crf_cost) - - sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=0.01, - decay_steps=100000, - decay_rate=0.5, - staircase=True)) - sgd_optimizer.minimize(avg_cost) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.conll05.test(), buf_size=8192), - batch_size=16) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(startup) - - pe = fluid.ParallelExecutor( - use_cuda=True, - loss_name=avg_cost.name, - build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[ - word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, - mark, target - ], - place=fluid.CPUPlace()) - - data = train_data() - for i in xrange(10): - cur_batch = next(data) - print map(np.array, - pe.run(feed=feeder.feed(cur_batch), - fetch_list=[avg_cost.name]))[0] - - def test_update_sparse_parameter_all_reduce(self): - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce - self.check_network_convergence( - is_sparse=True, build_strategy=build_strategy) - - def test_update_dense_parameter_all_reduce(self): - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce - self.check_network_convergence( - is_sparse=False, build_strategy=build_strategy) - - def test_update_sparse_parameter_reduce(self): - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce - self.check_network_convergence( - is_sparse=True, build_strategy=build_strategy) - - def test_update_dense_parameter_reduce(self): - build_strategy = fluid.BuildStrategy() - build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce - self.check_network_convergence( - is_sparse=False, build_strategy=build_strategy) - - -# test fetch all the variables of global_block - -import paddle.dataset.flowers as flowers -import math - - -def Lenet(data, class_dim): - conv1 = fluid.layers.conv2d(data, 32, 5, 1, act=None) - bn1 = fluid.layers.batch_norm(conv1, act='relu') - pool1 = fluid.layers.pool2d(bn1, 2, 'max', 2) - conv2 = fluid.layers.conv2d(pool1, 50, 5, 1, act=None) - bn2 = fluid.layers.batch_norm(conv2, act='relu') - pool2 = fluid.layers.pool2d(bn2, 2, 'max', 2) - - fc1 = fluid.layers.fc(pool2, size=500, act='relu') - fc2 = fluid.layers.fc(fc1, size=class_dim, act='softmax') - - return fc2 - - -class TestFetchOp(unittest.TestCase): - def parallel_exe(self, train_inputs, seed): - main = fluid.Program() - startup = fluid.Program() - startup.random_seed = seed - with fluid.program_guard(main, startup): - data = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - out = Lenet(data, class_dim=102) - loss = fluid.layers.cross_entropy(input=out, label=label) - loss = fluid.layers.mean(loss) - - opt = fluid.optimizer.Momentum( - learning_rate=0.1, - momentum=0.9, - regularization=fluid.regularizer.L2Decay(1e-4)) - - opt.minimize(loss) - - # TODO(zcd): I found that onece the memory optimizer is open, - # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD, - # conv2d_1.b_0@GRAD. Those variables should not be pruned. - # fluid.memory_optimize(main) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(startup) - - feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) - pe = fluid.ParallelExecutor( - use_cuda=True, loss_name=loss.name, main_program=main) - - fetch_list = [] - all_vars = main.global_block().vars - for k, v in all_vars.iteritems(): - if 'tmp' not in k and k[0] is not '_' or v.persistable: - fetch_list.append(k) - - for data in train_inputs: - ret = pe.run(fetch_list, feed=feeder.feed(data)) - for i in range(len(fetch_list)): - assert not math.isnan(np.sum(ret[i])) and \ - not math.isinf(np.sum(ret[i])) - - def test_fetch_op(self): - tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16) - tst_reader_iter = tst_reader() - - iters = 3 - train_inputs = [] - for i in range(iters): - train_inputs.append(tst_reader_iter.next()) - - self.parallel_exe(train_inputs, seed=1) - - -class TestFeedParallel(unittest.TestCase): - def test_main(self): - main = fluid.Program() - startup = fluid.Program() - startup.random_seed = 1 - with fluid.scope_guard(fluid.core.Scope()): - with fluid.program_guard(main, startup): - data = fluid.layers.data( - name='image', shape=[3, 224, 224], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - out = Lenet(data, class_dim=102) - loss = fluid.layers.cross_entropy(input=out, label=label) - loss = fluid.layers.mean(loss) - opt = fluid.optimizer.Momentum( - learning_rate=0.1, - momentum=0.9, - regularization=fluid.regularizer.L2Decay(1e-4)) - - opt.minimize(loss) - place = fluid.CUDAPlace(0) - feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) - reader = feeder.decorate_reader( - paddle.batch( - flowers.train(), batch_size=16), multi_devices=True) - exe = fluid.Executor(place) - exe.run(startup) - pe = fluid.ParallelExecutor( - use_cuda=True, loss_name=loss.name, main_program=main) - - for batch_id, data in enumerate(reader()): - loss_np = np.array(pe.run(feed=data, fetch_list=[loss.name])[0]) - print batch_id, loss_np - if batch_id == 2: - break - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py new file mode 100644 index 0000000000000000000000000000000000000000..66e138b03f3b170aca4fb2207438eb9af1783c33 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py @@ -0,0 +1,197 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.dataset.conll05 as conll05 +import paddle.fluid as fluid +import unittest +import paddle +import numpy as np + +word_dict, verb_dict, label_dict = conll05.get_dict() +word_dict_len = len(word_dict) +label_dict_len = len(label_dict) +pred_dict_len = len(verb_dict) +mark_dict_len = 2 +word_dim = 32 +mark_dim = 5 +hidden_dim = 512 +depth = 8 +mix_hidden_lr = 1e-3 +embedding_name = 'emb' + + +def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, + is_sparse, **ignored): + # 8 features + predicate_embedding = fluid.layers.embedding( + input=predicate, + is_sparse=is_sparse, + size=[pred_dict_len, word_dim], + dtype='float32', + param_attr='vemb') + + mark_embedding = fluid.layers.embedding( + input=mark, + is_sparse=is_sparse, + size=[mark_dict_len, mark_dim], + dtype='float32') + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + fluid.layers.embedding( + size=[word_dict_len, word_dim], + is_sparse=is_sparse, + input=x, + param_attr=fluid.ParamAttr( + name=embedding_name, trainable=False)) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0_layers = [ + fluid.layers.fc(input=emb, size=hidden_dim, act='tanh') + for emb in emb_layers + ] + + hidden_0 = fluid.layers.sums(input=hidden_0_layers) + + lstm_0 = fluid.layers.dynamic_lstm( + input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') + + # stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'), + fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh') + ]) + + lstm = fluid.layers.dynamic_lstm( + input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) + + input_tmp = [mix_hidden, lstm] + + feature_out = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'), + fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh') + ]) + + return feature_out + + +class TestCRFModel(unittest.TestCase): + def check_network_convergence(self, is_sparse, build_strategy=None): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + word = fluid.layers.data( + name='word_data', shape=[1], dtype='int64', lod_level=1) + predicate = fluid.layers.data( + name='verb_data', shape=[1], dtype='int64', lod_level=1) + ctx_n2 = fluid.layers.data( + name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1) + ctx_n1 = fluid.layers.data( + name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1) + ctx_0 = fluid.layers.data( + name='ctx_0_data', shape=[1], dtype='int64', lod_level=1) + ctx_p1 = fluid.layers.data( + name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1) + ctx_p2 = fluid.layers.data( + name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1) + mark = fluid.layers.data( + name='mark_data', shape=[1], dtype='int64', lod_level=1) + + feature_out = db_lstm(**locals()) + target = fluid.layers.data( + name='target', shape=[1], dtype='int64', lod_level=1) + crf_cost = fluid.layers.linear_chain_crf( + input=feature_out, + label=target, + param_attr=fluid.ParamAttr( + name='crfw', learning_rate=1e-1)) + avg_cost = fluid.layers.mean(crf_cost) + + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.exponential_decay( + learning_rate=0.01, + decay_steps=100000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=16) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(startup) + + pe = fluid.ParallelExecutor( + use_cuda=True, + loss_name=avg_cost.name, + build_strategy=build_strategy) + + feeder = fluid.DataFeeder( + feed_list=[ + word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, + mark, target + ], + place=fluid.CPUPlace()) + + data = train_data() + for i in xrange(10): + cur_batch = next(data) + print map(np.array, + pe.run(feed=feeder.feed(cur_batch), + fetch_list=[avg_cost.name]))[0] + + def test_update_sparse_parameter_all_reduce(self): + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce + self.check_network_convergence( + is_sparse=True, build_strategy=build_strategy) + + def test_update_dense_parameter_all_reduce(self): + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce + self.check_network_convergence( + is_sparse=False, build_strategy=build_strategy) + + def test_update_sparse_parameter_reduce(self): + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce + self.check_network_convergence( + is_sparse=True, build_strategy=build_strategy) + + def test_update_dense_parameter_reduce(self): + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce + self.check_network_convergence( + is_sparse=False, build_strategy=build_strategy) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py new file mode 100644 index 0000000000000000000000000000000000000000..24f8d28c0304a77a99213374b25d0db728eca265 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -0,0 +1,132 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.dataset.flowers as flowers +import math +import paddle.fluid as fluid +import unittest +import numpy as np +import paddle + + +def Lenet(data, class_dim): + conv1 = fluid.layers.conv2d(data, 32, 5, 1, act=None) + bn1 = fluid.layers.batch_norm(conv1, act='relu') + pool1 = fluid.layers.pool2d(bn1, 2, 'max', 2) + conv2 = fluid.layers.conv2d(pool1, 50, 5, 1, act=None) + bn2 = fluid.layers.batch_norm(conv2, act='relu') + pool2 = fluid.layers.pool2d(bn2, 2, 'max', 2) + + fc1 = fluid.layers.fc(pool2, size=500, act='relu') + fc2 = fluid.layers.fc(fc1, size=class_dim, act='softmax') + + return fc2 + + +class TestFetchOp(unittest.TestCase): + def parallel_exe(self, train_inputs, seed): + main = fluid.Program() + startup = fluid.Program() + startup.random_seed = seed + with fluid.program_guard(main, startup): + data = fluid.layers.data( + name='image', shape=[3, 224, 224], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + out = Lenet(data, class_dim=102) + loss = fluid.layers.cross_entropy(input=out, label=label) + loss = fluid.layers.mean(loss) + + opt = fluid.optimizer.Momentum( + learning_rate=0.1, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + + opt.minimize(loss) + + # TODO(zcd): I found that onece the memory optimizer is open, + # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD, + # conv2d_1.b_0@GRAD. Those variables should not be pruned. + # fluid.memory_optimize(main) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(startup) + + feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) + pe = fluid.ParallelExecutor( + use_cuda=True, loss_name=loss.name, main_program=main) + + fetch_list = [] + all_vars = main.global_block().vars + for k, v in all_vars.iteritems(): + if 'tmp' not in k and k[0] is not '_' or v.persistable: + fetch_list.append(k) + + for data in train_inputs: + ret = pe.run(fetch_list, feed=feeder.feed(data)) + for i in range(len(fetch_list)): + assert not math.isnan(np.sum(ret[i])) and \ + not math.isinf(np.sum(ret[i])) + + def test_fetch_op(self): + tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16) + tst_reader_iter = tst_reader() + + iters = 3 + train_inputs = [] + for i in range(iters): + train_inputs.append(tst_reader_iter.next()) + + self.parallel_exe(train_inputs, seed=1) + + +class TestFeedParallel(unittest.TestCase): + def test_main(self): + main = fluid.Program() + startup = fluid.Program() + startup.random_seed = 1 + with fluid.scope_guard(fluid.core.Scope()): + with fluid.program_guard(main, startup): + data = fluid.layers.data( + name='image', shape=[3, 224, 224], dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + out = Lenet(data, class_dim=102) + loss = fluid.layers.cross_entropy(input=out, label=label) + loss = fluid.layers.mean(loss) + opt = fluid.optimizer.Momentum( + learning_rate=0.1, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + + opt.minimize(loss) + place = fluid.CUDAPlace(0) + feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) + reader = feeder.decorate_reader( + paddle.batch( + flowers.train(), batch_size=16), multi_devices=True) + exe = fluid.Executor(place) + exe.run(startup) + pe = fluid.ParallelExecutor( + use_cuda=True, loss_name=loss.name, main_program=main) + + for batch_id, data in enumerate(reader()): + loss_np = np.array(pe.run(feed=data, fetch_list=[loss.name])[0]) + print batch_id, loss_np + if batch_id == 2: + break + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..015703c3e25f4e11e64ab6a7de99da12bee608f6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from parallel_executor_test_base import TestParallelExecutorBase +import paddle.fluid as fluid +import numpy as np +import paddle +import paddle.dataset.mnist as mnist +import unittest + +MNIST_RECORDIO_FILE = "./mnist_test_pe.recordio" + + +def simple_fc_net(use_feed): + if use_feed: + img = fluid.layers.data(name='image', shape=[784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + else: + reader = fluid.layers.open_files( + filenames=[MNIST_RECORDIO_FILE], + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64'], + thread_num=1, + for_parallel=True) + reader = fluid.layers.io.double_buffer(reader) + img, label = fluid.layers.read_file(reader) + hidden = img + for _ in xrange(4): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + +def fc_with_batchnorm(use_feed): + if use_feed: + img = fluid.layers.data(name='image', shape=[784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + else: + reader = fluid.layers.open_files( + filenames=[MNIST_RECORDIO_FILE], + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64'], + thread_num=1, + for_parallel=True) + reader = fluid.layers.io.double_buffer(reader) + img, label = fluid.layers.read_file(reader) + + hidden = img + for _ in xrange(1): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + + hidden = fluid.layers.batch_norm(input=hidden) + + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + +class TestMNIST(TestParallelExecutorBase): + @classmethod + def setUpClass(cls): + # Convert mnist to recordio file + with fluid.program_guard(fluid.Program(), fluid.Program()): + reader = paddle.batch(mnist.train(), batch_size=4) + feeder = fluid.DataFeeder( + feed_list=[ # order is image and label + fluid.layers.data( + name='image', shape=[784]), + fluid.layers.data( + name='label', shape=[1], dtype='int64'), + ], + place=fluid.CPUPlace()) + fluid.recordio_writer.convert_reader_to_recordio_file( + MNIST_RECORDIO_FILE, reader, feeder) + + def check_simple_fc_convergence(self, balance_parameter_opt_between_cards): + self.check_network_convergence(simple_fc_net) + self.check_network_convergence(simple_fc_net, allow_op_delay=True) + + img = np.zeros(shape=[32, 784], dtype='float32') + label = np.ones(shape=[32, 1], dtype='int64') + self.check_network_convergence( + simple_fc_net, + feed_dict={"image": img, + "label": label}, + balance_parameter_opt_between_cards=balance_parameter_opt_between_cards + ) + + def test_simple_fc(self): + self.check_simple_fc_convergence(False) + + def test_simple_fc_with_new_strategy(self): + self.check_simple_fc_convergence(True) + + def check_simple_fc_parallel_accuracy(self, + balance_parameter_opt_between_cards): + img = np.zeros(shape=[32, 784], dtype='float32') + label = np.ones(shape=[32, 1], dtype='int64') + single_first_loss, single_last_loss = self.check_network_convergence( + method=simple_fc_net, + seed=1000, + feed_dict={"image": img, + "label": label}, + use_parallel_executor=False) + parallel_first_loss, parallel_last_loss = self.check_network_convergence( + method=simple_fc_net, + seed=1000, + feed_dict={"image": img, + "label": label}, + use_parallel_executor=True, + balance_parameter_opt_between_cards=balance_parameter_opt_between_cards + ) + + for p_f in parallel_first_loss: + self.assertAlmostEquals(p_f, single_first_loss[0], delta=1e-6) + for p_l in parallel_last_loss: + self.assertAlmostEquals(p_l, single_last_loss[0], delta=1e-6) + + def test_simple_fc_parallel_accuracy(self): + self.check_simple_fc_parallel_accuracy(False) + + def test_simple_fc_parallel_accuracy_with_new_strategy(self): + self.check_simple_fc_parallel_accuracy(True) + + def check_batchnorm_fc_convergence(self, + balance_parameter_opt_between_cards): + self.check_network_convergence(fc_with_batchnorm) + img = np.zeros(shape=[32, 784], dtype='float32') + label = np.ones(shape=[32, 1], dtype='int64') + self.check_network_convergence( + fc_with_batchnorm, + feed_dict={"image": img, + "label": label}, + balance_parameter_opt_between_cards=balance_parameter_opt_between_cards + ) + + def test_batchnorm_fc(self): + self.check_batchnorm_fc_convergence(False) + + def test_batchnorm_fc_with_new_strategy(self): + self.check_batchnorm_fc_convergence(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py new file mode 100644 index 0000000000000000000000000000000000000000..a3fa140cbb7994a36d2cbee26d598165f1f771d2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py @@ -0,0 +1,152 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +from parallel_executor_test_base import TestParallelExecutorBase +import unittest + + +def squeeze_excitation(input, num_channels, reduction_ratio): + # pool = fluid.layers.pool2d( + # input=input, pool_size=0, pool_type='avg', global_pooling=True) + conv = input + shape = conv.shape + reshape = fluid.layers.reshape( + x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) + pool = fluid.layers.reduce_mean(input=reshape, dim=2) + + squeeze = fluid.layers.fc(input=pool, + size=num_channels / reduction_ratio, + act='relu') + excitation = fluid.layers.fc(input=squeeze, + size=num_channels, + act='sigmoid') + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + +def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, + act=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1) + + +def shortcut(input, ch_out, stride): + ch_in = input.shape[1] + if ch_in != ch_out: + if stride == 1: + filter_size = 1 + else: + filter_size = 3 + return conv_bn_layer(input, ch_out, filter_size, stride) + else: + return input + + +def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): + # The number of first 1x1 convolutional channels for each bottleneck build block + # was halved to reduce the compution cost. + conv0 = conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu') + conv1 = conv_bn_layer( + input=conv0, + num_filters=num_filters * 2, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu') + conv2 = conv_bn_layer( + input=conv1, num_filters=num_filters * 2, filter_size=1, act=None) + scale = squeeze_excitation( + input=conv2, + num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) + + short = shortcut(input, num_filters * 2, stride) + + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + +def SE_ResNeXt50Small(batch_size=2, use_feed=False): + assert not use_feed, "SE_ResNeXt doesn't support feed yet" + + img = fluid.layers.fill_constant( + shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0) + label = fluid.layers.fill_constant( + shape=[batch_size, 1], dtype='int64', value=0.0) + + conv = conv_bn_layer( + input=img, num_filters=16, filter_size=3, stride=2, act='relu') + conv = conv_bn_layer( + input=conv, num_filters=16, filter_size=3, stride=1, act='relu') + conv = conv_bn_layer( + input=conv, num_filters=16, filter_size=3, stride=1, act='relu') + conv = fluid.layers.pool2d( + input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + cardinality = 32 + reduction_ratio = 16 + depth = [3, 4, 6, 3] + num_filters = [128, 256, 512, 1024] + + for block in range(len(depth)): + for i in range(depth[block]): + conv = bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio) + + shape = conv.shape + reshape = fluid.layers.reshape( + x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) + pool = fluid.layers.reduce_mean(input=reshape, dim=2) + dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2) + # Classifier layer: + prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + +class TestResnet(TestParallelExecutorBase): + def check_resnet_convergence(self, balance_parameter_opt_between_cards): + import functools + batch_size = 2 + self.check_network_convergence( + functools.partial( + SE_ResNeXt50Small, batch_size=batch_size), + iter=20, + batch_size=batch_size, + balance_parameter_opt_between_cards=balance_parameter_opt_between_cards + ) + + def test_resnet(self): + self.check_resnet_convergence(False) + + def test_resnet_with_new_strategy(self): + self.check_resnet_convergence(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py new file mode 100644 index 0000000000000000000000000000000000000000..93a5f767867d68110cf7b8f441cc740ecd843cf9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py @@ -0,0 +1,93 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +import numpy as np +import unittest + + +def simple_fc_net(): + img = fluid.layers.data(name='image', shape=[784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + hidden = img + for _ in xrange(4): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + +class ParallelExecutorTestingDuringTraining(unittest.TestCase): + def check_network_convergence(self, build_strategy=None): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = simple_fc_net() + test_program = main.clone(for_test=True) + + opt = fluid.optimizer.SGD(learning_rate=0.001) + opt.minimize(loss) + + batch_size = 32 + image = np.random.normal(size=(batch_size, 784)).astype('float32') + label = np.random.randint(0, 10, (batch_size, 1), dtype="int64") + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(startup) + feed_dict = {'image': image, 'label': label} + + train_exe = fluid.ParallelExecutor( + use_cuda=True, + loss_name=loss.name, + main_program=main, + build_strategy=build_strategy) + + test_exe = fluid.ParallelExecutor( + use_cuda=True, + main_program=test_program, + share_vars_from=train_exe, + build_strategy=build_strategy) + + for i in xrange(5): + test_loss, = test_exe.run([loss.name], feed=feed_dict) + test_loss = np.array(test_loss) + + train_loss, = train_exe.run([loss.name], feed=feed_dict) + train_loss = np.array(train_loss) + self.assertTrue( + np.allclose( + train_loss, test_loss, atol=1e-8), + "Train loss: " + str(train_loss) + "\n Test loss:" + + str(test_loss)) + + def test_parallel_testing(self): + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce + self.check_network_convergence(build_strategy) + + def test_parallel_testing_with_new_strategy(self): + build_strategy = fluid.BuildStrategy() + build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce + self.check_network_convergence(build_strategy) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..c81df66d987f3d3856af0e19fc935df7de2edacc --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py @@ -0,0 +1,174 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +import transformer_model +import numpy as np +from parallel_executor_test_base import TestParallelExecutorBase +import unittest +import paddle +import paddle.dataset.wmt16 as wmt16 + +WMT16_RECORDIO_FILE = "./wmt16_test_pe.recordio" + + +class ModelHyperParams(object): + # Dictionary size for source and target language. This model directly uses + # paddle.dataset.wmt16 in which , and token has + # alreay been added, but the token is not added. Transformer requires + # sequences in a mini-batch are padded to have the same length. A token is + # added into the original dictionary in paddle.dateset.wmt16. + + # size of source word dictionary. + src_vocab_size = 10000 + # index for token in source language. + src_pad_idx = src_vocab_size + + # size of target word dictionay + trg_vocab_size = 10000 + # index for token in target language. + trg_pad_idx = trg_vocab_size + + # position value corresponding to the token. + pos_pad_idx = 0 + + # max length of sequences. It should plus 1 to include position + # padding token for position encoding. + max_length = 50 + + # the dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + + d_model = 512 + # size of the hidden layer in position-wise feed-forward networks. + d_inner_hid = 1024 + # the dimension that keys are projected to for dot-product attention. + d_key = 64 + # the dimension that values are projected to for dot-product attention. + d_value = 64 + # number of head used in multi-head attention. + n_head = 8 + # number of sub-layers to be stacked in the encoder and decoder. + n_layer = 6 + # dropout rate used by all dropout layers. + dropout = 0.1 + + +def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. Then, convert the numpy + data to tensors and return a dict mapping names to tensors. + """ + + def __pad_batch_data(insts, + pad_idx, + is_target=False, + return_pos=True, + return_attn_bias=True, + return_max_len=True): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. + """ + return_list = [] + max_len = max(len(inst) for inst in insts) + inst_data = np.array( + [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]) + return_list += [inst_data.astype("int64").reshape([-1, 1])] + if return_pos: + inst_pos = np.array([[ + pos_i + 1 if w_i != pad_idx else 0 + for pos_i, w_i in enumerate(inst) + ] for inst in inst_data]) + + return_list += [inst_pos.astype("int64").reshape([-1, 1])] + if return_attn_bias: + if is_target: + # This is used to avoid attention on paddings and subsequent + # words. + slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, + max_len)) + slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape( + [-1, 1, max_len, max_len]) + slf_attn_bias_data = np.tile(slf_attn_bias_data, + [1, n_head, 1, 1]) * [-1e9] + else: + # This is used to avoid attention on paddings. + slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * + (max_len - len(inst)) + for inst in insts]) + slf_attn_bias_data = np.tile( + slf_attn_bias_data.reshape([-1, 1, 1, max_len]), + [1, n_head, max_len, 1]) + return_list += [slf_attn_bias_data.astype("float32")] + if return_max_len: + return_list += [max_len] + return return_list if len(return_list) > 1 else return_list[0] + + src_word, src_pos, src_slf_attn_bias, src_max_len = __pad_batch_data( + [inst[0] for inst in insts], src_pad_idx, is_target=False) + trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = __pad_batch_data( + [inst[1] for inst in insts], trg_pad_idx, is_target=True) + trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :], + [1, 1, trg_max_len, 1]).astype("float32") + lbl_word = __pad_batch_data([inst[2] for inst in insts], trg_pad_idx, False, + False, False, False) + lbl_weight = (lbl_word != trg_pad_idx).astype("float32").reshape([-1, 1]) + + return [ + src_word, src_pos, trg_word, trg_pos, src_slf_attn_bias, + trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight + ] + + +def transformer(use_feed): + assert not use_feed, "transfomer doesn't support feed yet" + return transformer_model.transformer( + ModelHyperParams.src_vocab_size + 1, + ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, ModelHyperParams.n_head, + ModelHyperParams.d_key, ModelHyperParams.d_value, + ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, + ModelHyperParams.dropout, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx) + + +class TestTransformer(TestParallelExecutorBase): + @classmethod + def setUpClass(cls): + reader = paddle.batch( + wmt16.train(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=transformer_model.batch_size) + + with fluid.recordio_writer.create_recordio_writer( + WMT16_RECORDIO_FILE) as writer: + for batch in reader(): + for tensor in prepare_batch_input( + batch, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head): + t = fluid.LoDTensor() + t.set(tensor, fluid.CPUPlace()) + writer.append_tensor(t) + writer.complete_append_tensor() + + @unittest.skip("transformer is buggy in multi gpu") + def test_main(self): + self.check_network_convergence(transformer) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..2105d320665367e3ec1bfd7b3a353a144c91244f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_polygon_box_transform.py @@ -0,0 +1,68 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +def PolygonBoxRestore(input): + shape = input.shape + batch_size = shape[0] + geo_channels = shape[1] + h = shape[2] + w = shape[3] + h_indexes = np.array(range(h) * w).reshape( + [w, h]).transpose()[np.newaxis, :] # [1, h, w] + w_indexes = np.array(range(w) * h).reshape( + [h, w])[np.newaxis, :] # [1, h, w] + indexes = np.concatenate( + (w_indexes, h_indexes))[np.newaxis, :] # [1, 2, h, w] + indexes = indexes.repeat( + [geo_channels / 2], + axis=0)[np.newaxis, :] # [1, geo_channels/2, 2, h, w] + indexes = indexes.repeat( + [batch_size], axis=0) # [batch_size, geo_channels/2, 2, h, w] + return indexes.reshape( + input.shape) - input # [batch_size, geo_channels, h, w] + + +class TestPolygonBoxRestoreOp(OpTest): + def config(self): + self.input_shape = (1, 8, 2, 2) + + def setUp(self): + self.config() + self.op_type = "polygon_box_transform" + input = np.random.random(self.input_shape).astype("float32") + self.inputs = {'Input': input} + output = PolygonBoxRestore(input) + self.outputs = {'Output': output} + + def test_check_output(self): + self.check_output() + + +class TestCase1(TestPolygonBoxRestoreOp): + def config(self): + self.input_shape = (2, 10, 3, 2) + + +class TestCase2(TestPolygonBoxRestoreOp): + def config(self): + self.input_shape = (3, 12, 4, 5) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_preprocessor.py b/python/paddle/fluid/tests/unittests/test_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..cbf1a7e0c50a87cd43507ffdb94109873cf4e5d9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_preprocessor.py @@ -0,0 +1,93 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np + +import paddle.fluid as fluid +import paddle.v2 as paddle +import paddle.v2.dataset.mnist as mnist + + +class TestPreprocessor(unittest.TestCase): + def setUp(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + reader = paddle.batch(mnist.train(), batch_size=32) + feeder = fluid.DataFeeder( + feed_list=[ # order is image and label + fluid.layers.data( + name='image', shape=[784]), + fluid.layers.data( + name='label', shape=[1], dtype='int64'), + ], + place=fluid.CPUPlace()) + self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file( + './mnist_for_preprocessor_test.recordio', reader, feeder) + + def test_main(self): + N = 10 + + img_expected_res = [] + lbl_expected_res = [] + with fluid.program_guard(fluid.Program(), fluid.Program()): + data_file = fluid.layers.io.open_recordio_file( + './mnist_for_preprocessor_test.recordio', + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + img, lbl = fluid.layers.io.read_file(data_file) + + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + for _ in range(N): + img_v, lbl_v = exe.run(fetch_list=[img, lbl]) + img_expected_res.append(img_v / 2) + lbl_expected_res.append(lbl_v + 1) + + img_actual_res = [] + lbl_actual_res = [] + with fluid.program_guard(fluid.Program(), fluid.Program()): + data_file = fluid.layers.io.open_recordio_file( + './mnist_for_preprocessor_test.recordio', + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + preprocessor = fluid.layers.io.Preprocessor(reader=data_file) + with preprocessor.block(): + img, lbl = preprocessor.inputs() + img_out = img / 2 + lbl_out = lbl + 1 + preprocessor.outputs(img_out, lbl_out) + + data_file = fluid.layers.io.double_buffer(preprocessor()) + img, lbl = fluid.layers.io.read_file(data_file) + + if fluid.core.is_compiled_with_cuda(): + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + for _ in range(N): + img_v, lbl_v = exe.run(fetch_list=[img, lbl]) + img_actual_res.append(img_v) + lbl_actual_res.append(lbl_v) + + for idx in range(N): + np.allclose(img_expected_res[idx], img_actual_res[idx]) + np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]) diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py index 80a8f7c09cfe521f8f94a27e85fc8d86c02b3e97..9ff0ae6fca27d4681891b2033e2f8f95bd825942 100644 --- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py +++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py @@ -107,7 +107,7 @@ class ControlFlowGraph(object): # Repeatedly apply liveness updates until the algorithm stablize # on a complete set live input vars and live output vars. while True: - for i in range(self.op_size, 0, -1): + for i in reversed(range(self.op_size)): live_in[i] = set(self._live_in[i]) live_out[i] = set(self._live_out[i]) for s in self._successors[i]: