“4501abd6aca88f9636d022acb77fe29be6fbcbcb”上不存在“paddle/phi/kernels/cpu/reduce_all_kernel.cc”
提交 101378c8 编写于 作者: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into remove_evaluator

...@@ -27,7 +27,6 @@ third_party/ ...@@ -27,7 +27,6 @@ third_party/
cmake-build-* cmake-build-*
# generated while compiling # generated while compiling
python/paddle/v2/fluid/core.so
paddle/pybind/pybind.h paddle/pybind/pybind.h
CMakeFiles CMakeFiles
cmake_install.cmake cmake_install.cmake
......
...@@ -19,7 +19,7 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) ...@@ -19,7 +19,7 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(system) include(system)
project(paddle CXX C Go) project(paddle CXX C)
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
...@@ -60,7 +60,7 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) ...@@ -60,7 +60,7 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" ON) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
...@@ -146,6 +146,7 @@ include(external/cares) ...@@ -146,6 +146,7 @@ include(external/cares)
include(external/grpc) include(external/grpc)
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure
include(cupti)
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
include(generic) # simplify cmake module include(generic) # simplify cmake module
include(package) # set paddle packages include(package) # set paddle packages
...@@ -174,7 +175,7 @@ set(EXTERNAL_LIBS ...@@ -174,7 +175,7 @@ set(EXTERNAL_LIBS
) )
if(WITH_GPU) if(WITH_GPU)
include(cuda) include(cuda)
endif(WITH_GPU) endif(WITH_GPU)
if(WITH_MKLML) if(WITH_MKLML)
...@@ -201,17 +202,18 @@ endif() ...@@ -201,17 +202,18 @@ endif()
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# placed after this block, because they depends on it. # placed after this block, because they depends on it.
if(WITH_GOLANG) if(WITH_GOLANG)
enable_language(Go)
add_subdirectory(go) add_subdirectory(go)
endif(WITH_GOLANG) endif(WITH_GOLANG)
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
add_subdirectory(paddle) add_subdirectory(paddle)
if(WITH_PYTHON) if(WITH_PYTHON)
add_subdirectory(python) add_subdirectory(python)
endif() endif()
if(WITH_DOC) if(WITH_DOC)
......
...@@ -22,7 +22,8 @@ COPY ./paddle/scripts/docker/root/ /root/ ...@@ -22,7 +22,8 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y \ apt-get install -y \
git python-pip python-dev openssh-server bison libnccl-dev \ git python-pip python-dev openssh-server bison \
libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \ python-matplotlib gcc-4.8 g++-4.8 \
......
...@@ -21,16 +21,6 @@ RUN apt-get update && \ ...@@ -21,16 +21,6 @@ RUN apt-get update && \
wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
apt-get clean -y apt-get clean -y
# Install Go and glide
RUN wget -qO- go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
mkdir /root/gopath/src
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# git credential to skip password typing # git credential to skip password typing
RUN git config --global credential.helper store RUN git config --global credential.helper store
......
#FROM python:2.7.14
FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04 FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04
RUN apt-get update && apt-get install -y python
RUN pip install -U kubernetes opencv-python && apt-get update -y && apt-get install -y iputils-ping libgtk2.0-dev # you can get mirror list here:
# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF, # https://launchpad.net/ubuntu/+archivemirrors
# so we must build one with distribute support to install in this image. ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev
RUN pip install -U kubernetes opencv-python
RUN pip install paddlepaddle RUN pip install paddlepaddle
# If the network is slow, you may need to add a proxy here.
# ENV https_proxy=
RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python' RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python'
RUN pip uninstall -y paddlepaddle RUN pip uninstall -y paddlepaddle
# Unset the proxy if it was set above.
# ENV https_proxy=""
# NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF,
# so we must build one with distribute support to install in this image.
ADD *.whl /
RUN pip install /*.whl && rm -f /*.whl
ENV LD_LIBRARY_PATH=/usr/local/lib
# tf k8s
RUN pip install tensorflow==1.4.0
ADD tf_k8s /usr/bin
RUN chmod +x /usr/bin/tf_k8s
ADD vgg16_tf.py /workspace/
# below lines may change a lot for debugging # below lines may change a lot for debugging
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
ADD *.whl / RUN chmod +x /usr/bin/paddle_k8s
RUN pip install /*.whl && rm -f /*.whl && \
chmod +x /usr/bin/paddle_k8s
ENV LD_LIBRARY_PATH=/usr/local/lib
ADD vgg16_fluid.py vgg16_v2.py /workspace/ ADD vgg16_fluid.py vgg16_v2.py /workspace/
...@@ -11,7 +11,7 @@ spec: ...@@ -11,7 +11,7 @@ spec:
paddle-job: vgg16job paddle-job: vgg16job
spec: spec:
imagePullSecrets: imagePullSecrets:
- name: job-registry-secret - name: job-registry-secret
hostNetwork: true hostNetwork: true
containers: containers:
- name: trainer - name: trainer
......
#!/bin/bash
# Record a short termination reason for the job, then exit the script.
#
# Arguments:
#   $1 - exit status of the trainer command.
# Side effects:
#   Writes a one-line description to /dev/termination-log for the statuses
#   listed below (other statuses write nothing), logs progress to stdout,
#   and exits the whole script with the status it was given.
check_trainer_ret() {
    # Keep the status local so it does not leak into the script's globals.
    local ret="$1"
    stdbuf -oL echo "job returned $ret...setting pod return message..."
    stdbuf -oL echo "==============================="
    # A quoted string match never fails with "unary operator expected" when
    # the status is empty or non-numeric, unlike a chain of `[ $ret -eq N ]`.
    case "$ret" in
        136)
            echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log
            ;;
        139)
            echo "Segmentation Fault" > /dev/termination-log
            ;;
        1)
            echo "General Error" > /dev/termination-log
            ;;
        134)
            echo "Program Abort" > /dev/termination-log
            ;;
    esac
    stdbuf -oL echo "termination log wroted..."
    # Intentionally unquoted: an empty status degrades to a plain `exit`.
    exit $ret
}
# Comma-separated endpoint lists, filled in by wait_running_pods and read by
# the start_tf_* functions when they build the training command line.
g_pservers=""
g_trainers=""

# Block until the expected pserver and trainer pods are running, then
# collect their endpoints.
#
# Reads:  JOB_NAME, PSERVERS_NUM, TRAINERS_NUM, PORT.
# Writes: g_pservers, g_trainers (output of `k8s_tools.py fetch_endpoints`).
wait_running_pods(){
    # Labels are local: the start_tf_* callers define their own `label`.
    local pserver_label="tf-job-pserver=${JOB_NAME}"
    local trainer_label="tf-job-trainer=${JOB_NAME}"
    stdbuf -oL python /root/k8s_tools.py wait_pods_running "${pserver_label}" "${PSERVERS_NUM}"
    stdbuf -oL python /root/k8s_tools.py wait_pods_running "${trainer_label}" "${TRAINERS_NUM}"
    g_pservers=$(python /root/k8s_tools.py fetch_endpoints "${pserver_label}" "${PORT}")
    g_trainers=$(python /root/k8s_tools.py fetch_endpoints "${trainer_label}" "${PORT}")
}
# Start this pod's parameter-server process.
#
# Waits for all pods, asks k8s_tools.py for this pod's index among the
# pserver pods, then runs ${ENTRY} from ${TRAINER_PACKAGE} with the cluster
# layout and that index as --task_index.
#
# Reads: JOB_NAME, ENTRY, TF_JOB_NAME, TRAINER_PACKAGE, g_pservers, g_trainers.
start_tf_pserver(){
    # Locals so this function and start_tf_trainer cannot clobber each
    # other's `label` / `cmd`.
    local label pserver_id cmd
    wait_running_pods
    label="tf-job-pserver=${JOB_NAME}"
    pserver_id=$(python /root/k8s_tools.py fetch_id "${label}")
    cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
--job_name=${TF_JOB_NAME} --task_index=${pserver_id}"
    stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
}
# Start this pod's trainer (worker) process and report its exit status.
#
# Waits for all pods, asks k8s_tools.py for this pod's index among the
# trainer pods, runs ${ENTRY} from ${TRAINER_PACKAGE} with the cluster layout,
# that index as --task_index and ${BATCH_SIZE}, then hands the command's exit
# status to check_trainer_ret (which exits the script).
#
# Reads: JOB_NAME, ENTRY, TF_JOB_NAME, BATCH_SIZE, TRAINER_PACKAGE,
#        g_pservers, g_trainers.
start_tf_trainer(){
    # Locals so this function and start_tf_pserver cannot clobber each
    # other's `label` / `cmd`.
    local label trainer_id cmd
    wait_running_pods
    label="tf-job-trainer=${JOB_NAME}"
    trainer_id=$(python /root/k8s_tools.py fetch_id "${label}")
    cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
--job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}"
    stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
    # Must directly follow the training command so $? is its status.
    check_trainer_ret $?
}
# Launch the role selected by TF_JOB_NAME: "worker" starts a trainer,
# any other value starts a parameter server.
start_tf(){
    case "${TF_JOB_NAME}" in
        worker)
            start_tf_trainer
            ;;
        *)
            start_tf_pserver
            ;;
    esac
}
# Print the command-line help for this script to stdout.
usage() {
    printf '%s\n' \
        "usage: tf_k8s [<args>]:" \
        " start_tf Start tensorflow jobs"
}
# Entry point: run the requested sub-command; anything else (including
# --help and no argument at all) prints the usage text.
if [ "$1" = "start_tf" ]; then
    start_tf
else
    usage
fi
# ReplicaSet that runs the parameter-server pods of the TensorFlow VGG16
# benchmark job. Each pod runs `tf_k8s start_tf`; TF_JOB_NAME="ps" makes that
# script take its pserver branch.
apiVersion: extensions/v1beta1
kind: ReplicaSet
metadata:
name: vgg16job-tf-pserver
spec:
# Keep in sync with the PSERVERS_NUM env value below.
replicas: 10
template:
metadata:
labels:
# tf_k8s selects pserver pods by the label tf-job-pserver=${JOB_NAME}.
tf-job-pserver: vgg16job-tf
spec:
hostNetwork: true
imagePullSecrets:
- name: job-registry-secret
containers:
- name: pserver
image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
imagePullPolicy: Always
command: ["tf_k8s", "start_tf"]
ports:
# NOTE(review): the declared port is 30236 but the PORT env value below is
# "32036", and PORT is what tf_k8s uses to build the endpoint lists. One of
# the two looks like a transposed-digit typo; confirm which is intended and
# change the pserver and trainer manifests together.
- name: jobport-30236
containerPort: 30236
env:
- name: PORT
value: "32036"
# Command that tf_k8s runs from TRAINER_PACKAGE.
- name: ENTRY
value: "python vgg16_tf.py"
- name: JOB_NAME
value: vgg16job-tf
- name: PSERVERS_NUM
value: "10"
- name: TF_JOB_NAME
value: "ps"
- name: TRAINERS_NUM
value: "20"
- name: BATCH_SIZE
value: "128"
- name: TRAINER_PACKAGE
value: "/workspace"
# NOTE(review): tf_k8s does not forward NUM_PASSES on the ENTRY command line,
# so this value appears to have no effect; confirm.
- name: NUM_PASSES
value: "1"
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: "metadata.namespace"
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: "status.podIP"
resources:
requests:
memory: 10Gi
cpu: 4
limits:
memory: 10Gi
cpu: 4
# Job that runs the trainer (worker) pods of the TensorFlow VGG16 benchmark.
# Each pod runs `tf_k8s start_tf`; TF_JOB_NAME="worker" makes that script take
# its trainer branch.
apiVersion: batch/v1
kind: Job
metadata:
name: vgg16job-tf-trainer
spec:
# Keep both values in sync with the TRAINERS_NUM env value below.
parallelism: 20
completions: 20
template:
metadata:
labels:
# tf_k8s selects trainer pods by the label tf-job-trainer=${JOB_NAME}.
tf-job-trainer: vgg16job-tf
spec:
imagePullSecrets:
- name: job-registry-secret
hostNetwork: true
containers:
- name: trainer
image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
imagePullPolicy: Always
command: ["tf_k8s", "start_tf"]
ports:
# NOTE(review): the declared port is 30236 but the PORT env value below is
# "32036", and PORT is what tf_k8s uses to build the endpoint lists. One of
# the two looks like a transposed-digit typo; confirm which is intended and
# change the pserver and trainer manifests together.
- name: jobport-30236
containerPort: 30236
env:
- name: PORT
value: "32036"
- name: JOB_NAME
value: vgg16job-tf
- name: TF_JOB_NAME
value: "worker"
# Command that tf_k8s runs from TRAINER_PACKAGE.
- name: ENTRY
value: "python vgg16_tf.py"
- name: PSERVERS_NUM
value: "10"
# Forwarded by tf_k8s to ENTRY as --batch_size.
- name: BATCH_SIZE
value: "128"
- name: TRAINERS_NUM
value: "20"
- name: TRAINER_PACKAGE
value: "/workspace"
# NOTE(review): tf_k8s does not forward NUM_PASSES on the ENTRY command line,
# so this value appears to have no effect; confirm.
- name: NUM_PASSES
value: "1"
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: "metadata.namespace"
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: "status.podIP"
resources:
requests:
memory: 40Gi
cpu: 2
limits:
memory: 40Gi
cpu: 2
restartPolicy: Never
...@@ -68,6 +68,21 @@ parser.add_argument( ...@@ -68,6 +68,21 @@ parser.add_argument(
type=str2bool, type=str2bool,
default=True, default=True,
help='Whether to run as local mode.') help='Whether to run as local mode.')
parser.add_argument(
"--ps_hosts",
type=str,
default="",
help="Comma-separated list of hostname:port pairs")
parser.add_argument(
"--trainer_hosts",
type=str,
default="",
help="Comma-separated list of hostname:port pairs")
# Flags for defining the tf.train.Server
parser.add_argument(
"--task_index", type=int, default=0, help="Index of task within the job")
args = parser.parse_args() args = parser.parse_args()
...@@ -180,8 +195,9 @@ def main(): ...@@ -180,8 +195,9 @@ def main():
iters += 1 iters += 1
num_samples += len(data) num_samples += len(data)
print( print(
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, spent %f" "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed = %.2f img/s"
% (pass_id, iters, loss, acc, time.time() - ts) % (pass_id, iters, loss, acc,
len(data) / (time.time() - ts))
) # The accuracy is the accumulation of batches, but not the current batch. ) # The accuracy is the accumulation of batches, but not the current batch.
pass_elapsed = time.time() - start_time pass_elapsed = time.time() - start_time
...@@ -209,27 +225,24 @@ def main(): ...@@ -209,27 +225,24 @@ def main():
batch_size=args.batch_size) batch_size=args.batch_size)
train_loop(exe, fluid.default_main_program()) train_loop(exe, fluid.default_main_program())
else: else:
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # all pserver endpoints
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, "6174"]))
pserver_endpoints = ",".join(eplist)
print("pserver endpoints: ", pserver_endpoints)
trainers = int(os.getenv("TRAINERS")) # total trainer count trainers = int(os.getenv("TRAINERS")) # total trainer count
print("trainers total: ", trainers) print("trainers total: ", trainers)
current_endpoint = os.getenv(
"POD_IP") + ":6174" # current pserver endpoint
training_role = os.getenv( training_role = os.getenv(
"TRAINING_ROLE", "TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver "TRAINER") # get the training role: trainer/pserver
t = fluid.DistributeTranspiler() t = fluid.DistributeTranspiler()
t.transpile( t.transpile(
optimize_ops, optimize_ops,
params_grads, params_grads,
pservers=pserver_endpoints, trainer_id=args.task_index,
pservers=args.ps_hosts,
trainers=trainers) trainers=trainers)
if training_role == "PSERVER": if training_role == "PSERVER":
current_endpoint = os.getenv("POD_IP") + ":" + os.getenv(
"PADDLE_INIT_PORT")
if not current_endpoint: if not current_endpoint:
print("need env SERVER_ENDPOINT") print("need env SERVER_ENDPOINT")
exit(1) exit(1)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""VGG16 benchmark in TensorFlow
You can get distribution example template structure here:
https://medium.com/clusterone/how-to-write-distributed-tensorflow-code-with-an-example-on-tensorport-70bf3306adcb
https://www.tensorflow.org/deploy/distributed
"""
import tensorflow as tf
import paddle.v2 as paddle
import numpy as np
import argparse
import time
# Command-line interface. The parsed result is stored in the module-level
# ``args`` object, which the model and benchmark code below read directly.
parser = argparse.ArgumentParser(description=__doc__)

# --- training hyper-parameters ---
parser.add_argument(
    '--batch_size', type=int, default=128, help="Batch size for training.")
parser.add_argument(
    '--learning_rate',
    type=float,
    default=1e-3,
    help="Learning rate for training.")
parser.add_argument('--num_passes', type=int, default=50, help="No. of passes.")

# --- device / data layout / dataset ---
parser.add_argument(
    '--device',
    type=str,
    default='CPU',
    choices=['CPU', 'GPU'],
    help="The device type.")
parser.add_argument(
    '--data_format',
    type=str,
    default='NHWC',
    choices=['NCHW', 'NHWC'],
    # The two literals are concatenated, so the first one must end with a
    # space to keep the sentences apart in the --help output.
    help='The data order, NCHW=[batch, channels, height, width]. '
    'Only support NHWC right now.')
parser.add_argument(
    '--data_set',
    type=str,
    default='cifar10',
    choices=['cifar10', 'flowers'],
    help='Optional dataset for benchmark.')

# --- cluster layout ---
parser.add_argument(
    "--ps_hosts",
    type=str,
    default="",
    help="Comma-separated list of hostname:port pairs")
parser.add_argument(
    "--worker_hosts",
    type=str,
    default="",
    help="Comma-separated list of hostname:port pairs")
parser.add_argument(
    "--job_name", type=str, default="", help="One of 'worker', 'ps'")
# Flags for defining the tf.train.Server
parser.add_argument(
    "--task_index", type=int, default=0, help="Index of task within the job")

args = parser.parse_args()
class VGG16Model(object):
    """VGG16-style network with batch normalization, built from TF ops.

    Layer construction reads the module-level ``args.data_format``.
    """

    def __init__(self):
        # Nothing in this class appends to the list; kept so existing
        # attribute access keeps working.
        self.parameters = []

    def batch_norm_relu(self, inputs, is_training):
        """Performs a batch normalization followed by a ReLU."""
        # We set fused=True for a significant speed boost. See
        # https://www.tensorflow.org/speed/speed_guide#common_fused_ops
        inputs = tf.layers.batch_normalization(
            inputs=inputs,
            axis=1 if args.data_format == 'NCHW' else -1,
            momentum=0.9,
            epsilon=1e-05,
            center=True,
            scale=True,
            training=is_training,
            fused=True)
        inputs = tf.nn.relu(inputs)
        return inputs

    def conv_bn_layer(self,
                      name,
                      images,
                      kernel_shape,
                      is_training,
                      drop_rate=0.0):
        """Convolution + bias + batch-norm/ReLU + dropout under one name scope.

        Args:
            name: name scope for the ops created here.
            images: input tensor of the convolution.
            kernel_shape: shape of the convolution kernel; its last element
                is also used as the length of the bias vector.
            is_training: passed as ``training`` to batch norm and dropout.
            drop_rate: dropout rate applied to the layer output.

        Returns:
            The output tensor of the dropout op.
        """
        with tf.name_scope(name):
            kernel = tf.Variable(
                tf.truncated_normal(
                    kernel_shape, dtype=tf.float32, stddev=1e-1),
                name='weights')
            conv = tf.nn.conv2d(
                images,
                kernel, [1, 1, 1, 1],
                data_format=args.data_format,
                padding='SAME')
            biases = tf.Variable(
                tf.constant(
                    0.0, shape=[kernel_shape[-1]], dtype=tf.float32),
                trainable=True,
                name='biases')
            out = tf.nn.bias_add(conv, biases)
            out = self.batch_norm_relu(out, is_training)
            out = tf.layers.dropout(out, rate=drop_rate, training=is_training)
            return out

    def fc_layer(self, name, inputs, shape):
        """Fully connected layer (``inputs * W + b``) without activation.

        Args:
            name: name scope for the ops created here.
            inputs: input tensor of the matmul.
            shape: shape of the weight matrix; its last element is also used
                as the length of the bias vector.

        Returns:
            The output tensor of the bias-add op.
        """
        with tf.name_scope(name):
            fc_w = tf.Variable(
                tf.truncated_normal(
                    shape, dtype=tf.float32, stddev=1e-1),
                name='weights')
            fc_b = tf.Variable(
                tf.constant(
                    0.0, shape=[shape[-1]], dtype=tf.float32),
                trainable=True,
                name='biases')
            out = tf.nn.bias_add(tf.matmul(inputs, fc_w), fc_b)
            return out

    def network(self, images, class_dim, is_training):
        """ VGG16 model structure.

        Five conv groups, each followed by a 2x2 max pool, then three fully
        connected layers; the last one has ``class_dim`` outputs and is
        returned.

        TODO(kuke): enable this network to support the 'NCHW' data format
        """
        # conv1
        conv1_1 = self.conv_bn_layer(
            'conv1_1', images, [3, 3, 3, 64], is_training, drop_rate=0.3)
        conv1_2 = self.conv_bn_layer(
            'conv1_2', conv1_1, [3, 3, 64, 64], is_training, drop_rate=0.0)
        # pool1
        pool1 = tf.nn.max_pool(
            conv1_2,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name='pool1')
        # conv2
        conv2_1 = self.conv_bn_layer(
            'conv2_1', pool1, [3, 3, 64, 128], is_training, drop_rate=0.4)
        conv2_2 = self.conv_bn_layer(
            'conv2_2', conv2_1, [3, 3, 128, 128], is_training, drop_rate=0.0)
        # pool2
        pool2 = tf.nn.max_pool(
            conv2_2,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name='pool2')
        # conv3
        conv3_1 = self.conv_bn_layer(
            'conv3_1', pool2, [3, 3, 128, 256], is_training, drop_rate=0.4)
        conv3_2 = self.conv_bn_layer(
            'conv3_2', conv3_1, [3, 3, 256, 256], is_training, drop_rate=0.4)
        conv3_3 = self.conv_bn_layer(
            'conv3_3', conv3_2, [3, 3, 256, 256], is_training, drop_rate=0.0)
        # pool3
        pool3 = tf.nn.max_pool(
            conv3_3,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name='pool3')
        # conv4
        conv4_1 = self.conv_bn_layer(
            'conv4_1', pool3, [3, 3, 256, 512], is_training, drop_rate=0.4)
        conv4_2 = self.conv_bn_layer(
            'conv4_2', conv4_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
        conv4_3 = self.conv_bn_layer(
            'conv4_3', conv4_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
        # pool4
        pool4 = tf.nn.max_pool(
            conv4_3,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name='pool4')
        # conv5
        conv5_1 = self.conv_bn_layer(
            'conv5_1', pool4, [3, 3, 512, 512], is_training, drop_rate=0.4)
        conv5_2 = self.conv_bn_layer(
            'conv5_2', conv5_1, [3, 3, 512, 512], is_training, drop_rate=0.4)
        conv5_3 = self.conv_bn_layer(
            'conv5_3', conv5_2, [3, 3, 512, 512], is_training, drop_rate=0.0)
        # pool5 (was mistakenly named 'pool4', duplicating the op above)
        pool5 = tf.nn.max_pool(
            conv5_3,
            ksize=[1, 2, 2, 1],
            strides=[1, 2, 2, 1],
            padding='SAME',
            name='pool5')
        # flatten everything except the leading (batch) dimension
        shape = int(np.prod(pool5.get_shape()[1:]))
        pool5_flat = tf.reshape(pool5, [-1, shape])
        # fc1
        drop = tf.layers.dropout(pool5_flat, rate=0.5, training=is_training)
        fc1 = self.fc_layer('fc1', drop, [shape, 512])
        # fc2
        bn = self.batch_norm_relu(fc1, is_training)
        drop = tf.layers.dropout(bn, rate=0.5, training=is_training)
        fc2 = self.fc_layer('fc2', drop, [512, 512])
        # fc3: fed directly from fc2, no activation in between
        fc3 = self.fc_layer('fc3', fc2, [512, class_dim])
        return fc3
def run_benchmark(cluster_spec, server):
    """Run benchmark on cifar10 or flowers.

    Builds the VGG16 graph for this worker, then for each of
    ``args.num_passes`` passes trains over the training reader (printing
    per-iteration loss, accuracy and speed) and evaluates on the test reader.

    Args:
        cluster_spec: cluster description handed to
            ``tf.train.replica_device_setter``.
        server: object whose ``target`` is used as the session master.
    """
    if args.data_set == "cifar10":
        class_dim = 10
        raw_shape = (3, 32, 32)
        dat_shape = (None, 32, 32, 3) if args.data_format == 'NHWC' else (
            None, 3, 32, 32)
    else:
        class_dim = 102
        raw_shape = (3, 224, 224)
        dat_shape = (None, 224, 224, 3) if args.data_format == 'NHWC' else (
            None, 3, 224, 224)

    device = tf.train.replica_device_setter(
        worker_device="/job:worker/task:{}".format(args.task_index),
        cluster=cluster_spec)

    with tf.device(device):
        images = tf.placeholder(tf.float32, shape=dat_shape)
        labels = tf.placeholder(tf.int64, shape=(None, ))
        is_training = tf.placeholder('bool')
        onehot_labels = tf.one_hot(labels, depth=class_dim)

        vgg16 = VGG16Model()
        logits = vgg16.network(images, class_dim, is_training)
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=onehot_labels, logits=logits)
        avg_loss = tf.reduce_mean(loss)

        correct = tf.equal(tf.argmax(logits, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        # Run the collected update ops together with every training step.
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(avg_loss, global_step=global_step)

        # Not referenced again in this function; kept so the graph that is
        # built stays the same as before.
        summary_op = tf.summary.merge_all()
        init_op = tf.global_variables_initializer()

    # data reader
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.test10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
            buf_size=5120),
        batch_size=args.batch_size)

    def to_feed_arrays(data):
        """Turn a batch of (image, label) samples into feedable arrays.

        Every image is reshaped to ``raw_shape`` (channels first) and, for
        'NHWC', transposed to channels last. List comprehensions are used
        instead of ``map`` so the result is a proper array on Python 3 too.
        """
        batch_images = [sample[0].reshape(raw_shape) for sample in data]
        if args.data_format == 'NHWC':
            batch_images = [
                np.transpose(img, axes=[1, 2, 0]) for img in batch_images
            ]
        batch_labels = [sample[1] for sample in data]
        return (np.array(batch_images).astype("float32"),
                np.array(batch_labels).astype('int64'))

    # test
    def test(session):
        """Return the mean per-batch accuracy over the test reader."""
        test_accs = []
        for data in test_reader():
            test_images, test_labels = to_feed_arrays(data)
            # Evaluate through the session explicitly: entering a
            # MonitoredTrainingSession does not register a default session,
            # so ``accuracy.eval(...)`` without one would raise.
            test_accs.append(
                session.run(accuracy,
                            feed_dict={
                                images: test_images,
                                labels: test_labels,
                                is_training: False
                            }))
        return np.mean(test_accs)

    # NOTE(review): this config is built but never passed to the session
    # below, so it currently has no effect. Passing it would limit TF to one
    # thread and change the benchmark numbers - confirm the intent first.
    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True

    hooks = [tf.train.StopAtStepHook(last_step=1000000)]

    with tf.train.MonitoredTrainingSession(
            master=server.target, is_chief=(args.task_index == 0),
            hooks=hooks) as sess:
        iters, num_samples, start_time = 0, 0, 0.0
        for pass_id in range(args.num_passes):
            # train
            num_samples = 0
            start_time = time.time()
            for data in train_reader():
                train_images, train_labels = to_feed_arrays(data)
                iter_begin_time = time.time()
                _, loss, acc = sess.run([train_op, avg_loss, accuracy],
                                        feed_dict={
                                            images: train_images,
                                            labels: train_labels,
                                            is_training: True
                                        })
                iters += 1
                print(
                    "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, Speed=%.2f imgs/sec"
                    % (pass_id, iters, loss, acc,
                       len(data) / (time.time() - iter_begin_time)))
                num_samples += len(data)
            train_elapsed = time.time() - start_time
            # test
            pass_test_acc = test(sess)
            print("Pass = %d, Train speed = %f imgs/s, Test accuracy = %f\n" %
                  (pass_id, num_samples / train_elapsed, pass_test_acc))
def print_arguments():
    """Print every parsed command-line argument as ``name: value``.

    Reads the module-level ``args`` namespace and lists its entries sorted
    by argument name between two separator lines.
    """
    print('----------- Configuration Arguments -----------')
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is
    # Python 2 only.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
if __name__ == '__main__':
    print_arguments()

    # Describe the cluster: one job per role, each listing its host:port
    # endpoints as given on the command line.
    cluster_spec = tf.train.ClusterSpec({
        "ps": args.ps_hosts.split(","),
        "worker": args.worker_hosts.split(",")
    })

    # Start the server for this process's own role and task index.
    server = tf.train.Server(
        cluster_spec, job_name=args.job_name, task_index=args.task_index)

    role = args.job_name
    if role == "ps":
        # Parameter servers only serve; join() blocks.
        print("start pserver")
        server.join()
    elif role == "worker":
        print("start worker")
        run_benchmark(cluster_spec, server)
...@@ -59,6 +59,7 @@ endif(NOT WITH_GOLANG) ...@@ -59,6 +59,7 @@ endif(NOT WITH_GOLANG)
if(NOT WITH_GPU) if(NOT WITH_GPU)
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
add_definitions("-DCUPTI_LIB_PATH=\"\"")
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else() else()
...@@ -73,7 +74,14 @@ else() ...@@ -73,7 +74,14 @@ else()
if(NOT CUDNN_FOUND) if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle needs cudnn to compile") message(FATAL_ERROR "Paddle needs cudnn to compile")
endif() endif()
if(CUPTI_FOUND)
include_directories(${CUPTI_INCLUDE_DIR})
add_definitions(-DPADDLE_WITH_CUPTI)
add_definitions("-DCUPTI_LIB_PATH=\"${CUPTI_LIBRARY_PATH}\"")
else()
add_definitions("-DCUPTI_LIB_PATH=\"\"")
message(STATUS "Cannot find CUPTI, GPU Profiling is incorrect.")
endif()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}") set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
# Include cuda and cudnn # Include cuda and cudnn
......
...@@ -155,7 +155,8 @@ endif() ...@@ -155,7 +155,8 @@ endif()
include_directories(${CUDA_INCLUDE_DIRS}) include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO) if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY}) # TODO(panyx0718): CUPTI only allows DSO?
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
# setting nvcc arch flags # setting nvcc arch flags
......
# Locate the CUPTI header and shared library for GPU builds.
#
# Inputs:
#   WITH_GPU               - the module does nothing unless this is true.
#   CUPTI_ROOT             - cache path (default "/usr") or environment
#                            variable giving extra search roots.
#   CUDA_TOOLKIT_ROOT_DIR  - its extras/CUPTI sub-directories are searched.
#   CUDA_CUDART_LIBRARY    - its directory is searched for the library.
# Outputs:
#   CUPTI_INCLUDE_DIR  - directory containing cupti.h.
#   CUPTI_LIBRARY      - full path of the CUPTI library.
#   CUPTI_LIBRARY_PATH - directory part of CUPTI_LIBRARY.
#   CUPTI_FOUND        - ON only when both header and library were found.
if(NOT WITH_GPU)
  return()
endif()

set(CUPTI_ROOT "/usr" CACHE PATH "CUPTI ROOT")
find_path(CUPTI_INCLUDE_DIR cupti.h
  PATHS ${CUPTI_ROOT} ${CUPTI_ROOT}/include
        $ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include
        ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
  NO_DEFAULT_PATH
)

# Quoted so an unset CUDA_CUDART_LIBRARY yields an empty result instead of a
# "wrong number of arguments" configure error.
get_filename_component(__libpath_hist "${CUDA_CUDART_LIBRARY}" PATH)

# Default to x86_64 and override with the detected processor when there is
# one. The previous test, if(NOT ${CMAKE_SYSTEM_PROCESSOR}), expanded the
# value first and therefore asked whether a variable *named after the
# processor* was false, not whether the processor was known.
set(TARGET_ARCH "x86_64")
if(CMAKE_SYSTEM_PROCESSOR)
  set(TARGET_ARCH "${CMAKE_SYSTEM_PROCESSOR}")
endif()

list(APPEND CUPTI_CHECK_LIBRARY_DIRS
  ${CUPTI_ROOT}
  ${CUPTI_ROOT}/lib64
  ${CUPTI_ROOT}/lib
  ${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu
  $ENV{CUPTI_ROOT}
  $ENV{CUPTI_ROOT}/lib64
  $ENV{CUPTI_ROOT}/lib
  /usr/lib
  ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64)

find_library(CUPTI_LIBRARY NAMES libcupti.so libcupti.dylib # libcupti_static.a
  PATHS ${CUPTI_CHECK_LIBRARY_DIRS} ${CUPTI_INCLUDE_DIR} ${__libpath_hist}
  NO_DEFAULT_PATH
  DOC "Path to cuPTI library.")

# Quoted for the same reason as above: CUPTI_LIBRARY may be empty.
get_filename_component(CUPTI_LIBRARY_PATH "${CUPTI_LIBRARY}" DIRECTORY)

if(CUPTI_INCLUDE_DIR AND CUPTI_LIBRARY)
  set(CUPTI_FOUND ON)
else()
  set(CUPTI_FOUND OFF)
endif()
...@@ -8,7 +8,7 @@ data_feeder ...@@ -8,7 +8,7 @@ data_feeder
DataFeeder DataFeeder
---------- ----------
.. autoclass:: paddle.v2.fluid.data_feeder.DataFeeder .. autoclass:: paddle.fluid.data_feeder.DataFeeder
:members: :members:
:noindex: :noindex:
...@@ -8,14 +8,14 @@ evaluator ...@@ -8,14 +8,14 @@ evaluator
Accuracy Accuracy
-------- --------
.. autoclass:: paddle.v2.fluid.evaluator.Accuracy .. autoclass:: paddle.fluid.evaluator.Accuracy
:members: :members:
:noindex: :noindex:
ChunkEvaluator ChunkEvaluator
-------------- --------------
.. autoclass:: paddle.v2.fluid.evaluator.ChunkEvaluator .. autoclass:: paddle.fluid.evaluator.ChunkEvaluator
:members: :members:
:noindex: :noindex:
...@@ -8,25 +8,25 @@ executor ...@@ -8,25 +8,25 @@ executor
Executor Executor
-------- --------
.. autoclass:: paddle.v2.fluid.executor.Executor .. autoclass:: paddle.fluid.executor.Executor
:members: :members:
:noindex: :noindex:
global_scope global_scope
------------ ------------
.. autofunction:: paddle.v2.fluid.executor.global_scope .. autofunction:: paddle.fluid.executor.global_scope
:noindex: :noindex:
scope_guard scope_guard
----------- -----------
.. autofunction:: paddle.v2.fluid.executor.scope_guard .. autofunction:: paddle.fluid.executor.scope_guard
:noindex: :noindex:
switch_scope switch_scope
------------ ------------
.. autofunction:: paddle.v2.fluid.executor.switch_scope .. autofunction:: paddle.fluid.executor.switch_scope
:noindex: :noindex:
...@@ -17,7 +17,7 @@ import argparse ...@@ -17,7 +17,7 @@ import argparse
import sys import sys
import types import types
import paddle.v2.fluid as fluid import paddle.fluid as fluid
def parse_arg(): def parse_arg():
...@@ -70,7 +70,7 @@ class DocGenerator(object): ...@@ -70,7 +70,7 @@ class DocGenerator(object):
def print_class(self, name): def print_class(self, name):
self._print_header_(name, dot='-', is_title=False) self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autoclass:: paddle.v2.fluid.{0}.{1} self.stream.write('''.. autoclass:: paddle.fluid.{0}.{1}
:members: :members:
:noindex: :noindex:
...@@ -78,7 +78,7 @@ class DocGenerator(object): ...@@ -78,7 +78,7 @@ class DocGenerator(object):
def print_method(self, name): def print_method(self, name):
self._print_header_(name, dot='-', is_title=False) self._print_header_(name, dot='-', is_title=False)
self.stream.write('''.. autofunction:: paddle.v2.fluid.{0}.{1} self.stream.write('''.. autofunction:: paddle.fluid.{0}.{1}
:noindex: :noindex:
'''.format(self.module_name, name)) '''.format(self.module_name, name))
......
======================
Fluid
======================
.. toctree::
:maxdepth: 1
layers.rst
data_feeder.rst
executor.rst
initializer.rst
evaluator.rst
nets.rst
optimizer.rst
param_attr.rst
profiler.rst
regularizer.rst
io.rst
...@@ -8,28 +8,28 @@ initializer ...@@ -8,28 +8,28 @@ initializer
Constant Constant
-------- --------
.. autoclass:: paddle.v2.fluid.initializer.Constant .. autoclass:: paddle.fluid.initializer.Constant
:members: :members:
:noindex: :noindex:
Uniform Uniform
------- -------
.. autoclass:: paddle.v2.fluid.initializer.Uniform .. autoclass:: paddle.fluid.initializer.Uniform
:members: :members:
:noindex: :noindex:
Normal Normal
------ ------
.. autoclass:: paddle.v2.fluid.initializer.Normal .. autoclass:: paddle.fluid.initializer.Normal
:members: :members:
:noindex: :noindex:
Xavier Xavier
------ ------
.. autoclass:: paddle.v2.fluid.initializer.Xavier .. autoclass:: paddle.fluid.initializer.Xavier
:members: :members:
:noindex: :noindex:
...@@ -8,54 +8,54 @@ io ...@@ -8,54 +8,54 @@ io
save_vars save_vars
--------- ---------
.. autofunction:: paddle.v2.fluid.io.save_vars .. autofunction:: paddle.fluid.io.save_vars
:noindex: :noindex:
save_params save_params
----------- -----------
.. autofunction:: paddle.v2.fluid.io.save_params .. autofunction:: paddle.fluid.io.save_params
:noindex: :noindex:
save_persistables save_persistables
----------------- -----------------
.. autofunction:: paddle.v2.fluid.io.save_persistables .. autofunction:: paddle.fluid.io.save_persistables
:noindex: :noindex:
load_vars load_vars
--------- ---------
.. autofunction:: paddle.v2.fluid.io.load_vars .. autofunction:: paddle.fluid.io.load_vars
:noindex: :noindex:
load_params load_params
----------- -----------
.. autofunction:: paddle.v2.fluid.io.load_params .. autofunction:: paddle.fluid.io.load_params
:noindex: :noindex:
load_persistables load_persistables
----------------- -----------------
.. autofunction:: paddle.v2.fluid.io.load_persistables .. autofunction:: paddle.fluid.io.load_persistables
:noindex: :noindex:
save_inference_model save_inference_model
-------------------- --------------------
.. autofunction:: paddle.v2.fluid.io.save_inference_model .. autofunction:: paddle.fluid.io.save_inference_model
:noindex: :noindex:
load_inference_model load_inference_model
-------------------- --------------------
.. autofunction:: paddle.v2.fluid.io.load_inference_model .. autofunction:: paddle.fluid.io.load_inference_model
:noindex: :noindex:
get_inference_program get_inference_program
--------------------- ---------------------
.. autofunction:: paddle.v2.fluid.io.get_inference_program .. autofunction:: paddle.fluid.io.get_inference_program
:noindex: :noindex:
...@@ -11,167 +11,167 @@ control_flow ...@@ -11,167 +11,167 @@ control_flow
split_lod_tensor split_lod_tensor
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor .. autofunction:: paddle.fluid.layers.split_lod_tensor
:noindex: :noindex:
merge_lod_tensor merge_lod_tensor
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor .. autofunction:: paddle.fluid.layers.merge_lod_tensor
:noindex: :noindex:
BlockGuard BlockGuard
---------- ----------
.. autoclass:: paddle.v2.fluid.layers.BlockGuard .. autoclass:: paddle.fluid.layers.BlockGuard
:members: :members:
:noindex: :noindex:
BlockGuardWithCompletion BlockGuardWithCompletion
------------------------ ------------------------
.. autoclass:: paddle.v2.fluid.layers.BlockGuardWithCompletion .. autoclass:: paddle.fluid.layers.BlockGuardWithCompletion
:members: :members:
:noindex: :noindex:
StaticRNNMemoryLink StaticRNNMemoryLink
------------------- -------------------
.. autoclass:: paddle.v2.fluid.layers.StaticRNNMemoryLink .. autoclass:: paddle.fluid.layers.StaticRNNMemoryLink
:members: :members:
:noindex: :noindex:
WhileGuard WhileGuard
---------- ----------
.. autoclass:: paddle.v2.fluid.layers.WhileGuard .. autoclass:: paddle.fluid.layers.WhileGuard
:members: :members:
:noindex: :noindex:
While While
----- -----
.. autoclass:: paddle.v2.fluid.layers.While .. autoclass:: paddle.fluid.layers.While
:members: :members:
:noindex: :noindex:
lod_rank_table lod_rank_table
-------------- --------------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table .. autofunction:: paddle.fluid.layers.lod_rank_table
:noindex: :noindex:
max_sequence_len max_sequence_len
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.max_sequence_len .. autofunction:: paddle.fluid.layers.max_sequence_len
:noindex: :noindex:
topk topk
---- ----
.. autofunction:: paddle.v2.fluid.layers.topk .. autofunction:: paddle.fluid.layers.topk
:noindex: :noindex:
lod_tensor_to_array lod_tensor_to_array
------------------- -------------------
.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array .. autofunction:: paddle.fluid.layers.lod_tensor_to_array
:noindex: :noindex:
array_to_lod_tensor array_to_lod_tensor
------------------- -------------------
.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor .. autofunction:: paddle.fluid.layers.array_to_lod_tensor
:noindex: :noindex:
increment increment
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.increment .. autofunction:: paddle.fluid.layers.increment
:noindex: :noindex:
array_write array_write
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.array_write .. autofunction:: paddle.fluid.layers.array_write
:noindex: :noindex:
create_array create_array
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.create_array .. autofunction:: paddle.fluid.layers.create_array
:noindex: :noindex:
less_than less_than
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.less_than .. autofunction:: paddle.fluid.layers.less_than
:noindex: :noindex:
array_read array_read
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.array_read .. autofunction:: paddle.fluid.layers.array_read
:noindex: :noindex:
shrink_memory shrink_memory
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.shrink_memory .. autofunction:: paddle.fluid.layers.shrink_memory
:noindex: :noindex:
array_length array_length
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.array_length .. autofunction:: paddle.fluid.layers.array_length
:noindex: :noindex:
IfElse IfElse
------ ------
.. autoclass:: paddle.v2.fluid.layers.IfElse .. autoclass:: paddle.fluid.layers.IfElse
:members: :members:
:noindex: :noindex:
DynamicRNN DynamicRNN
---------- ----------
.. autoclass:: paddle.v2.fluid.layers.DynamicRNN .. autoclass:: paddle.fluid.layers.DynamicRNN
:members: :members:
:noindex: :noindex:
ConditionalBlock ConditionalBlock
---------------- ----------------
.. autoclass:: paddle.v2.fluid.layers.ConditionalBlock .. autoclass:: paddle.fluid.layers.ConditionalBlock
:members: :members:
:noindex: :noindex:
StaticRNN StaticRNN
--------- ---------
.. autoclass:: paddle.v2.fluid.layers.StaticRNN .. autoclass:: paddle.fluid.layers.StaticRNN
:members: :members:
:noindex: :noindex:
reorder_lod_tensor_by_rank reorder_lod_tensor_by_rank
-------------------------- --------------------------
.. autofunction:: paddle.v2.fluid.layers.reorder_lod_tensor_by_rank .. autofunction:: paddle.fluid.layers.reorder_lod_tensor_by_rank
:noindex: :noindex:
ParallelDo ParallelDo
---------- ----------
.. autoclass:: paddle.v2.fluid.layers.ParallelDo .. autoclass:: paddle.fluid.layers.ParallelDo
:members: :members:
:noindex: :noindex:
Print Print
----- -----
.. autofunction:: paddle.v2.fluid.layers.Print .. autofunction:: paddle.fluid.layers.Print
:noindex: :noindex:
device device
...@@ -180,7 +180,7 @@ device ...@@ -180,7 +180,7 @@ device
get_places get_places
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.get_places .. autofunction:: paddle.fluid.layers.get_places
:noindex: :noindex:
io io
...@@ -189,27 +189,27 @@ io ...@@ -189,27 +189,27 @@ io
data data
---- ----
.. autofunction:: paddle.v2.fluid.layers.data .. autofunction:: paddle.fluid.layers.data
:noindex: :noindex:
BlockGuardServ BlockGuardServ
-------------- --------------
.. autoclass:: paddle.v2.fluid.layers.BlockGuardServ .. autoclass:: paddle.fluid.layers.BlockGuardServ
:members: :members:
:noindex: :noindex:
ListenAndServ ListenAndServ
------------- -------------
.. autoclass:: paddle.v2.fluid.layers.ListenAndServ .. autoclass:: paddle.fluid.layers.ListenAndServ
:members: :members:
:noindex: :noindex:
Send Send
---- ----
.. autofunction:: paddle.v2.fluid.layers.Send .. autofunction:: paddle.fluid.layers.Send
:noindex: :noindex:
nn nn
...@@ -218,259 +218,259 @@ nn ...@@ -218,259 +218,259 @@ nn
fc fc
-- --
.. autofunction:: paddle.v2.fluid.layers.fc .. autofunction:: paddle.fluid.layers.fc
:noindex: :noindex:
embedding embedding
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.embedding .. autofunction:: paddle.fluid.layers.embedding
:noindex: :noindex:
dynamic_lstm dynamic_lstm
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm .. autofunction:: paddle.fluid.layers.dynamic_lstm
:noindex: :noindex:
dynamic_lstmp dynamic_lstmp
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp .. autofunction:: paddle.fluid.layers.dynamic_lstmp
:noindex: :noindex:
dynamic_gru dynamic_gru
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.dynamic_gru .. autofunction:: paddle.fluid.layers.dynamic_gru
:noindex: :noindex:
gru_unit gru_unit
-------- --------
.. autofunction:: paddle.v2.fluid.layers.gru_unit .. autofunction:: paddle.fluid.layers.gru_unit
:noindex: :noindex:
linear_chain_crf linear_chain_crf
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf .. autofunction:: paddle.fluid.layers.linear_chain_crf
:noindex: :noindex:
crf_decoding crf_decoding
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.crf_decoding .. autofunction:: paddle.fluid.layers.crf_decoding
:noindex: :noindex:
cos_sim cos_sim
------- -------
.. autofunction:: paddle.v2.fluid.layers.cos_sim .. autofunction:: paddle.fluid.layers.cos_sim
:noindex: :noindex:
cross_entropy cross_entropy
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.cross_entropy .. autofunction:: paddle.fluid.layers.cross_entropy
:noindex: :noindex:
square_error_cost square_error_cost
----------------- -----------------
.. autofunction:: paddle.v2.fluid.layers.square_error_cost .. autofunction:: paddle.fluid.layers.square_error_cost
:noindex: :noindex:
accuracy accuracy
-------- --------
.. autofunction:: paddle.v2.fluid.layers.accuracy .. autofunction:: paddle.fluid.layers.accuracy
:noindex: :noindex:
chunk_eval chunk_eval
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.chunk_eval .. autofunction:: paddle.fluid.layers.chunk_eval
:noindex: :noindex:
sequence_conv sequence_conv
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.sequence_conv .. autofunction:: paddle.fluid.layers.sequence_conv
:noindex: :noindex:
conv2d conv2d
------ ------
.. autofunction:: paddle.v2.fluid.layers.conv2d .. autofunction:: paddle.fluid.layers.conv2d
:noindex: :noindex:
sequence_pool sequence_pool
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.sequence_pool .. autofunction:: paddle.fluid.layers.sequence_pool
:noindex: :noindex:
pool2d pool2d
------ ------
.. autofunction:: paddle.v2.fluid.layers.pool2d .. autofunction:: paddle.fluid.layers.pool2d
:noindex: :noindex:
batch_norm batch_norm
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.batch_norm .. autofunction:: paddle.fluid.layers.batch_norm
:noindex: :noindex:
layer_norm layer_norm
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.layer_norm .. autofunction:: paddle.fluid.layers.layer_norm
:noindex: :noindex:
beam_search_decode beam_search_decode
------------------ ------------------
.. autofunction:: paddle.v2.fluid.layers.beam_search_decode .. autofunction:: paddle.fluid.layers.beam_search_decode
:noindex: :noindex:
conv2d_transpose conv2d_transpose
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose .. autofunction:: paddle.fluid.layers.conv2d_transpose
:noindex: :noindex:
sequence_expand sequence_expand
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand .. autofunction:: paddle.fluid.layers.sequence_expand
:noindex: :noindex:
lstm_unit lstm_unit
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.lstm_unit .. autofunction:: paddle.fluid.layers.lstm_unit
:noindex: :noindex:
reduce_sum reduce_sum
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.reduce_sum .. autofunction:: paddle.fluid.layers.reduce_sum
:noindex: :noindex:
reduce_mean reduce_mean
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.reduce_mean .. autofunction:: paddle.fluid.layers.reduce_mean
:noindex: :noindex:
reduce_max reduce_max
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.reduce_max .. autofunction:: paddle.fluid.layers.reduce_max
:noindex: :noindex:
reduce_min reduce_min
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.reduce_min .. autofunction:: paddle.fluid.layers.reduce_min
:noindex: :noindex:
sequence_first_step sequence_first_step
------------------- -------------------
.. autofunction:: paddle.v2.fluid.layers.sequence_first_step .. autofunction:: paddle.fluid.layers.sequence_first_step
:noindex: :noindex:
sequence_last_step sequence_last_step
------------------ ------------------
.. autofunction:: paddle.v2.fluid.layers.sequence_last_step .. autofunction:: paddle.fluid.layers.sequence_last_step
:noindex: :noindex:
dropout dropout
------- -------
.. autofunction:: paddle.v2.fluid.layers.dropout .. autofunction:: paddle.fluid.layers.dropout
:noindex: :noindex:
split split
----- -----
.. autofunction:: paddle.v2.fluid.layers.split .. autofunction:: paddle.fluid.layers.split
:noindex: :noindex:
ctc_greedy_decoder ctc_greedy_decoder
------------------ ------------------
.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder .. autofunction:: paddle.fluid.layers.ctc_greedy_decoder
:noindex: :noindex:
edit_distance edit_distance
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.edit_distance .. autofunction:: paddle.fluid.layers.edit_distance
:noindex: :noindex:
l2_normalize l2_normalize
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.l2_normalize .. autofunction:: paddle.fluid.layers.l2_normalize
:noindex: :noindex:
matmul matmul
------ ------
.. autofunction:: paddle.v2.fluid.layers.matmul .. autofunction:: paddle.fluid.layers.matmul
:noindex: :noindex:
warpctc warpctc
------- -------
.. autofunction:: paddle.v2.fluid.layers.warpctc .. autofunction:: paddle.fluid.layers.warpctc
:noindex: :noindex:
sequence_reshape sequence_reshape
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_reshape .. autofunction:: paddle.fluid.layers.sequence_reshape
:noindex: :noindex:
transpose transpose
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.transpose .. autofunction:: paddle.fluid.layers.transpose
:noindex: :noindex:
im2sequence im2sequence
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.im2sequence .. autofunction:: paddle.fluid.layers.im2sequence
:noindex: :noindex:
nce nce
--- ---
.. autofunction:: paddle.v2.fluid.layers.nce .. autofunction:: paddle.fluid.layers.nce
:noindex: :noindex:
beam_search beam_search
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.beam_search .. autofunction:: paddle.fluid.layers.beam_search
:noindex: :noindex:
row_conv row_conv
-------- --------
.. autofunction:: paddle.v2.fluid.layers.row_conv .. autofunction:: paddle.fluid.layers.row_conv
:noindex: :noindex:
multiplex multiplex
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.multiplex .. autofunction:: paddle.fluid.layers.multiplex
:noindex: :noindex:
ops ops
...@@ -479,259 +479,259 @@ ops ...@@ -479,259 +479,259 @@ ops
mean mean
---- ----
.. autofunction:: paddle.v2.fluid.layers.mean .. autofunction:: paddle.fluid.layers.mean
:noindex: :noindex:
mul mul
--- ---
.. autofunction:: paddle.v2.fluid.layers.mul .. autofunction:: paddle.fluid.layers.mul
:noindex: :noindex:
reshape reshape
------- -------
.. autofunction:: paddle.v2.fluid.layers.reshape .. autofunction:: paddle.fluid.layers.reshape
:noindex: :noindex:
scale scale
----- -----
.. autofunction:: paddle.v2.fluid.layers.scale .. autofunction:: paddle.fluid.layers.scale
:noindex: :noindex:
sigmoid_cross_entropy_with_logits sigmoid_cross_entropy_with_logits
--------------------------------- ---------------------------------
.. autofunction:: paddle.v2.fluid.layers.sigmoid_cross_entropy_with_logits .. autofunction:: paddle.fluid.layers.sigmoid_cross_entropy_with_logits
:noindex: :noindex:
elementwise_add elementwise_add
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_add .. autofunction:: paddle.fluid.layers.elementwise_add
:noindex: :noindex:
elementwise_div elementwise_div
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_div .. autofunction:: paddle.fluid.layers.elementwise_div
:noindex: :noindex:
elementwise_sub elementwise_sub
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_sub .. autofunction:: paddle.fluid.layers.elementwise_sub
:noindex: :noindex:
elementwise_mul elementwise_mul
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_mul .. autofunction:: paddle.fluid.layers.elementwise_mul
:noindex: :noindex:
elementwise_max elementwise_max
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_max .. autofunction:: paddle.fluid.layers.elementwise_max
:noindex: :noindex:
elementwise_min elementwise_min
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_min .. autofunction:: paddle.fluid.layers.elementwise_min
:noindex: :noindex:
elementwise_pow elementwise_pow
--------------- ---------------
.. autofunction:: paddle.v2.fluid.layers.elementwise_pow .. autofunction:: paddle.fluid.layers.elementwise_pow
:noindex: :noindex:
clip clip
---- ----
.. autofunction:: paddle.v2.fluid.layers.clip .. autofunction:: paddle.fluid.layers.clip
:noindex: :noindex:
clip_by_norm clip_by_norm
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.clip_by_norm .. autofunction:: paddle.fluid.layers.clip_by_norm
:noindex: :noindex:
sequence_softmax sequence_softmax
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_softmax .. autofunction:: paddle.fluid.layers.sequence_softmax
:noindex: :noindex:
sigmoid sigmoid
------- -------
.. autofunction:: paddle.v2.fluid.layers.sigmoid .. autofunction:: paddle.fluid.layers.sigmoid
:noindex: :noindex:
logsigmoid logsigmoid
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.logsigmoid .. autofunction:: paddle.fluid.layers.logsigmoid
:noindex: :noindex:
exp exp
--- ---
.. autofunction:: paddle.v2.fluid.layers.exp .. autofunction:: paddle.fluid.layers.exp
:noindex: :noindex:
relu relu
---- ----
.. autofunction:: paddle.v2.fluid.layers.relu .. autofunction:: paddle.fluid.layers.relu
:noindex: :noindex:
tanh tanh
---- ----
.. autofunction:: paddle.v2.fluid.layers.tanh .. autofunction:: paddle.fluid.layers.tanh
:noindex: :noindex:
tanh_shrink tanh_shrink
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.tanh_shrink .. autofunction:: paddle.fluid.layers.tanh_shrink
:noindex: :noindex:
softshrink softshrink
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.softshrink .. autofunction:: paddle.fluid.layers.softshrink
:noindex: :noindex:
sqrt sqrt
---- ----
.. autofunction:: paddle.v2.fluid.layers.sqrt .. autofunction:: paddle.fluid.layers.sqrt
:noindex: :noindex:
abs abs
--- ---
.. autofunction:: paddle.v2.fluid.layers.abs .. autofunction:: paddle.fluid.layers.abs
:noindex: :noindex:
ceil ceil
---- ----
.. autofunction:: paddle.v2.fluid.layers.ceil .. autofunction:: paddle.fluid.layers.ceil
:noindex: :noindex:
floor floor
----- -----
.. autofunction:: paddle.v2.fluid.layers.floor .. autofunction:: paddle.fluid.layers.floor
:noindex: :noindex:
round round
----- -----
.. autofunction:: paddle.v2.fluid.layers.round .. autofunction:: paddle.fluid.layers.round
:noindex: :noindex:
reciprocal reciprocal
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.reciprocal .. autofunction:: paddle.fluid.layers.reciprocal
:noindex: :noindex:
log log
--- ---
.. autofunction:: paddle.v2.fluid.layers.log .. autofunction:: paddle.fluid.layers.log
:noindex: :noindex:
square square
------ ------
.. autofunction:: paddle.v2.fluid.layers.square .. autofunction:: paddle.fluid.layers.square
:noindex: :noindex:
softplus softplus
-------- --------
.. autofunction:: paddle.v2.fluid.layers.softplus .. autofunction:: paddle.fluid.layers.softplus
:noindex: :noindex:
softsign softsign
-------- --------
.. autofunction:: paddle.v2.fluid.layers.softsign .. autofunction:: paddle.fluid.layers.softsign
:noindex: :noindex:
brelu brelu
----- -----
.. autofunction:: paddle.v2.fluid.layers.brelu .. autofunction:: paddle.fluid.layers.brelu
:noindex: :noindex:
leaky_relu leaky_relu
---------- ----------
.. autofunction:: paddle.v2.fluid.layers.leaky_relu .. autofunction:: paddle.fluid.layers.leaky_relu
:noindex: :noindex:
soft_relu soft_relu
--------- ---------
.. autofunction:: paddle.v2.fluid.layers.soft_relu .. autofunction:: paddle.fluid.layers.soft_relu
:noindex: :noindex:
elu elu
--- ---
.. autofunction:: paddle.v2.fluid.layers.elu .. autofunction:: paddle.fluid.layers.elu
:noindex: :noindex:
relu6 relu6
----- -----
.. autofunction:: paddle.v2.fluid.layers.relu6 .. autofunction:: paddle.fluid.layers.relu6
:noindex: :noindex:
pow pow
--- ---
.. autofunction:: paddle.v2.fluid.layers.pow .. autofunction:: paddle.fluid.layers.pow
:noindex: :noindex:
stanh stanh
----- -----
.. autofunction:: paddle.v2.fluid.layers.stanh .. autofunction:: paddle.fluid.layers.stanh
:noindex: :noindex:
hard_shrink hard_shrink
----------- -----------
.. autofunction:: paddle.v2.fluid.layers.hard_shrink .. autofunction:: paddle.fluid.layers.hard_shrink
:noindex: :noindex:
thresholded_relu thresholded_relu
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.thresholded_relu .. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex: :noindex:
hard_sigmoid hard_sigmoid
------------ ------------
.. autofunction:: paddle.v2.fluid.layers.hard_sigmoid .. autofunction:: paddle.fluid.layers.hard_sigmoid
:noindex: :noindex:
swish swish
----- -----
.. autofunction:: paddle.v2.fluid.layers.swish .. autofunction:: paddle.fluid.layers.swish
:noindex: :noindex:
tensor tensor
...@@ -740,66 +740,66 @@ tensor ...@@ -740,66 +740,66 @@ tensor
create_tensor create_tensor
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.create_tensor .. autofunction:: paddle.fluid.layers.create_tensor
:noindex: :noindex:
create_parameter create_parameter
---------------- ----------------
.. autofunction:: paddle.v2.fluid.layers.create_parameter .. autofunction:: paddle.fluid.layers.create_parameter
:noindex: :noindex:
create_global_var create_global_var
----------------- -----------------
.. autofunction:: paddle.v2.fluid.layers.create_global_var .. autofunction:: paddle.fluid.layers.create_global_var
:noindex: :noindex:
cast cast
---- ----
.. autofunction:: paddle.v2.fluid.layers.cast .. autofunction:: paddle.fluid.layers.cast
:noindex: :noindex:
concat concat
------ ------
.. autofunction:: paddle.v2.fluid.layers.concat .. autofunction:: paddle.fluid.layers.concat
:noindex: :noindex:
sums sums
---- ----
.. autofunction:: paddle.v2.fluid.layers.sums .. autofunction:: paddle.fluid.layers.sums
:noindex: :noindex:
assign assign
------ ------
.. autofunction:: paddle.v2.fluid.layers.assign .. autofunction:: paddle.fluid.layers.assign
:noindex: :noindex:
fill_constant_batch_size_like fill_constant_batch_size_like
----------------------------- -----------------------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like .. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like
:noindex: :noindex:
fill_constant fill_constant
------------- -------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant .. autofunction:: paddle.fluid.layers.fill_constant
:noindex: :noindex:
ones ones
---- ----
.. autofunction:: paddle.v2.fluid.layers.ones .. autofunction:: paddle.fluid.layers.ones
:noindex: :noindex:
zeros zeros
----- -----
.. autofunction:: paddle.v2.fluid.layers.zeros .. autofunction:: paddle.fluid.layers.zeros
:noindex: :noindex:
...@@ -8,24 +8,24 @@ nets ...@@ -8,24 +8,24 @@ nets
simple_img_conv_pool simple_img_conv_pool
-------------------- --------------------
.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool .. autofunction:: paddle.fluid.nets.simple_img_conv_pool
:noindex: :noindex:
sequence_conv_pool sequence_conv_pool
------------------ ------------------
.. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool .. autofunction:: paddle.fluid.nets.sequence_conv_pool
:noindex: :noindex:
glu glu
--- ---
.. autofunction:: paddle.v2.fluid.nets.glu .. autofunction:: paddle.fluid.nets.glu
:noindex: :noindex:
scaled_dot_product_attention scaled_dot_product_attention
---------------------------- ----------------------------
.. autofunction:: paddle.v2.fluid.nets.scaled_dot_product_attention .. autofunction:: paddle.fluid.nets.scaled_dot_product_attention
:noindex: :noindex:
...@@ -8,42 +8,42 @@ optimizer ...@@ -8,42 +8,42 @@ optimizer
SGD SGD
--- ---
.. autoclass:: paddle.v2.fluid.optimizer.SGD .. autoclass:: paddle.fluid.optimizer.SGD
:members: :members:
:noindex: :noindex:
Momentum Momentum
-------- --------
.. autoclass:: paddle.v2.fluid.optimizer.Momentum .. autoclass:: paddle.fluid.optimizer.Momentum
:members: :members:
:noindex: :noindex:
Adagrad Adagrad
------- -------
.. autoclass:: paddle.v2.fluid.optimizer.Adagrad .. autoclass:: paddle.fluid.optimizer.Adagrad
:members: :members:
:noindex: :noindex:
Adam Adam
---- ----
.. autoclass:: paddle.v2.fluid.optimizer.Adam .. autoclass:: paddle.fluid.optimizer.Adam
:members: :members:
:noindex: :noindex:
Adamax Adamax
------ ------
.. autoclass:: paddle.v2.fluid.optimizer.Adamax .. autoclass:: paddle.fluid.optimizer.Adamax
:members: :members:
:noindex: :noindex:
DecayedAdagrad DecayedAdagrad
-------------- --------------
.. autoclass:: paddle.v2.fluid.optimizer.DecayedAdagrad .. autoclass:: paddle.fluid.optimizer.DecayedAdagrad
:members: :members:
:noindex: :noindex:
...@@ -8,14 +8,14 @@ param_attr ...@@ -8,14 +8,14 @@ param_attr
ParamAttr ParamAttr
--------- ---------
.. autoclass:: paddle.v2.fluid.param_attr.ParamAttr .. autoclass:: paddle.fluid.param_attr.ParamAttr
:members: :members:
:noindex: :noindex:
WeightNormParamAttr WeightNormParamAttr
------------------- -------------------
.. autoclass:: paddle.v2.fluid.param_attr.WeightNormParamAttr .. autoclass:: paddle.fluid.param_attr.WeightNormParamAttr
:members: :members:
:noindex: :noindex:
...@@ -8,18 +8,18 @@ profiler ...@@ -8,18 +8,18 @@ profiler
cuda_profiler cuda_profiler
------------- -------------
.. autofunction:: paddle.v2.fluid.profiler.cuda_profiler .. autofunction:: paddle.fluid.profiler.cuda_profiler
:noindex: :noindex:
reset_profiler reset_profiler
-------------- --------------
.. autofunction:: paddle.v2.fluid.profiler.reset_profiler .. autofunction:: paddle.fluid.profiler.reset_profiler
:noindex: :noindex:
profiler profiler
-------- --------
.. autofunction:: paddle.v2.fluid.profiler.profiler .. autofunction:: paddle.fluid.profiler.profiler
:noindex: :noindex:
...@@ -8,20 +8,20 @@ regularizer ...@@ -8,20 +8,20 @@ regularizer
append_regularization_ops append_regularization_ops
------------------------- -------------------------
.. autofunction:: paddle.v2.fluid.regularizer.append_regularization_ops .. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex: :noindex:
L1Decay L1Decay
------- -------
.. autoclass:: paddle.v2.fluid.regularizer.L1Decay .. autoclass:: paddle.fluid.regularizer.L1Decay
:members: :members:
:noindex: :noindex:
L2Decay L2Decay
------- -------
.. autoclass:: paddle.v2.fluid.regularizer.L2Decay .. autoclass:: paddle.fluid.regularizer.L2Decay
:members: :members:
:noindex: :noindex:
API
===
.. toctree::
:maxdepth: 1
模型配置 <v2/model_configs.rst>
数据访问 <v2/data.rst>
训练与应用 <v2/run_logic.rst>
v2/fluid.rst
...@@ -4,7 +4,8 @@ API ...@@ -4,7 +4,8 @@ API
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
overview.rst
v2/model_configs.rst v2/model_configs.rst
v2/data.rst v2/data.rst
v2/run_logic.rst v2/run_logic.rst
v2/fluid.rst fluid/index.rst
V2 API Overview
================
The PaddlePaddle V2 API is designed to provide a modern user interface for PaddlePaddle V1 (the original layer-based platform of PaddlePaddle).
It introduces high-level concepts such as `Layers <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/layer.html>`_ , `Optimizer <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/optimizer.html>`_ , `Evaluator <http://www.paddlepaddle.org/docs/develop/api/en/v2/config/evaluators.html>`_ and `Data Reader <http://www.paddlepaddle.org/docs/develop/api/en/v2/data/data_reader.html>`_ to make model configuration more familiar to users.
A model is composed of the computation described by a group of `Layers`, with `Evaluator` to define the error, `Optimizer` to update the parameters and `Data Reader` to feed in the data.
We also provide the `interface for Training and Inference <http://www.paddlepaddle.org/docs/develop/api/en/v2/run_logic.html>`_ to help control the training and inference phases.
It has several easy-to-use methods:
- `paddle.train`
- `paddle.test`
- `paddle.infer`
To better expose the internal running details, different `events <http://www.paddlepaddle.org/docs/develop/api/en/v2/run_logic.html#event>`_ are available to users, who can handle them by writing callbacks.
======================
Fluid
======================
.. toctree::
:maxdepth: 1
fluid/layers.rst
fluid/data_feeder.rst
fluid/executor.rst
fluid/initializer.rst
fluid/evaluator.rst
fluid/nets.rst
fluid/optimizer.rst
fluid/param_attr.rst
fluid/profiler.rst
fluid/regularizer.rst
fluid/io.rst
...@@ -189,7 +189,7 @@ PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种B ...@@ -189,7 +189,7 @@ PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种B
"WITH_TESTING", "是否开启单元测试", "OFF" "WITH_TESTING", "是否开启单元测试", "OFF"
"WITH_DOC", "是否编译中英文文档", "OFF" "WITH_DOC", "是否编译中英文文档", "OFF"
"WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto" "WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto"
"WITH_GOLANG", "是否编译go语言的可容错parameter server", "ON" "WITH_GOLANG", "是否编译go语言的可容错parameter server", "OFF"
"WITH_MKL", "是否使用MKL数学库,如果为否则是用OpenBLAS", "ON" "WITH_MKL", "是否使用MKL数学库,如果为否则是用OpenBLAS", "ON"
BLAS BLAS
......
...@@ -191,7 +191,7 @@ You can add :code:`-D` argument to pass such options, like: ...@@ -191,7 +191,7 @@ You can add :code:`-D` argument to pass such options, like:
"WITH_TESTING", "Build unit tests", "OFF" "WITH_TESTING", "Build unit tests", "OFF"
"WITH_DOC", "Build documentations", "OFF" "WITH_DOC", "Build documentations", "OFF"
"WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto" "WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto"
"WITH_GOLANG", "Build fault-tolerant parameter server written in go", "ON" "WITH_GOLANG", "Build fault-tolerant parameter server written in go", "OFF"
"WITH_MKL", "Use MKL as BLAS library, else use OpenBLAS", "ON" "WITH_MKL", "Use MKL as BLAS library, else use OpenBLAS", "ON"
......
...@@ -12,7 +12,7 @@ The following table compares concepts in Fluid and Go ...@@ -12,7 +12,7 @@ The following table compares concepts in Fluid and Go
| Go | Fluid | | Go | Fluid |
|----|-------| |----|-------|
|user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid) | |user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid) |
| control-flow and built-in functions | [intrinsics/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) | | control-flow and built-in functions | [intrinsics/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) |
| goroutines, channels | [class ThreadPool](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework/thread_pool.h) | | goroutines, channels | [class ThreadPool](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework/thread_pool.h) |
| runtime | [class Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h) | | runtime | [class Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h) |
......
...@@ -89,7 +89,7 @@ with train_loop.block(): ...@@ -89,7 +89,7 @@ with train_loop.block():
h[t] = the_step(input[t]) h[t] = the_step(input[t])
``` ```
An actual Fluid example is described [here](https://github.com/PaddlePaddle/Paddle/blob/a91efdde6910ce92a78e3aa7157412c4c88d9ee8/python/paddle/v2/fluid/tests/test_while_op.py#L36-L44). An actual Fluid example is described [here](https://github.com/PaddlePaddle/Paddle/blob/bde090a97564b9c61a6aaa38b72ccc4889d102d9/python/paddle/fluid/tests/unittests/test_while_op.py#L50-L58).
From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop. From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop.
......
...@@ -101,7 +101,7 @@ In-place is a built-in attribute of an operator. Since we treat in-place and oth ...@@ -101,7 +101,7 @@ In-place is a built-in attribute of an operator. Since we treat in-place and oth
#### construct control flow graph #### construct control flow graph
Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book/test_machine_translation.py) example. Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_machine_translation.py) example.
- Block0: - Block0:
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
$ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
$ docker run ${CUDA_SO} ${DEVICES} -it paddlepaddle/paddle:latest-gpu $ docker run ${CUDA_SO} ${DEVICES} -it paddlepaddle/paddle:latest-gpu
更多关于Docker的安装与使用, 请参考 `PaddlePaddle Docker 文档 <http://www.paddlepaddle.org/doc_cn/build_and_install/install/docker_install.html>`_ 。 更多关于Docker的安装与使用, 请参考 `PaddlePaddle Docker 文档 <http://www.paddlepaddle.org/docs/0.11.0/documentation/zh/getstarted/build_and_install/docker_install_cn.html>`_ 。
2. CMake源码编译, 找到的PythonLibs和PythonInterp版本不一致 2. CMake源码编译, 找到的PythonLibs和PythonInterp版本不一致
......
FAQ FAQ
==== ====
本文档对关于PaddlePaddle的一些常见问题提供了解答。如果您的问题未在此处,请您到 `PaddlePaddle社区 <https://github.com/PaddlePaddle/Paddle/issues>`_ 查找答案或直接提 `issue <https://github.com/PaddlePaddle/Paddle/issues/new>`_ ,我们会及时进行回复。
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
......
...@@ -148,10 +148,10 @@ Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异 ...@@ -148,10 +148,10 @@ Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异
.. code-block:: python .. code-block:: python
optimizer = paddle.optimizer.RMSProp( optimizer = paddle.optimizer.RMSProp(
learning_rate=1e-3, learning_rate=1e-3,
gradient_clipping_threshold=10.0, gradient_clipping_threshold=10.0,
regularization=paddle.optimizer.L2Regularization(rate=8e-4)) regularization=paddle.optimizer.L2Regularization(rate=8e-4))
具体可以参考 `nmt_without_attention <https://github.com/PaddlePaddle/models/blob/develop/nmt_without_attention/train.py#L35>`_ 示例。 具体可以参考 `nmt_without_attention <https://github.com/PaddlePaddle/models/blob/develop/nmt_without_attention/train.py#L35>`_ 示例。
...@@ -159,13 +159,13 @@ optimizer = paddle.optimizer.RMSProp( ...@@ -159,13 +159,13 @@ optimizer = paddle.optimizer.RMSProp(
.. code-block:: python .. code-block:: python
decoder_inputs = paddle.layer.fc( decoder_inputs = paddle.layer.fc(
act=paddle.activation.Linear(), act=paddle.activation.Linear(),
size=decoder_size * 3, size=decoder_size * 3,
bias_attr=False, bias_attr=False,
input=[context, current_word], input=[context, current_word],
layer_attr=paddle.attr.ExtraLayerAttribute( layer_attr=paddle.attr.ExtraLayerAttribute(
error_clipping_threshold=100.0)) error_clipping_threshold=100.0))
完整代码可以参考示例 `machine translation <https://github.com/PaddlePaddle/book/blob/develop/08.machine_translation/train.py#L66>`_ 。 完整代码可以参考示例 `machine translation <https://github.com/PaddlePaddle/book/blob/develop/08.machine_translation/train.py#L66>`_ 。
......
...@@ -196,6 +196,6 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数 ...@@ -196,6 +196,6 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数
obj="process", obj="process",
args={"src_dict_path": src_dict_path}) args={"src_dict_path": src_dict_path})
完整源码可参考 `seqToseq <https://github.com/PaddlePaddle/Paddle/tree/develop/demo/seqToseq>`_ 示例。 完整源码可参考 `sequence_recurrent <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/gserver/tests/sequence_recurrent.py>`_ 示例。
C-API预测库 C-API预测库
================== ==================
当我们训练完一个神经网络模型之后,下一步就是用模型来做预测。预测就是准备输入数据,经过模型处理之后,得到预测结果的过程。
相比于模型训练,预测有如下特点:
#. 预测不需要训练过程中反向传播和参数更新的部分。
#. 预测不需要标签(label)。
#. 预测很多时候需要和用户系统整合在一起。
因为上述特点,模型预测SDK需要单独设计,并具备以下特点:
#. 预测SDK不包含反向传播和参数更新部分,以减小SDK的体积。
#. 预测SDK需要提供一个简洁的用户接口,方便使用。
#. 因为输入数据可能有多种结构,对输入数据的格式做清晰简洁的封装。
#. 为了和用户系统兼容,SDK的接口需要是满足C标准的接口。
PaddlePaddle提供了C-API,用于解决上述问题。关于C-API的使用,我们提供了如下指南:
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
......
...@@ -65,6 +65,7 @@ ...@@ -65,6 +65,7 @@
output_file = "output.paddle.model" output_file = "output.paddle.model"
merge_v2_model(net, param_file, output_file) merge_v2_model(net, param_file, output_file)
``` ```
对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例,可直接运行 `python` [merge_v2_model.py](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/merge_v2_model.py)。序列化结果会写入当前运行目录下的`output.paddle.model`文件中。使用这种方式,运行时C-API可以通过指定`output.paddle.model`文件的路径来加载预测模型。 对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例,可直接运行 `python` [merge_v2_model.py](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/merge_v2_model.py)。序列化结果会写入当前运行目录下的`output.paddle.model`文件中。使用这种方式,运行时C-API可以通过指定`output.paddle.model`文件的路径来加载预测模型。
#### 注意事项 #### 注意事项
......
...@@ -32,7 +32,7 @@ The non-cluster version of this demo with fluid API is as follows: ...@@ -32,7 +32,7 @@ The non-cluster version of this demo with fluid API is as follows:
``` python ``` python
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
x = fluid.layers.data(name='x', shape=[13], dtype='float32') x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None) y_predict = fluid.layers.fc(input=x, size=1, act=None)
...@@ -125,11 +125,11 @@ for pass_id in range(100): ...@@ -125,11 +125,11 @@ for pass_id in range(100):
### E2E demo ### E2E demo
Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py). Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book_distribute/notest_dist_fit_a_line.py).
First `cd` into the folder that contains the `python` files. In this case: First `cd` into the folder that contains the `python` files. In this case:
```bash ```bash
cd /paddle/python/paddle/v2/fluid/tests/book_distribute cd /paddle/python/paddle/fluid/tests/book_distribute
``` ```
In parameter server node run the following in the command line: In parameter server node run the following in the command line:
......
在不同集群中运行 在不同集群中运行
================ ================
用户的集群环境不尽相同,为了方便大家的部署,我们提供了多种的集群部署方式,方便提交集群训练任务,以下将一一介绍:
PaddlePaddle可以使用多种分布式计算平台构建分布式计算任务,包括: `Kubernetes <http://kubernetes.io>`_ 是Google开源的容器集群的调度框架,支持大规模集群生产环境的完整集群方案。以下指南展示了PaddlePaddle对Kubernetes的支持:
- `Kubernetes <http://kubernetes.io>`_ Google开源的容器集群的调度框架,支持大规模集群生产环境的完整集群方案。
- `OpenMPI <https://www.open-mpi.org>`_ 成熟的高性能并行计算框架。
- `Fabric <http://www.fabfile.org>`_ 集群管理工具。可以使用`Fabric`编写集群任务提交和管理脚本。
对于不同的集群平台,会分别介绍集群作业的启动和停止方法。这些例子都可以在 `cluster_train_v2 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/scripts/cluster_train_v2>`_ 找到。 .. toctree::
:maxdepth: 1
k8s_cn.md
k8s_distributed_cn.md
在使用分布式计算平台进行训练时,任务被调度在集群中时,分布式计算平台通常会通过API或者环境变量提供任务运行需要的参数,比如节点的ID、IP和任务节点个数等。 `OpenMPI <https://www.open-mpi.org>`_ 是成熟的高性能并行计算框架,在HPC领域使用非常的广泛。以下指南介绍了如何使用OpenMPI来搭建PaddlePaddle的集群训练任务:
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
fabric_cn.md
openmpi_cn.md openmpi_cn.md
k8s_cn.md
k8s_distributed_cn.md `Fabric <http://www.fabfile.org>`_ 是一个方便的程序部署和管理工具。我们提供了使用Fabric 进行部署、管理的方法,如果想详细了解,请阅读以下指南:
.. toctree::
:maxdepth: 1
fabric_cn.md
我们也支持在AWS上部署PaddlePaddle,详细请了解:
.. toctree::
:maxdepth: 1
k8s_aws_cn.md k8s_aws_cn.md
您可以在 `cluster_train_v2 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/scripts/cluster_train_v2>`_ 找到以上相关的例子。
...@@ -35,7 +35,7 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py ...@@ -35,7 +35,7 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py
``` ```
ncalls tottime percall cumtime percall filename:lineno(function) ncalls tottime percall cumtime percall filename:lineno(function)
1 0.284 0.284 29.514 29.514 main.py:1(<module>) 1 0.284 0.284 29.514 29.514 main.py:1(<module>)
4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run) 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/executor.py:20(run)
4696 12.040 0.003 12.040 0.003 {built-in method run} 4696 12.040 0.003 12.040 0.003 {built-in method run}
1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14(<module>) 1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14(<module>)
``` ```
...@@ -61,9 +61,9 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py ...@@ -61,9 +61,9 @@ cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py
```text ```text
4696 12.040 0.003 12.040 0.003 {built-in method run} 4696 12.040 0.003 12.040 0.003 {built-in method run}
300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader) 300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader)
107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__) 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:219(__init__)
4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp)
1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1(<module>) 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/__init__.py:1(<module>)
``` ```
可以看到最耗时的函数是C++端的`run`函数。这需要联合我们第二节`Python``C++`混合代码的性能分析来进行调优。而`sync_with_cpp`函数的总共耗时很长,每次调用的耗时也很长。于是我们可以点击`sync_with_cpp`的详细信息,了解其调用关系。 可以看到最耗时的函数是C++端的`run`函数。这需要联合我们第二节`Python``C++`混合代码的性能分析来进行调优。而`sync_with_cpp`函数的总共耗时很长,每次调用的耗时也很长。于是我们可以点击`sync_with_cpp`的详细信息,了解其调用关系。
...@@ -76,9 +76,9 @@ Called By: ...@@ -76,9 +76,9 @@ Called By:
Function was called by... Function was called by...
ncalls tottime cumtime ncalls tottime cumtime
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone) /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:487(clone)
1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward) 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:534(append_backward)
Called: Called:
......
...@@ -49,7 +49,7 @@ port, we will see the output like the following: ...@@ -49,7 +49,7 @@ port, we will see the output like the following:
``` ```
ncalls tottime percall cumtime percall filename:lineno(function) ncalls tottime percall cumtime percall filename:lineno(function)
1 0.284 0.284 29.514 29.514 main.py:1(<module>) 1 0.284 0.284 29.514 29.514 main.py:1(<module>)
4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run) 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/executor.py:20(run)
4696 12.040 0.003 12.040 0.003 {built-in method run} 4696 12.040 0.003 12.040 0.003 {built-in method run}
1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14(<module>) 1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14(<module>)
``` ```
...@@ -74,9 +74,9 @@ focus on. We can sort above profiling file by tottime: ...@@ -74,9 +74,9 @@ focus on. We can sort above profiling file by tottime:
```text ```text
4696 12.040 0.003 12.040 0.003 {built-in method run} 4696 12.040 0.003 12.040 0.003 {built-in method run}
300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader) 300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader)
107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__) 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:219(__init__)
4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp)
1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1(<module>) 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/__init__.py:1(<module>)
``` ```
We can see that the most time-consuming function is the `built-in We can see that the most time-consuming function is the `built-in
...@@ -93,9 +93,9 @@ Called By: ...@@ -93,9 +93,9 @@ Called By:
Function was called by... Function was called by...
ncalls tottime cumtime ncalls tottime cumtime
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp)
/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone) /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:487(clone)
1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward) 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/fluid/framework.py:534(append_backward)
Called: Called:
......
# PaddlePaddle Fluid Source Code Overview # PaddlePaddle Fluid Source Code Overview
Examples: https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/tests/book Examples: https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/tests/book
Core: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework Core: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework
...@@ -26,16 +26,16 @@ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) ...@@ -26,16 +26,16 @@ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
``` ```
- Variables: `x`, `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/framework.py#) - Variables: `x`, `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/framework.py#)
- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/layers) - Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/layers)
- Every Layer has one or more operators and variables/parameters - Every Layer has one or more operators and variables/parameters
- All the operators are defined at [`paddle/operators/`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators). Other worth-looking files: - All the operators are defined at [`paddle/operators/`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators). Other worth-looking files:
- Base class: [`paddle/framework/operator.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h) - Base class: [`paddle/framework/operator.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h)
- Operator Registration: [`paddle/framework/op_registry.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h) - Operator Registration: [`paddle/framework/op_registry.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h)
- Operator Lookup: [`paddle/framework/op_info.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_info.h) - Operator Lookup: [`paddle/framework/op_info.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_info.h)
- Optimizer: `fluid.optimizer.SGD`. It does the following - Optimizer: `fluid.optimizer.SGD`. It does the following
- Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/backward.py)] - Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/backward.py)]
- Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/optimizer.py)] - Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/optimizer.py)]
# Run Time # Run Time
...@@ -57,7 +57,7 @@ exe.run(fluid.default_main_program(), ...@@ -57,7 +57,7 @@ exe.run(fluid.default_main_program(),
- Place: `place`. one of CPU, GPU or FPGA. [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h) - Place: `place`. one of CPU, GPU or FPGA. [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h)
- The device handles are at [paddle/platform/device_context.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h) - The device handles are at [paddle/platform/device_context.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h)
- Executor: `fluid.Executor(place)`. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/executor.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)] - Executor: `fluid.Executor(place)`. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/executor.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)]
- Feeds the data: `feed=feeder.feed(data)` - Feeds the data: `feed=feeder.feed(data)`
- Evaluates all the operators - Evaluates all the operators
- Fetches the result: `fetch_list=[avg_cost]` - Fetches the result: `fetch_list=[avg_cost]`
......
...@@ -23,6 +23,12 @@ $ docker build -t username/paddle-android:dev . -f Dockerfile.android ...@@ -23,6 +23,12 @@ $ docker build -t username/paddle-android:dev . -f Dockerfile.android
$ docker pull paddlepaddle/paddle:latest-dev-android $ docker pull paddlepaddle/paddle:latest-dev-android
``` ```
对于国内用户,我们提供了加速访问的镜像源:
```bash
$ docker pull docker.paddlepaddlehub.com/paddle:latest-dev-android
```
### 编译PaddlePaddle C-API库 ### 编译PaddlePaddle C-API库
构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。 构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。
Android的Docker开发镜像向用户提供两个可配置的参数: Android的Docker开发镜像向用户提供两个可配置的参数:
...@@ -56,15 +62,15 @@ Android的Docker开发镜像向用户提供两个可配置的参数: ...@@ -56,15 +62,15 @@ Android的Docker开发镜像向用户提供两个可配置的参数:
- 编译`armeabi-v7a``Android API 21`的PaddlePaddle库 - 编译`armeabi-v7a``Android API 21`的PaddlePaddle库
```bash ```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev
``` ```
- 编译`arm64-v8a``Android API 21`的PaddlePaddle库 - 编译`arm64-v8a``Android API 21`的PaddlePaddle库
```bash ```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev
``` ```
执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置,并且会根据`ANDROID_ABI``ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a``ANDROID_API<21`时,Docker容器中将默认使用`Android API 21`的编译工具链。用户可以参考下文[配置交叉编译参数](#配置交叉编译参数)章节,根据个人的需求修改定制Docker容器所执行的脚本。编译安装结束之后,PaddlePaddle的C-API库将被安装到`$PWD/install_android`目录,所依赖的第三方库同时也被安装到`$PWD/install_android/third_party`目录。 执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置,并且会根据`ANDROID_ABI``ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a``ANDROID_API<21`时,Docker容器中将默认使用`Android API 21`的编译工具链。用户可以参考下文[配置交叉编译参数](#配置交叉编译参数)章节,根据个人的需求修改定制Docker容器所执行的脚本。编译安装结束之后,PaddlePaddle的C-API库将被安装到`$PWD/install_android`目录,所依赖的第三方库同时也被安装到`$PWD/install_android/third_party`目录。
...@@ -155,7 +161,11 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ ...@@ -155,7 +161,11 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
.. ..
``` ```
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE``MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE``Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS`来影响PaddlePaddle的编译过程。 用户还可根据自己的需求设置其他编译参数。
- 设置`CMAKE_BUILD_TYPE`为`MinSizeRel`,最小化生成的库的大小。
- 设置`CMAKE_BUILD_TYPE`为`Release`,获得最快的执行速度。
- 用户亦可以通过手动设置`CMAKE_C/CXX_FLAGS`来影响PaddlePaddle的编译过程。
**性能TIPS**,为了达到最快的计算速度,在CMake参数配置上,有以下建议: **性能TIPS**,为了达到最快的计算速度,在CMake参数配置上,有以下建议:
......
...@@ -25,6 +25,12 @@ Users can directly use the published Docker image. ...@@ -25,6 +25,12 @@ Users can directly use the published Docker image.
$ docker pull paddlepaddle/paddle:latest-dev-android $ docker pull paddlepaddle/paddle:latest-dev-android
``` ```
For users in China, we provide a faster mirror.
```bash
$ docker pull docker.paddlepaddlehub.com/paddle:latest-dev-android
```
### Build the Inference Library ### Build the Inference Library
We can run the Docker image we just created to build the inference library of PaddlePaddle for Android using the command below: We can run the Docker image we just created to build the inference library of PaddlePaddle for Android using the command below:
...@@ -86,19 +92,19 @@ Android NDK includes everything we need to build the [*standalone toolchain*](ht ...@@ -86,19 +92,19 @@ Android NDK includes everything we need to build the [*standalone toolchain*](ht
- To build the standalone toolchain for `armeabi-v7a` and Android API level 21: - To build the standalone toolchain for `armeabi-v7a` and Android API level 21:
```bash ```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \ your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm --platform=android-21 --install-dir=your/path/to/arm_standalone_toolchain --arch=arm --platform=android-21 --install-dir=your/path/to/arm_standalone_toolchain
``` ```
The generated standalone toolchain will be in `your/path/to/arm_standalone_toolchain`. The generated standalone toolchain will be in `your/path/to/arm_standalone_toolchain`.
- To build the standalone toolchain for `arm64-v8a` and Android API level 21: - To build the standalone toolchain for `arm64-v8a` and Android API level 21:
```bash ```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \ your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain --arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain
``` ```
The generated standalone toolchain will be in `your/path/to/arm64_standalone_toolchain`. The generated standalone toolchain will be in `your/path/to/arm64_standalone_toolchain`.
......
...@@ -56,7 +56,7 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) ...@@ -56,7 +56,7 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor) shape_inference data_transform lod_tensor profiler)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
...@@ -68,9 +68,9 @@ py_proto_compile(framework_py_proto SRCS framework.proto) ...@@ -68,9 +68,9 @@ py_proto_compile(framework_py_proto SRCS framework.proto)
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init) add_dependencies(framework_py_proto framework_py_proto_init)
add_custom_command(TARGET framework_py_proto POST_BUILD add_custom_command(TARGET framework_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/proto COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto
COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/proto/ COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/fluid/proto/
COMMENT "Copy generated python proto into directory paddle/v2/fluid/proto." COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
...@@ -80,7 +80,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) ...@@ -80,7 +80,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
framework_proto backward glog lod_rank_table profiler feed_fetch_method) framework_proto backward glog lod_rank_table feed_fetch_method)
cc_library(prune SRCS prune.cc DEPS framework_proto) cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
......
...@@ -25,7 +25,6 @@ limitations under the License. */ ...@@ -25,7 +25,6 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
DEFINE_bool(check_nan_inf, false, DEFINE_bool(check_nan_inf, false,
...@@ -58,13 +57,13 @@ static void CreateTensor(Variable* var, proto::VarType::Type var_type) { ...@@ -58,13 +57,13 @@ static void CreateTensor(Variable* var, proto::VarType::Type var_type) {
var->GetMutable<ReaderHolder>(); var->GetMutable<ReaderHolder>();
} else if (var_type == proto::VarType::CHANNEL) { } else if (var_type == proto::VarType::CHANNEL) {
var->GetMutable<ChannelHolder>(); var->GetMutable<ChannelHolder>();
} else if (var_type == proto::VarType::NCCL_COM) { } else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in ncclInit // GetMutable will be called in operator
} else { } else {
PADDLE_THROW( PADDLE_THROW(
"Variable type %d is not in " "Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, " "[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, NCCL_COM]", "LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
var_type); var_type);
} }
} }
...@@ -126,9 +125,6 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, ...@@ -126,9 +125,6 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
for (auto& op_desc : block.AllOps()) { for (auto& op_desc : block.AllOps()) {
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(op->Type(), pool.Get(place_));
VLOG(3) << place_ << " " << op->DebugStringEx(local_scope); VLOG(3) << place_ << " " << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_); op->Run(*local_scope, place_);
......
...@@ -113,7 +113,10 @@ message VarType { ...@@ -113,7 +113,10 @@ message VarType {
PLACE_LIST = 14; PLACE_LIST = 14;
READER = 15; READER = 15;
CHANNEL = 16; CHANNEL = 16;
NCCL_COM = 17; // Any runtime decided variable type is raw
// raw variables should manage their own allocations
// in operators like nccl_op
RAW = 17;
} }
required Type type = 1; required Type type = 1;
...@@ -164,4 +167,6 @@ message BlockDesc { ...@@ -164,4 +167,6 @@ message BlockDesc {
// Please refer to // Please refer to
// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md // https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
// for more details. // for more details.
// TODO(panyx0718): A model can have multiple programs. Need a
// way to distinguish them. Maybe ID or name?
message ProgramDesc { repeated BlockDesc blocks = 1; } message ProgramDesc { repeated BlockDesc blocks = 1; }
...@@ -31,8 +31,14 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) { ...@@ -31,8 +31,14 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) {
os << "{"; os << "{";
for (auto &v : lod) { for (auto &v : lod) {
os << "{"; os << "{";
bool is_first = true;
for (auto &i : v) { for (auto &i : v) {
os << i << ","; if (is_first) {
os << i;
is_first = false;
} else {
os << ", " << i;
}
} }
os << "}"; os << "}";
} }
......
...@@ -125,6 +125,8 @@ class OpDesc { ...@@ -125,6 +125,8 @@ class OpDesc {
BlockDesc *Block() { return this->block_; } BlockDesc *Block() { return this->block_; }
const BlockDesc &BlockRef() const { return *this->block_; }
void SetBlock(BlockDesc *block) { this->block_ = block; } void SetBlock(BlockDesc *block) { this->block_ = block; }
private: private:
......
...@@ -21,6 +21,7 @@ limitations under the License. */ ...@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
...@@ -497,7 +498,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -497,7 +498,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
this->InferShape(&infer_shape_ctx); this->InferShape(&infer_shape_ctx);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto dev_ctx = pool.Get(place); auto dev_ctx = pool.Get(place);
// profile
platform::RecordEvent record_event(Type(), dev_ctx);
// check if op[type] has kernel registered. // check if op[type] has kernel registered.
auto& all_op_kernels = AllOpKernels(); auto& all_op_kernels = AllOpKernels();
auto kernels_iter = all_op_kernels.find(type_); auto kernels_iter = all_op_kernels.find(type_);
......
...@@ -32,23 +32,11 @@ void ReadBinaryFile(const std::string& filename, std::string& contents) { ...@@ -32,23 +32,11 @@ void ReadBinaryFile(const std::string& filename, std::string& contents) {
inputfs.close(); inputfs.close();
} }
bool IsParameter(const framework::VarDesc* var, bool IsPersistable(const framework::VarDesc* var) {
const framework::ProgramDesc& main_program) { if (var->Persistable() &&
if (var->Persistable()) { var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
// There are many unreachable variables in the program var->GetType() != framework::proto::VarType::FETCH_LIST) {
for (size_t i = 0; i < main_program.Size(); ++i) { return true;
const framework::BlockDesc& block = main_program.Block(i);
for (auto* op : block.AllOps()) {
if (op->Type() == framework::kFeedOpType) {
continue;
}
for (auto input_argument_name : op->InputArgumentNames()) {
if (input_argument_name == var->Name()) {
return true;
}
}
}
}
} }
return false; return false;
} }
...@@ -65,8 +53,8 @@ void LoadPersistables(framework::Executor& executor, ...@@ -65,8 +53,8 @@ void LoadPersistables(framework::Executor& executor,
std::vector<std::string> paramlist; std::vector<std::string> paramlist;
for (auto* var : global_block.AllVars()) { for (auto* var : global_block.AllVars()) {
if (IsParameter(var, main_program)) { if (IsPersistable(var)) {
VLOG(3) << "parameter's name: " << var->Name(); VLOG(3) << "persistable variable's name: " << var->Name();
framework::VarDesc* new_var = load_block->Var(var->Name()); framework::VarDesc* new_var = load_block->Var(var->Name());
new_var->SetShape(var->GetShape()); new_var->SetShape(var->GetShape());
...@@ -101,7 +89,6 @@ void LoadPersistables(framework::Executor& executor, ...@@ -101,7 +89,6 @@ void LoadPersistables(framework::Executor& executor,
executor.Run(*load_program, &scope, 0, true, true); executor.Run(*load_program, &scope, 0, true, true);
VLOG(3) << "Ran loading successfully";
delete load_program; delete load_program;
} }
......
...@@ -4,7 +4,7 @@ function(inference_test TARGET_NAME) ...@@ -4,7 +4,7 @@ function(inference_test TARGET_NAME)
set(multiValueArgs ARGS) set(multiValueArgs ARGS)
cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(PYTHON_TESTS_DIR ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/tests) set(PYTHON_TESTS_DIR ${PADDLE_SOURCE_DIR}/python/paddle/fluid/tests)
set(arg_list "") set(arg_list "")
if(inference_test_ARGS) if(inference_test_ARGS)
foreach(arg ${inference_test_ARGS}) foreach(arg ${inference_test_ARGS})
...@@ -30,5 +30,5 @@ inference_test(label_semantic_roles) ...@@ -30,5 +30,5 @@ inference_test(label_semantic_roles)
inference_test(recognize_digits ARGS mlp conv) inference_test(recognize_digits ARGS mlp conv)
inference_test(recommender_system) inference_test(recommender_system)
#inference_test(rnn_encoder_decoder) #inference_test(rnn_encoder_decoder)
inference_test(understand_sentiment) inference_test(understand_sentiment ARGS conv)
inference_test(word2vec) inference_test(word2vec)
...@@ -32,16 +32,42 @@ TEST(inference, label_semantic_roles) { ...@@ -32,16 +32,42 @@ TEST(inference, label_semantic_roles) {
paddle::framework::LoDTensor word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, paddle::framework::LoDTensor word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1,
ctx_p2, mark; ctx_p2, mark;
paddle::framework::LoD lod{{0, 4, 10}}; paddle::framework::LoD lod{{0, 4, 10}};
int64_t word_dict_len = 44068;
SetupLoDTensor(word, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); int64_t predicate_dict_len = 3162;
SetupLoDTensor( int64_t mark_dict_len = 2;
predicate, lod, static_cast<int64_t>(0), static_cast<int64_t>(1));
SetupLoDTensor(ctx_n2, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); SetupLoDTensor(word,
SetupLoDTensor(ctx_n1, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); lod,
SetupLoDTensor(ctx_0, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); static_cast<int64_t>(0),
SetupLoDTensor(ctx_p1, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(ctx_p2, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); SetupLoDTensor(predicate,
SetupLoDTensor(mark, lod, static_cast<int64_t>(0), static_cast<int64_t>(1)); lod,
static_cast<int64_t>(0),
static_cast<int64_t>(predicate_dict_len - 1));
SetupLoDTensor(ctx_n2,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(ctx_n1,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(ctx_0,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(ctx_p1,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(ctx_p2,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
SetupLoDTensor(mark,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(mark_dict_len - 1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds; std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&word); cpu_feeds.push_back(&word);
......
...@@ -31,7 +31,12 @@ TEST(inference, understand_sentiment) { ...@@ -31,7 +31,12 @@ TEST(inference, understand_sentiment) {
paddle::framework::LoDTensor words; paddle::framework::LoDTensor words;
paddle::framework::LoD lod{{0, 4, 10}}; paddle::framework::LoD lod{{0, 4, 10}};
SetupLoDTensor(words, lod, static_cast<int64_t>(0), static_cast<int64_t>(10)); int64_t word_dict_len = 5147;
SetupLoDTensor(words,
lod,
static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
std::vector<paddle::framework::LoDTensor*> cpu_feeds; std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&words); cpu_feeds.push_back(&words);
......
...@@ -31,12 +31,12 @@ TEST(inference, word2vec) { ...@@ -31,12 +31,12 @@ TEST(inference, word2vec) {
paddle::framework::LoDTensor first_word, second_word, third_word, fourth_word; paddle::framework::LoDTensor first_word, second_word, third_word, fourth_word;
paddle::framework::LoD lod{{0, 1}}; paddle::framework::LoD lod{{0, 1}};
int64_t dict_size = 2072; // Hard-coding the size of dictionary int64_t dict_size = 2073; // The size of dictionary
SetupLoDTensor(first_word, lod, static_cast<int64_t>(0), dict_size); SetupLoDTensor(first_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(second_word, lod, static_cast<int64_t>(0), dict_size); SetupLoDTensor(second_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(third_word, lod, static_cast<int64_t>(0), dict_size); SetupLoDTensor(third_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(fourth_word, lod, static_cast<int64_t>(0), dict_size); SetupLoDTensor(fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
std::vector<paddle::framework::LoDTensor*> cpu_feeds; std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&first_word); cpu_feeds.push_back(&first_word);
......
...@@ -101,8 +101,8 @@ void TestInference(const std::string& dirname, ...@@ -101,8 +101,8 @@ void TestInference(const std::string& dirname,
if (IsCombined) { if (IsCombined) {
// All parameters are saved in a single file. // All parameters are saved in a single file.
// Hard-coding the file names of program and parameters in unittest. // Hard-coding the file names of program and parameters in unittest.
// Users are free to specify different filename // The file names should be consistent with that used in Python API
// (provided: the filenames are changed in the python api as well: io.py) // `fluid.io.save_inference_model`.
std::string prog_filename = "__model_combined__"; std::string prog_filename = "__model_combined__";
std::string param_filename = "__params_combined__"; std::string param_filename = "__params_combined__";
inference_program = paddle::inference::Load(executor, inference_program = paddle::inference::Load(executor,
......
...@@ -11,6 +11,8 @@ function(op_library TARGET) ...@@ -11,6 +11,8 @@ function(op_library TARGET)
set(cc_srcs) set(cc_srcs)
set(cu_srcs) set(cu_srcs)
set(cu_cc_srcs) set(cu_cc_srcs)
set(cudnn_cu_cc_srcs)
set(CUDNN_FILE)
set(op_common_deps operator op_registry math_function) set(op_common_deps operator op_registry math_function)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
...@@ -30,10 +32,16 @@ function(op_library TARGET) ...@@ -30,10 +32,16 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${TARGET}.cu) list(APPEND cu_srcs ${TARGET}.cu)
endif() endif()
string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
endif()
else() else()
foreach(src ${op_library_SRCS}) foreach(src ${op_library_SRCS})
if (${src} MATCHES ".*\\.cu$") if (${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src}) list(APPEND cu_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND cudnn_cu_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cu.cc$") elseif(${src} MATCHES ".*\\.cu.cc$")
list(APPEND cu_cc_srcs ${src}) list(APPEND cu_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$") elseif(${src} MATCHES ".*\\.cc$")
...@@ -54,7 +62,7 @@ function(op_library TARGET) ...@@ -54,7 +62,7 @@ function(op_library TARGET)
set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE) set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
endif() endif()
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps}) ${op_common_deps})
else() else()
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS} cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS}
...@@ -98,6 +106,12 @@ function(op_library TARGET) ...@@ -98,6 +106,12 @@ function(op_library TARGET)
set(pybind_flag 1) set(pybind_flag 1)
endif() endif()
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
# pybind USE_OP # pybind USE_OP
if (${pybind_flag} EQUAL 0) if (${pybind_flag} EQUAL 0)
file(APPEND ${pybind_file} "USE_OP(${TARGET});\n") file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
...@@ -141,6 +155,7 @@ op_library(print_op DEPS lod_tensor) ...@@ -141,6 +155,7 @@ op_library(print_op DEPS lod_tensor)
op_library(adagrad_op DEPS selected_rows_functor) op_library(adagrad_op DEPS selected_rows_functor)
op_library(maxout_op DEPS maxouting) op_library(maxout_op DEPS maxouting)
op_library(unpool_op DEPS unpooling) op_library(unpool_op DEPS unpooling)
op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling) op_library(pool_with_index_op DEPS pooling)
op_library(lod_rank_table_op DEPS lod_rank_table) op_library(lod_rank_table_op DEPS lod_rank_table)
op_library(lod_tensor_to_array_op DEPS lod_rank_table_op) op_library(lod_tensor_to_array_op DEPS lod_rank_table_op)
...@@ -152,43 +167,17 @@ op_library(lstm_op DEPS sequence2batch lstm_compute) ...@@ -152,43 +167,17 @@ op_library(lstm_op DEPS sequence2batch lstm_compute)
op_library(lstmp_op DEPS sequence2batch lstm_compute) op_library(lstmp_op DEPS sequence2batch lstm_compute)
op_library(gru_op DEPS sequence2batch gru_compute) op_library(gru_op DEPS sequence2batch gru_compute)
op_library(recurrent_op DEPS executor) op_library(recurrent_op DEPS executor)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function) op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
op_library(cos_sim_op DEPS cos_sim_functor) op_library(cos_sim_op DEPS cos_sim_functor)
op_library(parallel_do_op DEPS executor) op_library(parallel_do_op DEPS executor)
op_library(create_reader_op DEPS reader) op_library(create_reader_op DEPS reader)
# Regist multiple Kernel to pybind
if (WITH_GPU) if (WITH_GPU)
op_library(conv_op DEPS vol2col depthwise_conv)
op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS
vol2col depthwise_conv)
op_library(edit_distance_op SRCS edit_distance_op.cc edit_distance_op.cu DEPS math_function)
op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
conv_transpose_cudnn_op.cu.cc DEPS vol2col)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d, CUDNN);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(pool2d, CUDNN);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);\n")
else() else()
op_library(conv_op SRCS conv_op.cc DEPS vol2col) op_library(conv_op DEPS vol2col)
op_library(pool_op SRCS pool_op.cc DEPS pooling)
op_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS vol2col)
endif() endif()
op_library(conv_transpose_op DEPS vol2col)
cc_library(batch_size_like SRCS batch_size_like.cc DEPS op_registry)
op_library(fill_constant_batch_size_like_op
SRCS fill_constant_batch_size_like_op.cc fill_constant_batch_size_like_op.cu.cc
DEPS batch_size_like)
op_library(uniform_random_batch_size_like_op
SRCS uniform_random_batch_size_like_op.cc
DEPS batch_size_like uniform_random_op)
op_library(gaussian_random_batch_size_like_op
SRCS gaussian_random_batch_size_like_op.cc
DEPS batch_size_like gaussian_random_op)
# FIXME(typhoonzero): save/load depends lodtensor serialization functions # FIXME(typhoonzero): save/load depends lodtensor serialization functions
op_library(save_op DEPS lod_tensor) op_library(save_op DEPS lod_tensor)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/batch_size_like.h"
namespace paddle {
namespace operators {
// Infers the shape of "Out": it is the "shape" attribute with the entry at
// `output_dim_idx` replaced by dimension `input_dim_idx` of "Input".
void BatchSizeLikeOp::InferShape(framework::InferShapeContext *ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("Input"),
                 "Input(Input) of %s should not be null.", Type());
  PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of %s should not be null.",
                 Type());

  // The attribute is stored as vector<int>; widen it to int64_t before
  // building the output DDim.
  auto &shape_attr = ctx->Attrs().Get<std::vector<int>>("shape");
  PADDLE_ENFORCE_GT(shape_attr.size(), 0);
  std::vector<int64_t> wide_shape(shape_attr.begin(), shape_attr.end());
  auto out_dims = framework::make_ddim(wide_shape);

  // The batch-size dimension must exist in the input ...
  int input_dim_idx = ctx->Attrs().Get<int>("input_dim_idx");
  PADDLE_ENFORCE_GE(input_dim_idx, 0);
  auto in_dims = ctx->GetInputDim("Input");
  PADDLE_ENFORCE_GT(in_dims.size(), input_dim_idx);

  // ... and the slot it is copied into must exist in the requested shape.
  int output_dim_idx = ctx->Attrs().Get<int>("output_dim_idx");
  PADDLE_ENFORCE_GE(output_dim_idx, 0);
  PADDLE_ENFORCE_GT(static_cast<int>(shape_attr.size()), output_dim_idx);

  out_dims[output_dim_idx] = in_dims[input_dim_idx];
  ctx->SetOutputDim("Out", out_dims);
}
// Registers the input, output and attributes shared by the *_batch_size_like
// operators: one "Input" tensor, one "Out" tensor, the requested "shape", and
// the two dimension indices (both defaulting to 0).
BatchSizeLikeOpMaker::BatchSizeLikeOpMaker(OpProto *proto,
                                           OpAttrChecker *op_checker)
    : framework::OpProtoAndCheckerMaker(proto, op_checker) {
  // Tensor the batch size is read from.
  AddInput("Input",
           "(Tensor) Tensor whose input_dim_idx'th dimension "
           "specifies the batch_size");
  // Result tensor.
  AddOutput("Out",
            "(Tensor) Tensor of specified shape "
            "will be filled with the specified value");
  // Requested output shape; it has no default value.
  AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
  // Which dimension of "Input" carries the batch size.
  AddAttr<int>("input_dim_idx",
               "(int, default 0) The index of input's batch size dimension")
      .SetDefault(0);
  // Which dimension of "Out" carries the batch size.
  AddAttr<int>("output_dim_idx",
               "(int, default 0) The index of output's batch size dimension")
      .SetDefault(0);
}
} // namespace operators
} // namespace paddle
...@@ -24,12 +24,50 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel { ...@@ -24,12 +24,50 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override; void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of %s should not be null.", Type());
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of %s should not be null.", Type());
auto &shape = ctx->Attrs().Get<std::vector<int>>("shape");
PADDLE_ENFORCE_GT(shape.size(), 0);
std::vector<int64_t> shape_int64(shape.size(), 0);
std::transform(shape.begin(), shape.end(), shape_int64.begin(),
[](int a) { return static_cast<int64_t>(a); });
auto output_dim = framework::make_ddim(shape_int64);
int input_dim_idx = ctx->Attrs().Get<int>("input_dim_idx");
PADDLE_ENFORCE_GE(input_dim_idx, 0);
PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), input_dim_idx);
int output_dim_idx = ctx->Attrs().Get<int>("output_dim_idx");
PADDLE_ENFORCE_GE(output_dim_idx, 0);
PADDLE_ENFORCE_GT(static_cast<int>(shape.size()), output_dim_idx);
output_dim[output_dim_idx] = ctx->GetInputDim("Input")[input_dim_idx];
ctx->SetOutputDim("Out", output_dim);
}
}; };
class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker { class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
BatchSizeLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker); BatchSizeLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(Tensor) Tensor "
"whose input_dim_idx'th dimension specifies the batch_size");
AddOutput("Out",
"(Tensor) Tensor of specified shape will be filled "
"with the specified value");
AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
AddAttr<int>("input_dim_idx",
"(int, default 0) The index of input's batch size dimension")
.SetDefault(0);
AddAttr<int>("output_dim_idx",
"(int, default 0) The index of output's batch size dimension")
.SetDefault(0);
}
}; };
} // namespace operators } // namespace operators
......
...@@ -94,6 +94,38 @@ class BipartiteMatchKernel : public framework::OpKernel<T> { ...@@ -94,6 +94,38 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
} }
} }
void ArgMaxMatch(const Tensor& dist, int* match_indices, T* match_dist,
T overlap_threshold) const {
constexpr T kEPS = static_cast<T>(1e-6);
int64_t row = dist.dims()[0];
int64_t col = dist.dims()[1];
auto* dist_data = dist.data<T>();
for (int64_t j = 0; j < col; ++j) {
if (match_indices[j] != -1) {
// the j-th column has been matched to one entity.
continue;
}
int max_row_idx = -1;
T max_dist = -1;
for (int i = 0; i < row; ++i) {
T dist = dist_data[i * col + j];
if (dist < kEPS) {
// distance is 0 between m-th row and j-th column
continue;
}
if (dist >= overlap_threshold && dist > max_dist) {
max_row_idx = i;
max_dist = dist;
}
}
if (max_row_idx != -1) {
PADDLE_ENFORCE_EQ(match_indices[j], -1);
match_indices[j] = max_row_idx;
match_dist[j] = max_dist;
}
}
}
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* dist_mat = context.Input<LoDTensor>("DistMat"); auto* dist_mat = context.Input<LoDTensor>("DistMat");
auto* match_indices = context.Output<Tensor>("ColToRowMatchIndices"); auto* match_indices = context.Output<Tensor>("ColToRowMatchIndices");
...@@ -120,13 +152,21 @@ class BipartiteMatchKernel : public framework::OpKernel<T> { ...@@ -120,13 +152,21 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
int* indices = match_indices->data<int>(); int* indices = match_indices->data<int>();
T* dist = match_dist->data<T>(); T* dist = match_dist->data<T>();
auto type = context.Attr<std::string>("match_type");
auto threshold = context.Attr<float>("dist_threshold");
if (n == 1) { if (n == 1) {
BipartiteMatch(*dist_mat, indices, dist); BipartiteMatch(*dist_mat, indices, dist);
if (type == "per_prediction") {
ArgMaxMatch(*dist_mat, indices, dist, threshold);
}
} else { } else {
auto lod = dist_mat->lod().back(); auto lod = dist_mat->lod().back();
for (size_t i = 0; i < lod.size() - 1; ++i) { for (size_t i = 0; i < lod.size() - 1; ++i) {
Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]); Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]);
BipartiteMatch(one_ins, indices + i * col, dist + i * col); BipartiteMatch(one_ins, indices + i * col, dist + i * col);
if (type == "per_prediction") {
ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold);
}
} }
} }
} }
...@@ -147,6 +187,19 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -147,6 +187,19 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker {
"This tensor can contain LoD information to represent a batch of " "This tensor can contain LoD information to represent a batch of "
"inputs. One instance of this batch can contain different numbers of " "inputs. One instance of this batch can contain different numbers of "
"entities."); "entities.");
// Matching strategy. The registered default is 'bipartite'; the help text
// now states the same default that SetDefault() registers.
AddAttr<std::string>(
    "match_type",
    "(string, default: bipartite) "
    "The type of matching method, should be 'bipartite' or "
    "'per_prediction', 'bipartite' by default.")
    .SetDefault("bipartite")
    .InEnum({"bipartite", "per_prediction"});
// Threshold described by the help text as applying to 'per_prediction'.
AddAttr<float>(
    "dist_threshold",
    "(float, default: 0.5) "
    "If `match_type` is 'per_prediction', this threshold is to determine "
    "the extra matching bboxes based on the maximum distance.")
    .SetDefault(0.5);
AddOutput("ColToRowMatchIndices", AddOutput("ColToRowMatchIndices",
"(Tensor) A 2-D Tensor with shape [N, M] in int type. " "(Tensor) A 2-D Tensor with shape [N, M] in int type. "
"N is the batch size. If ColToRowMatchIndices[i][j] is -1, it " "N is the batch size. If ColToRowMatchIndices[i][j] is -1, it "
...@@ -168,10 +221,10 @@ distance matrix. For input 2D matrix, the bipartite matching algorithm can ...@@ -168,10 +221,10 @@ distance matrix. For input 2D matrix, the bipartite matching algorithm can
find the matched column for each row, also can find the matched row for find the matched column for each row, also can find the matched row for
each column. And this operator only calculate matched indices from column each column. And this operator only calculate matched indices from column
to row. For each instance, the number of matched indices is the number of to row. For each instance, the number of matched indices is the number of
of columns of the input ditance matrix. of columns of the input distance matrix.
There are two outputs to save matched indices and distance. There are two outputs to save matched indices and distance.
A simple description, this algothrim matched the best (maximum distance) A simple description, this algorithm matched the best (maximum distance)
row entity to the column entity and the matched indices are not duplicated row entity to the column entity and the matched indices are not duplicated
in each row of ColToRowMatchIndices. If the column entity is not matched in each row of ColToRowMatchIndices. If the column entity is not matched
any row entity, set -1 in ColToRowMatchIndices. any row entity, set -1 in ColToRowMatchIndices.
......
...@@ -83,7 +83,7 @@ class CompareOp : public framework::OperatorWithKernel { ...@@ -83,7 +83,7 @@ class CompareOp : public framework::OperatorWithKernel {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
#define REGISTER_LOGICAL_OP(op_type, _equation) \ #define REGISTER_COMPARE_OP(op_type, _equation) \
struct _##op_type##Comment { \ struct _##op_type##Comment { \
static char type[]; \ static char type[]; \
static char equation[]; \ static char equation[]; \
...@@ -96,11 +96,17 @@ class CompareOp : public framework::OperatorWithKernel { ...@@ -96,11 +96,17 @@ class CompareOp : public framework::OperatorWithKernel {
::paddle::operators::CompareOpInferShape<_##op_type##Comment>, \ ::paddle::operators::CompareOpInferShape<_##op_type##Comment>, \
::paddle::framework::EmptyGradOpMaker); ::paddle::framework::EmptyGradOpMaker);
REGISTER_LOGICAL_OP(less_than, "Out = X < Y"); REGISTER_COMPARE_OP(less_than, "Out = X < Y");
REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor); REGISTER_COMPARE_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y"); REGISTER_COMPARE_OP(less_equal, "Out = X <= Y");
REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor); REGISTER_COMPARE_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_OP(equal, "Out = X == Y"); REGISTER_COMPARE_OP(greater_than, "Out = X > Y");
REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor); REGISTER_COMPARE_KERNEL(greater_than, CPU,
REGISTER_LOGICAL_OP(not_equal, "Out = X != Y"); paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_KERNEL(not_equal, CPU, paddle::operators::NotEqualFunctor); REGISTER_COMPARE_OP(greater_equal, "Out = X >= Y");
REGISTER_COMPARE_KERNEL(greater_equal, CPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_COMPARE_OP(equal, "Out = X == Y");
REGISTER_COMPARE_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
REGISTER_COMPARE_OP(not_equal, "Out = X != Y");
REGISTER_COMPARE_KERNEL(not_equal, CPU, paddle::operators::NotEqualFunctor);
...@@ -14,7 +14,11 @@ limitations under the License. */ ...@@ -14,7 +14,11 @@ limitations under the License. */
#include "paddle/fluid/operators/compare_op.h" #include "paddle/fluid/operators/compare_op.h"
REGISTER_LOGICAL_KERNEL(less_than, CUDA, paddle::operators::LessThanFunctor); REGISTER_COMPARE_KERNEL(less_than, CUDA, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_KERNEL(less_equal, CUDA, paddle::operators::LessEqualFunctor); REGISTER_COMPARE_KERNEL(less_equal, CUDA, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_KERNEL(equal, CUDA, paddle::operators::EqualFunctor); REGISTER_COMPARE_KERNEL(greater_than, CUDA,
REGISTER_LOGICAL_KERNEL(not_equal, CUDA, paddle::operators::NotEqualFunctor); paddle::operators::GreaterThanFunctor);
REGISTER_COMPARE_KERNEL(greater_equal, CUDA,
paddle::operators::GreaterEqualFunctor);
REGISTER_COMPARE_KERNEL(equal, CUDA, paddle::operators::EqualFunctor);
REGISTER_COMPARE_KERNEL(not_equal, CUDA, paddle::operators::NotEqualFunctor);
...@@ -34,6 +34,18 @@ struct LessEqualFunctor { ...@@ -34,6 +34,18 @@ struct LessEqualFunctor {
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; } HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
}; };
// Binary predicate returning true when the first operand is strictly greater
// than the second. ELEM_TYPE exposes the operand type T.
template <typename T>
struct GreaterThanFunctor {
  typedef T ELEM_TYPE;
  HOSTDEVICE bool operator()(const T& lhs, const T& rhs) const {
    return lhs > rhs;
  }
};
// Binary predicate returning true when the first operand is greater than or
// equal to the second. ELEM_TYPE exposes the operand type T.
template <typename T>
struct GreaterEqualFunctor {
  typedef T ELEM_TYPE;
  HOSTDEVICE bool operator()(const T& lhs, const T& rhs) const {
    return lhs >= rhs;
  }
};
template <typename T> template <typename T>
struct EqualFunctor { struct EqualFunctor {
using ELEM_TYPE = T; using ELEM_TYPE = T;
...@@ -76,7 +88,7 @@ class CompareOpKernel ...@@ -76,7 +88,7 @@ class CompareOpKernel
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
#define REGISTER_LOGICAL_KERNEL(op_type, dev, functor) \ #define REGISTER_COMPARE_KERNEL(op_type, dev, functor) \
REGISTER_OP_##dev##_KERNEL( \ REGISTER_OP_##dev##_KERNEL( \
op_type, ::paddle::operators::CompareOpKernel< \ op_type, ::paddle::operators::CompareOpKernel< \
::paddle::platform::dev##DeviceContext, functor<int>>, \ ::paddle::platform::dev##DeviceContext, functor<int>>, \
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/fluid/operators/strided_memcpy.h"
...@@ -34,12 +35,46 @@ class ConcatKernel : public framework::OpKernel<T> { ...@@ -34,12 +35,46 @@ class ConcatKernel : public framework::OpKernel<T> {
auto out_stride = framework::stride_numel(out->dims()); auto out_stride = framework::stride_numel(out->dims());
size_t output_offset = 0; size_t output_offset = 0;
for (auto* in : ins) {
auto in_stride = framework::stride_numel(in->dims()); // If axis >=1, copy to out immediately need to call many times
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, // of cuda memcpy. Copy the input to cpu and do the stride copy,
out->data<T>() + output_offset, out_stride, // then copy to gpu output.
in->data<T>(), in_stride, in_stride[axis]);
output_offset += in_stride[axis]; if (platform::is_gpu_place(place) && axis >= 1) {
platform::CPUPlace copy_place;
auto& cpu_ctx = *platform::DeviceContextPool::Instance().Get(copy_place);
framework::Tensor cpu_out;
cpu_out.Resize(out->dims());
cpu_out.mutable_data<T>(copy_place);
auto& dev_ctx = ctx.device_context();
std::vector<std::unique_ptr<framework::Tensor>> cpu_ins;
for (auto* in : ins) {
std::unique_ptr<framework::Tensor> cpu_in(new framework::Tensor);
framework::TensorCopy(*in, copy_place, dev_ctx, cpu_in.get());
cpu_ins.emplace_back(std::move(cpu_in));
}
// TODO(dzhwinter): overlap copy and compute stream
// https://devblogs.nvidia.com/how-overlap-data-transfers-cuda-cc/
dev_ctx.Wait();
for (auto& in : cpu_ins) {
auto& cpu_in = *in.get();
auto in_stride = framework::stride_numel(cpu_in.dims());
StridedNumelCopyWithAxis<T>(
cpu_ctx, axis, cpu_out.data<T>() + output_offset, out_stride,
cpu_in.data<T>(), in_stride, in_stride[axis]);
output_offset += in_stride[axis];
}
framework::TensorCopy(cpu_out, place, dev_ctx, out);
} else {
for (auto* in : ins) {
auto in_stride = framework::stride_numel(in->dims());
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis,
out->data<T>() + output_offset, out_stride,
in->data<T>(), in_stride, in_stride[axis]);
output_offset += in_stride[axis];
}
} }
} }
}; };
......
...@@ -54,12 +54,6 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -54,12 +54,6 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
PADDLE_ENFORCE(in_dims[i + 2] + 2 * paddings[i] -
(dilations[i] * (filter_dims[i + 2] - 1) + 1) >
0,
"Due to the settings of paddings, filter_dims and "
"dilations, the output size is less than 0, please check "
"again.");
output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], output_shape.push_back(ConvOutputSize(in_dims[i + 2], filter_dims[i + 2],
dilations[i], paddings[i], dilations[i], paddings[i],
strides[i])); strides[i]));
......
...@@ -31,7 +31,14 @@ using Tensor = framework::Tensor; ...@@ -31,7 +31,14 @@ using Tensor = framework::Tensor;
inline int ConvOutputSize(int input_size, int filter_size, int dilation, inline int ConvOutputSize(int input_size, int filter_size, int dilation,
int padding, int stride) { int padding, int stride) {
const int dkernel = dilation * (filter_size - 1) + 1; const int dkernel = dilation * (filter_size - 1) + 1;
const int output_size = (input_size + 2 * padding - dkernel) / stride + 1; int output_size = (input_size + 2 * padding - dkernel) / stride + 1;
PADDLE_ENFORCE(
output_size > 0,
"Due to the settings of padding(%d), filter_size(%d), dilation(%d) and "
"stride(%d), the output size is less than 0, please check "
"again. Input_size:%d",
padding, filter_size, dilation, stride, input_size);
return output_size; return output_size;
} }
inline bool IsExpand(std::vector<int64_t>& filter_dim, inline bool IsExpand(std::vector<int64_t>& filter_dim,
......
...@@ -177,8 +177,8 @@ std::shared_ptr<grpc::Channel> RPCClient::GetChannel(const std::string& ep) { ...@@ -177,8 +177,8 @@ std::shared_ptr<grpc::Channel> RPCClient::GetChannel(const std::string& ep) {
args.SetMaxSendMessageSize(std::numeric_limits<int>::max()); args.SetMaxSendMessageSize(std::numeric_limits<int>::max());
args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max()); args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
auto ch = std::shared_ptr<grpc::Channel>( auto ch =
grpc::CreateCustomChannel(ep, grpc::InsecureChannelCredentials(), args)); grpc::CreateCustomChannel(ep, grpc::InsecureChannelCredentials(), args);
channels_[ep] = ch; channels_[ep] = ch;
return ch; return ch;
......
...@@ -129,6 +129,8 @@ class ListenAndServOp : public framework::OperatorBase { ...@@ -129,6 +129,8 @@ class ListenAndServOp : public framework::OperatorBase {
} }
if (exit_flag) { if (exit_flag) {
rpc_service_->ShutDown(); rpc_service_->ShutDown();
rpc_service_->SetCond(1);
break;
} }
try { try {
executor.Run(*program, &recv_scope, block->ID(), /*global_block*/ executor.Run(*program, &recv_scope, block->ID(), /*global_block*/
......
...@@ -65,7 +65,7 @@ class NCCLInitOpVarTypeInference : public framework::VarTypeInference { ...@@ -65,7 +65,7 @@ class NCCLInitOpVarTypeInference : public framework::VarTypeInference {
framework::BlockDesc *block) const override { framework::BlockDesc *block) const override {
auto out_var_name = op_desc.Output("Communicator").front(); auto out_var_name = op_desc.Output("Communicator").front();
auto &out_var = block->FindRecursiveOrCreateVar(out_var_name); auto &out_var = block->FindRecursiveOrCreateVar(out_var_name);
auto var_type = framework::proto::VarType::NCCL_COM; auto var_type = framework::proto::VarType::RAW;
out_var.SetType(var_type); out_var.SetType(var_type);
} }
}; };
......
...@@ -19,6 +19,11 @@ namespace operators { ...@@ -19,6 +19,11 @@ namespace operators {
int PoolOutputSize(int input_size, int filter_size, int padding, int stride) { int PoolOutputSize(int input_size, int filter_size, int padding, int stride) {
int output_size = (input_size - filter_size + 2 * padding) / stride + 1; int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
PADDLE_ENFORCE(output_size > 0,
"Due to the settings of padding(%d), filter_size(%d) and "
"stride(%d), the output size is less than 0, please check "
"again. Input_size:%d",
padding, filter_size, stride, input_size);
return output_size; return output_size;
} }
......
...@@ -121,10 +121,15 @@ class ReshapeGradOp : public framework::OperatorWithKernel { ...@@ -121,10 +121,15 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
REGISTER_OP(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, reshape_grad, REGISTER_OP(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, reshape_grad,
ops::ReshapeGradOp); ops::ReshapeGradOp);
REGISTER_OP_CPU_KERNEL(reshape, REGISTER_OP_CPU_KERNEL(reshape, ops::ReshapeKernel<CPU, float>,
ops::ReshapeKernel<paddle::platform::CPUPlace, float>); ops::ReshapeKernel<CPU, double>,
REGISTER_OP_CPU_KERNEL( ops::ReshapeKernel<CPU, int>,
reshape_grad, ops::ReshapeGradKernel<paddle::platform::CPUPlace, float>); ops::ReshapeKernel<CPU, int64_t>);
REGISTER_OP_CPU_KERNEL(reshape_grad, ops::ReshapeGradKernel<CPU, float>,
ops::ReshapeGradKernel<CPU, double>,
ops::ReshapeGradKernel<CPU, int>,
ops::ReshapeGradKernel<CPU, int64_t>);
...@@ -13,10 +13,14 @@ See the License for the specific language governing permissions and ...@@ -13,10 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/reshape_op.h" #include "paddle/fluid/operators/reshape_op.h"
using CUDA = paddle::platform::CUDADeviceContext;
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(reshape, paddle::operators::ReshapeKernel<CUDA, float>,
reshape, paddle::operators::ReshapeKernel<CUDA, double>,
paddle::operators::ReshapeKernel<paddle::platform::CUDAPlace, float>); paddle::operators::ReshapeKernel<CUDA, int>,
REGISTER_OP_CUDA_KERNEL( paddle::operators::ReshapeKernel<CUDA, int64_t>);
reshape_grad, REGISTER_OP_CUDA_KERNEL(reshape_grad,
paddle::operators::ReshapeGradKernel<paddle::platform::CUDAPlace, float>); paddle::operators::ReshapeGradKernel<CUDA, float>,
paddle::operators::ReshapeGradKernel<CUDA, double>,
paddle::operators::ReshapeGradKernel<CUDA, int>,
paddle::operators::ReshapeGradKernel<CUDA, int64_t>);
...@@ -121,9 +121,27 @@ This operator will send tensor to recv_op at the parameter server. ...@@ -121,9 +121,27 @@ This operator will send tensor to recv_op at the parameter server.
} }
}; };
// Variable-type inference for the send operator: looks up (or creates) the
// variable named by the first entry of the "RPCClient" output and marks it as
// VarType::RAW.
class SendOpVarTypeInference : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc& op_desc,
                  framework::BlockDesc* block) const override {
    // Only the first name bound to the "RPCClient" output slot is used.
    auto client_var_name = op_desc.Output("RPCClient").front();
    block->FindRecursiveOrCreateVar(client_var_name)
        .SetType(framework::proto::VarType::RAW);
  }
};
// Shape inference for the send operator. The call operator is intentionally
// empty: no shapes are read or written here.
class SendOpShapeInference : public framework::InferShapeBase {
public:
// No-op; `ctx` is unused.
void operator()(framework::InferShapeContext* ctx) const override {}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(send, ops::SendOp, ops::SendOpMaker); REGISTER_OPERATOR(send, ops::SendOp, paddle::framework::EmptyGradOpMaker,
ops::SendOpMaker, ops::SendOpVarTypeInference,
ops::SendOpShapeInference);
...@@ -95,7 +95,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs, ...@@ -95,7 +95,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
for (auto kv : outputs) { for (auto kv : outputs) {
for (auto v : kv.second) { for (auto v : kv.second) {
auto var = block->Var(v); auto var = block->Var(v);
var->SetDataType(f::proto::DataType::FP32); var->SetDataType(f::proto::VarType::FP32);
} }
} }
...@@ -122,33 +122,37 @@ void StartServerNet(bool is_sparse) { ...@@ -122,33 +122,37 @@ void StartServerNet(bool is_sparse) {
// sub program run in listen_and_serv_op, for simple test we use sum // sub program run in listen_and_serv_op, for simple test we use sum
f::ProgramDesc program; f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0); f::BlockDesc *optimize_block = program.MutableBlock(0);
// X for server side tensors, RX for received tensers, must be of same shape. // X for server side tensors, RX for received tensers, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, block); AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block);
f::AttributeMap attrs; f::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
attrs.insert({"Fanin", 1});
attrs.insert({"ParamList", std::vector<std::string>({"Out"})}); attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})}); attrs.insert({"GradList", std::vector<std::string>({"x1"})});
attrs.insert({"OptimizeBlock", block}); attrs.insert({"OptimizeBlock", optimize_block});
listen_and_serv_op = listen_and_serv_op =
f::OpRegistry::CreateOp("listen_and_serv", {}, {}, attrs); f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
listen_and_serv_op->Run(scope, place); listen_and_serv_op->Run(scope, place);
} }
TEST(SendRecvOp, CPUDense) { TEST(SendRecvOp, CPUDense) {
std::thread server_thread(StartServerNet, false); std::thread server_thread(StartServerNet, false);
sleep(10); // wait server to start sleep(5); // wait server to start
// local net // local net
f::Scope scope; f::Scope scope;
p::CPUPlace place; p::CPUPlace place;
InitTensorsInScope(scope, place); InitTensorsInScope(scope, place);
// create rpc client var
scope.Var("RPC_CLIENT_VAR");
f::AttributeMap attrs; f::AttributeMap attrs;
attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})}); attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})});
attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})}); attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})});
auto send_op = f::OpRegistry::CreateOp("send", {{"X", {"x1"}}}, auto send_op = f::OpRegistry::CreateOp(
{{"Out", {"Out"}}}, attrs); "send", {{"X", {"x1"}}},
{{"Out", {"Out"}}, {"RPCClient", {"RPC_CLIENT_VAR"}}}, attrs);
send_op->Run(scope, place); send_op->Run(scope, place);
auto in_var = scope.Var("x1"); auto in_var = scope.Var("x1");
...@@ -175,11 +179,13 @@ TEST(SendRecvOp, CPUSparse) { ...@@ -175,11 +179,13 @@ TEST(SendRecvOp, CPUSparse) {
p::CPUPlace place; p::CPUPlace place;
p::CPUDeviceContext ctx(place); p::CPUDeviceContext ctx(place);
InitSelectedRowsInScope(scope, place); InitSelectedRowsInScope(scope, place);
scope.Var("RPC_CLIENT_VAR");
f::AttributeMap attrs; f::AttributeMap attrs;
attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})}); attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})});
attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})}); attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})});
auto send_op = f::OpRegistry::CreateOp("send", {{"X", {"x1"}}}, auto send_op = f::OpRegistry::CreateOp(
{{"Out", {"Out"}}}, attrs); "send", {{"X", {"x1"}}},
{{"Out", {"Out"}}, {"RPCClient", {"RPC_CLIENT_VAR"}}}, attrs);
send_op->Run(scope, place); send_op->Run(scope, place);
auto x0 = scope.Var("x0")->GetMutable<f::SelectedRows>(); auto x0 = scope.Var("x0")->GetMutable<f::SelectedRows>();
......
proto_library(profiler_proto SRCS profiler.proto)
if(WITH_GPU) if(WITH_GPU)
cc_library(enforce SRCS enforce.cc DEPS) cc_library(enforce SRCS enforce.cc DEPS)
else() else()
...@@ -37,7 +39,8 @@ nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) ...@@ -37,7 +39,8 @@ nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context) nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context) nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context)
cc_library(profiler SRCS profiler.cc DEPS device_context) cc_library(device_tracer SRCS device_tracer.cc DEPS profiler_proto ${GPU_CTX_DEPS})
cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer)
cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
nv_test(float16_gpu_test SRCS float16_test.cu) nv_test(float16_gpu_test SRCS float16_test.cu)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device_tracer.h"
#include <stdio.h>
#include <stdlib.h>
#include <map>
#include <mutex>
#include <numeric>
#include <string>
#include <unordered_map>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/string/printf.h"
namespace paddle {
namespace platform {
// File-local state shared by the free functions at the bottom of this file.
namespace {
// Per-thread annotation pointer; written by SetCurAnnotation /
// ClearCurAnnotation. Only the pointer is stored, not a copy of the text.
thread_local const char *cur_annotation = nullptr;
// Guards the one-time creation of `tracer` in GetDeviceTracer().
std::once_flag tracer_once_flag;
// Process-wide tracer instance; null until GetDeviceTracer() first runs.
DeviceTracer *tracer = nullptr;
} // namespace
#ifdef PADDLE_WITH_CUPTI
namespace {
// TODO(panyx0718): Revisit the buffer size here.
// Size in bytes of each activity buffer (32 KiB).
uint64_t kBufSize = 32 * 1024;
// Alignment, in bytes, applied by ALIGN_BUFFER. The macro's mask arithmetic
// ((align)-1) is only meaningful for power-of-two values.
uint64_t kAlignSize = 8;
// Rounds `buffer` up to the next multiple of `align`; yields `buffer`
// unchanged when it is already aligned.
#define ALIGN_BUFFER(buffer, align) \
(((uintptr_t)(buffer) & ((align)-1)) \
? ((buffer) + (align) - ((uintptr_t)(buffer) & ((align)-1))) \
: (buffer))
// Evaluates `call` once; if the result is not CUPTI_SUCCESS, prints the file,
// line, the stringified call text and the CUPTI error string to stderr, then
// terminates the process with exit(-1).
#define CUPTI_CALL(call) \
do { \
CUptiResult _status = call; \
if (_status != CUPTI_SUCCESS) { \
const char *errstr; \
dynload::cuptiGetResultString(_status, &errstr); \
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \
__FILE__, __LINE__, #call, errstr); \
exit(-1); \
} \
} while (0)
// Turns on collection of the CUPTI activity kinds this tracer records:
// MEMCPY, KERNEL, DEVICE, MEMSET and OVERHEAD. Each call goes through
// CUPTI_CALL, so any failure terminates the process.
void EnableActivity() {
// Device activity record is created when CUDA initializes, so we
// want to enable it before cuInit() or any CUDA runtime call.
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET));
CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD));
// We don't track these activities for now.
// CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONTEXT));
// CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER));
// CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME));
// CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NAME));
// CUPTI_CALL(dynload::cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER));
}
// Turns off collection of CUPTI activity records. This disables a superset of
// what EnableActivity() enables: it also disables CONTEXT, DRIVER, RUNTIME,
// NAME and MARKER. Each call goes through CUPTI_CALL, so any failure
// terminates the process.
void DisableActivity() {
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_KERNEL));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DEVICE));
// Disable all other activity record kinds.
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONTEXT));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DRIVER));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_RUNTIME));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMSET));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_NAME));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MARKER));
CUPTI_CALL(dynload::cuptiActivityDisable(CUPTI_ACTIVITY_KIND_OVERHEAD));
}
// CUPTI "buffer requested" callback: hands CUPTI a fresh activity buffer.
//
// Outputs:
//   *buffer        - start of a kBufSize-byte region aligned to kAlignSize,
//                    or NULL when the allocation failed.
//   *size          - usable size of the region in bytes (0 on failure).
//   *maxNumRecords - always 0, i.e. no explicit record-count limit is set.
//
// The returned pointer is released with free() in bufferCompleted(), so it
// must be exactly the pointer produced by the allocator. The previous
// malloc() + ALIGN_BUFFER scheme could hand out an adjusted pointer (which is
// undefined behaviour to free) and never checked for allocation failure.
// posix_memalign() yields memory that is already aligned and free()-able.
void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size,
                              size_t *maxNumRecords) {
  void *storage = nullptr;
  if (posix_memalign(&storage, kAlignSize, kBufSize) != 0) {
    // NOTE(review): the CUPTI activity API documents a NULL buffer as "no
    // buffer available, records are dropped" - confirm for the CUPTI version
    // in use.
    storage = nullptr;
  }
  *buffer = static_cast<uint8_t *>(storage);
  *size = (storage != nullptr) ? kBufSize : 0;
  *maxNumRecords = 0;
}
// CUPTI "buffer completed" callback: drains every activity record from
// `buffer`, forwards kernel records to the global tracer, reports dropped
// records on stderr and finally releases the buffer with free().
//
//   ctx, streamId - passed through to cuptiActivityGetNumDroppedRecords.
//   buffer        - buffer previously handed out by bufferRequested().
//   size          - unused here.
//   validSize     - number of valid bytes in `buffer`; nothing is parsed
//                   when it is 0.
void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
                              size_t size, size_t validSize) {
  if (validSize > 0) {
    CUpti_Activity *record = NULL;
    for (;;) {
      const CUptiResult status =
          dynload::cuptiActivityGetNextRecord(buffer, validSize, &record);
      if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
        // Seems not an error in this case: no more records in the buffer.
        break;
      }
      if (status != CUPTI_SUCCESS) {
        // Any other status is fatal; CUPTI_CALL reports it and exits.
        CUPTI_CALL(status);
        continue;
      }
      const bool is_kernel_record =
          record->kind == CUPTI_ACTIVITY_KIND_KERNEL ||
          record->kind == CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL;
      if (is_kernel_record) {
        auto *kernel = reinterpret_cast<const CUpti_ActivityKernel3 *>(record);
        tracer->AddKernelRecords(kernel->start, kernel->end, kernel->deviceId,
                                 kernel->streamId, kernel->correlationId);
      }
      // Every other record kind is ignored.
    }
    size_t dropped;
    CUPTI_CALL(
        dynload::cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
    if (dropped != 0) {
      fprintf(stderr, "Dropped %u activity records\n", (unsigned int)dropped);
    }
  }
  free(buffer);
}
} // namespace
class DeviceTracerImpl : public DeviceTracer {
public:
DeviceTracerImpl() : enabled_(false) {}
void AddAnnotation(uint64_t id, const std::string &anno) {
std::lock_guard<std::mutex> l(trace_mu_);
correlations_[id] = anno;
}
void AddKernelRecords(uint64_t start, uint64_t end, uint32_t device_id,
uint32_t stream_id, uint32_t correlation_id) {
std::lock_guard<std::mutex> l(trace_mu_);
kernel_records_.push_back(
KernelRecord{start, end, device_id, stream_id, correlation_id});
}
bool IsEnabled() {
std::lock_guard<std::mutex> l(trace_mu_);
return enabled_;
}
void Enable() {
std::lock_guard<std::mutex> l(trace_mu_);
if (enabled_) {
fprintf(stderr, "DeviceTracer already enabled\n");
return;
}
EnableActivity();
// Register callbacks for buffer requests and completed by CUPTI.
CUPTI_CALL(dynload::cuptiActivityRegisterCallbacks(bufferRequested,
bufferCompleted));
CUptiResult ret;
ret = dynload::cuptiSubscribe(
&subscriber_, static_cast<CUpti_CallbackFunc>(ApiCallback), this);
if (ret == CUPTI_ERROR_MAX_LIMIT_REACHED) {
fprintf(stderr, "CUPTI subcriber limit reached.\n");
} else if (ret != CUPTI_SUCCESS) {
fprintf(stderr, "Failed to create CUPTI subscriber.\n");
}
CUPTI_CALL(
dynload::cuptiEnableCallback(1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API,
CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel));
CUPTI_CALL(dynload::cuptiGetTimestamp(&start_ns_));
enabled_ = true;
}
proto::Profile GenProfile() {
std::lock_guard<std::mutex> l(trace_mu_);
proto::Profile profile_pb;
profile_pb.set_start_ns(start_ns_);
profile_pb.set_end_ns(end_ns_);
std::map<std::string, std::vector<uint64_t>> event_times;
for (const KernelRecord &r : kernel_records_) {
if (correlations_.find(r.correlation_id) == correlations_.end()) {
fprintf(stderr, "cannot relate a kernel activity\n");
continue;
}
auto *event = profile_pb.add_events();
event->set_name(correlations_.at(r.correlation_id));
event->set_start_ns(r.start_ns);
event->set_end_ns(r.end_ns);
event->set_stream_id(r.stream_id);
event->set_device_id(r.device_id);
event_times[event->name()].push_back(r.end_ns - r.start_ns);
}
for (const auto &et : event_times) {
fprintf(
stderr, "%s: total: %fms invoked cuda kernels: %lu\n",
et.first.c_str(),
std::accumulate(et.second.begin(), et.second.end(), 0) / 1000000.0,
et.second.size());
}
return profile_pb;
}
void Disable() {
// flush might cause additional calls to DeviceTracker.
dynload::cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED);
std::lock_guard<std::mutex> l(trace_mu_);
DisableActivity();
dynload::cuptiUnsubscribe(subscriber_);
CUPTI_CALL(dynload::cuptiGetTimestamp(&end_ns_));
PADDLE_ENFORCE(dynload::cuptiFinalize());
enabled_ = false;
}
private:
static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain,
CUpti_CallbackId cbid, const void *cbdata) {
auto *cbInfo = reinterpret_cast<const CUpti_CallbackData *>(cbdata);
DeviceTracer *tracer = reinterpret_cast<DeviceTracer *>(userdata);
if ((domain == CUPTI_CB_DOMAIN_DRIVER_API) &&
(cbid == CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel)) {
if (cbInfo->callbackSite == CUPTI_API_ENTER) {
const std::string anno =
cur_annotation ? cur_annotation : cbInfo->symbolName;
tracer->AddAnnotation(cbInfo->correlationId, anno);
}
} else {
VLOG(1) << "Unhandled API Callback for " << domain << " " << cbid;
}
}
std::mutex trace_mu_;
bool enabled_;
uint64_t start_ns_;
uint64_t end_ns_;
std::vector<KernelRecord> kernel_records_;
std::unordered_map<uint32_t, std::string> correlations_;
CUpti_SubscriberHandle subscriber_;
};
#endif // PADDLE_WITH_CUPTI
class DeviceTracerDummy : public DeviceTracer {
public:
DeviceTracerDummy() {}
void AddAnnotation(uint64_t id, const std::string &anno) {}
void AddKernelRecords(uint64_t start, uint64_t end, uint32_t device_id,
uint32_t stream_id, uint32_t correlation_id) {}
bool IsEnabled() { return false; }
void Enable() {}
proto::Profile GenProfile() { return proto::Profile(); }
void Disable() {}
};
// Allocates the tracer implementation matching the build configuration and
// stores it in *t: DeviceTracerImpl when PADDLE_WITH_CUPTI is defined,
// DeviceTracerDummy otherwise. The object is created with `new` and is not
// deleted in this function.
void CreateTracer(DeviceTracer **t) {
#ifdef PADDLE_WITH_CUPTI
  DeviceTracer *created = new DeviceTracerImpl();
#else
  DeviceTracer *created = new DeviceTracerDummy();
#endif  // PADDLE_WITH_CUPTI
  *t = created;
}
// Returns the process-wide tracer, creating it on the first call.
// std::call_once ensures CreateTracer runs only once.
DeviceTracer *GetDeviceTracer() {
  std::call_once(tracer_once_flag, [] { CreateTracer(&tracer); });
  return tracer;
}
// Sets the calling thread's current annotation. Only the pointer is stored
// (no copy is made), so `anno` must stay valid while it is set.
void SetCurAnnotation(const char *anno) { cur_annotation = anno; }
// Resets the calling thread's current annotation to null.
void ClearCurAnnotation() { cur_annotation = nullptr; }
} // namespace platform
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/profiler.pb.h"
namespace paddle {
namespace platform {
///////////////////////
// WARN: Under Development. Don't depend on it yet.
//////////////////////
// DeviceTracer performs the following tasks:
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
// 2. Collect cuda statistics: start/end ts, memory, etc.
// 3. Generate a protobuf for further analysis.
// Abstract interface for collecting CUDA kernel statistics and exporting them
// as a proto::Profile. Obtain an instance through GetDeviceTracer().
class DeviceTracer {
public:
// One recorded kernel execution.
struct KernelRecord {
// Start and end timestamps of the kernel, in nanoseconds.
uint64_t start_ns;
uint64_t end_ns;
// Device and stream the kernel ran on.
uint32_t device_id;
uint32_t stream_id;
// Id used to match this record with an annotation added via AddAnnotation.
uint32_t correlation_id;
};
virtual ~DeviceTracer() {}
// Needs to be called once before use.
virtual void Enable() = 0;
// Needs to be called once after use.
virtual void Disable() = 0;
// Add a pair to correlate internal cuda id with high level
// annotation (string). So cuda statistics can be represented by
// human-readable annotations.
virtual void AddAnnotation(uint64_t id, const std::string& anno) = 0;
// Add the statistics of one cuda kernel. `correlation_id` will be mapped to
// the annotation added before, for human readability.
virtual void AddKernelRecords(uint64_t start, uint64_t end,
uint32_t device_id, uint32_t stream_id,
uint32_t correlation_id) = 0;
// Generate a proto after done (Disabled).
virtual proto::Profile GenProfile() = 0;
// Returns whether the tracer is currently enabled.
virtual bool IsEnabled() = 0;
};
// Get a DeviceTracer.
DeviceTracer* GetDeviceTracer();
// Set a name for the cuda kernel operation being launched by the thread.
void SetCurAnnotation(const char* anno);
// Clear the name after the operation is done.
void ClearCurAnnotation();
} // namespace platform
} // namespace paddle
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc
DEPS dynamic_loader) list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc)
if (CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc)
endif(CUPTI_FOUND)
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUPTI
#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace platform {
namespace dynload {
// Definitions for the `extern` declarations in cupti.h.
// Once-flag passed to std::call_once when the CUPTI library handle is loaded.
std::once_flag cupti_dso_flag;
// Handle of the loaded CUPTI library; null until it has been loaded.
void *cupti_dso_handle = nullptr;
// Defines one wrapper object per CUPTI routine listed in CUPTI_ROUTINE_EACH.
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUPTI_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace platform
} // namespace paddle
#endif // PADDLE_WITH_CUPTI
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUPTI
#include <cuda.h>
#include <cupti.h>
#include <dlfcn.h>
#include <mutex>
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
namespace paddle {
namespace platform {
namespace dynload {
extern std::once_flag cupti_dso_flag;
extern void *cupti_dso_handle;
/**
 * DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(name) declares a functor type
 * DynLoad__<name> plus an extern object called <name>, so that code can write
 * dynload::<name>(args...) as if calling the CUPTI routine directly.
 *
 * With PADDLE_USE_DSO defined, the call operator loads the CUPTI library once
 * (std::call_once on cupti_dso_flag via GetCUPTIDsoHandle), looks the symbol
 * up with dlsym on every call and invokes it. The dlsym result is not checked
 * before it is called.
 *
 * Without PADDLE_USE_DSO, the call operator forwards straight to the routine
 * of the same name.
 */
#ifdef PADDLE_USE_DSO
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline CUptiResult CUPTIAPI operator()(Args... args) { \
typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...); \
std::call_once(cupti_dso_flag, \
paddle::platform::dynload::GetCUPTIDsoHandle, \
&cupti_dso_handle); \
void *p_##__name = dlsym(cupti_dso_handle, #__name); \
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
} \
}; \
extern DynLoad__##__name __name
#else
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
inline CUptiResult CUPTIAPI operator()(Args... args) { \
return __name(args...); \
} \
}; \
extern DynLoad__##__name __name
#endif
// X-macro listing every CUPTI routine that gets a dynload wrapper: applies
// `__macro` to each routine name in turn.
#define CUPTI_ROUTINE_EACH(__macro) \
__macro(cuptiActivityEnable); \
__macro(cuptiActivityDisable); \
__macro(cuptiActivityRegisterCallbacks); \
__macro(cuptiActivityGetAttribute); \
__macro(cuptiActivitySetAttribute); \
__macro(cuptiGetTimestamp); \
__macro(cuptiActivityGetNextRecord); \
__macro(cuptiGetResultString); \
__macro(cuptiActivityGetNumDroppedRecords); \
__macro(cuptiActivityFlushAll); \
__macro(cuptiFinalize); \
__macro(cuptiSubscribe); \
__macro(cuptiUnsubscribe); \
__macro(cuptiEnableCallback);
// Declare the wrapper type and extern object for every routine listed above.
CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP);
// The declaring macro is only needed for the expansion above.
#undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
#endif // PADDLE_WITH_CUPTI
...@@ -40,10 +40,14 @@ DEFINE_string(nccl_dir, "", ...@@ -40,10 +40,14 @@ DEFINE_string(nccl_dir, "",
"libcurand. For instance, /usr/local/cuda/lib64. If default, " "libcurand. For instance, /usr/local/cuda/lib64. If default, "
"dlopen will search cuda from LD_LIBRARY_PATH"); "dlopen will search cuda from LD_LIBRARY_PATH");
DEFINE_string(cupti_dir, "", "Specify path for loading cupti.so.");
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
static const char* cupti_lib_path = CUPTI_LIB_PATH;
static inline std::string join(const std::string& part1, static inline std::string join(const std::string& part1,
const std::string& part2) { const std::string& part2) {
// directory separator // directory separator
...@@ -143,6 +147,18 @@ void GetCUDNNDsoHandle(void** dso_handle) { ...@@ -143,6 +147,18 @@ void GetCUDNNDsoHandle(void** dso_handle) {
#endif #endif
} }
void GetCUPTIDsoHandle(void** dso_handle) {
std::string cupti_path = cupti_lib_path;
if (!FLAGS_cupti_dir.empty()) {
cupti_path = FLAGS_cupti_dir;
}
#if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", dso_handle, false);
#else
GetDsoHandleFromSearchPath(cupti_path, "libcupti.so", dso_handle, false);
#endif
}
void GetCurandDsoHandle(void** dso_handle) { void GetCurandDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle); GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle);
......
...@@ -34,6 +34,8 @@ void GetCublasDsoHandle(void** dso_handle); ...@@ -34,6 +34,8 @@ void GetCublasDsoHandle(void** dso_handle);
*/ */
void GetCUDNNDsoHandle(void** dso_handle); void GetCUDNNDsoHandle(void** dso_handle);
void GetCUPTIDsoHandle(void** dso_handle);
/** /**
* @brief load the DSO of CURAND * @brief load the DSO of CURAND
* *
......
...@@ -129,9 +129,6 @@ TEST(NCCL, all_reduce) { ...@@ -129,9 +129,6 @@ TEST(NCCL, all_reduce) {
} // namespace paddle } // namespace paddle
int main(int argc, char** argv) { int main(int argc, char** argv) {
// FIXME(tonyyang-svail):
// Due to the driver issue on our CI, disable for now
return 0;
dev_count = paddle::platform::GetCUDADeviceCount(); dev_count = paddle::platform::GetCUDADeviceCount();
if (dev_count <= 1) { if (dev_count <= 1) {
LOG(WARNING) LOG(WARNING)
......
...@@ -15,7 +15,13 @@ limitations under the License. */ ...@@ -15,7 +15,13 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include <iomanip> #include <iomanip>
#include <map> #include <map>
#ifdef PADDLE_WITH_CUDA
#include <cuda.h>
#endif // PADDLE_WITH_CUDA
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/string/printf.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -132,10 +138,13 @@ RecordEvent::RecordEvent(const std::string& name, ...@@ -132,10 +138,13 @@ RecordEvent::RecordEvent(const std::string& name,
dev_ctx_ = dev_ctx; dev_ctx_ = dev_ctx;
name_ = name; name_ = name;
PushEvent(name_, dev_ctx_); PushEvent(name_, dev_ctx_);
// Maybe need the same push/pop behavior.
SetCurAnnotation(name_.c_str());
} }
RecordEvent::~RecordEvent() { RecordEvent::~RecordEvent() {
if (g_state == ProfilerState::kDisabled) return; if (g_state == ProfilerState::kDisabled) return;
ClearCurAnnotation();
PopEvent(name_, dev_ctx_); PopEvent(name_, dev_ctx_);
} }
...@@ -147,7 +156,14 @@ void EnableProfiler(ProfilerState state) { ...@@ -147,7 +156,14 @@ void EnableProfiler(ProfilerState state) {
"The profiling state should be disabled when calling ", "The profiling state should be disabled when calling ",
"EnableProfiler."); "EnableProfiler.");
g_state = state; g_state = state;
g_profiler_place = (g_state == ProfilerState::kCUDA) ? "CUDA" : "CPU"; if (g_state == ProfilerState::kCUDA) {
g_profiler_place = "CUDA";
} else if (g_state == ProfilerState::kCPU) {
g_profiler_place = "CPU";
} else {
g_profiler_place = "All";
GetDeviceTracer()->Enable();
}
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (g_state == ProfilerState::kCUDA) { if (g_state == ProfilerState::kCUDA) {
// Generate some dummy events first to reduce the startup overhead. // Generate some dummy events first to reduce the startup overhead.
...@@ -190,6 +206,12 @@ void DisableProfiler(EventSortingKey sorted_key) { ...@@ -190,6 +206,12 @@ void DisableProfiler(EventSortingKey sorted_key) {
Mark("_stop_profiler_", nullptr); Mark("_stop_profiler_", nullptr);
g_state = ProfilerState::kDisabled; g_state = ProfilerState::kDisabled;
DeviceTracer* tracer = GetDeviceTracer();
if (g_profiler_place == "All" && tracer && tracer->IsEnabled()) {
tracer->Disable();
tracer->GenProfile();
}
std::vector<std::vector<Event>> all_events = GetAllEvents(); std::vector<std::vector<Event>> all_events = GetAllEvents();
ParseEvents(all_events, sorted_key); ParseEvents(all_events, sorted_key);
ResetProfiler(); ResetProfiler();
...@@ -254,9 +276,11 @@ void ParseEvents(std::vector<std::vector<Event>>& events, ...@@ -254,9 +276,11 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
} }
if (rit != pushed_events.rend()) { if (rit != pushed_events.rend()) {
double event_time = (g_profiler_place == "CUDA") double event_time =
? rit->CudaElapsedMs(events[i][j]) (g_profiler_place == "CUDA" || g_profiler_place == "All")
: rit->CpuElapsedMs(events[i][j]); ? rit->CudaElapsedMs(events[i][j])
: rit->CpuElapsedMs(events[i][j]);
std::string event_name = std::string event_name =
"thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); "thread" + std::to_string(rit->thread_id()) + "::" + rit->name();
max_name_width = std::max(max_name_width, event_name.size()); max_name_width = std::max(max_name_width, event_name.size());
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <mutex> #include <mutex>
#include <vector> #include <vector>
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.pb.h"
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -93,6 +94,7 @@ enum ProfilerState { ...@@ -93,6 +94,7 @@ enum ProfilerState {
kDisabled, // disabled state kDisabled, // disabled state
kCPU, // CPU profiling state kCPU, // CPU profiling state
kCUDA, // GPU profiling state kCUDA, // GPU profiling state
kAll, // Profile both CPU and GPU. (Currently experimental).
}; };
void Mark(const std::string& name, const DeviceContext* dev_ctx); void Mark(const std::string& name, const DeviceContext* dev_ctx);
...@@ -102,7 +104,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx); ...@@ -102,7 +104,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
void PopEvent(const std::string& name, const DeviceContext* dev_ctx); void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
struct RecordEvent { struct RecordEvent {
explicit RecordEvent(const std::string& name, const DeviceContext* dev_ctx); RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
~RecordEvent(); ~RecordEvent();
...@@ -110,9 +112,12 @@ struct RecordEvent { ...@@ -110,9 +112,12 @@ struct RecordEvent {
const DeviceContext* dev_ctx_; const DeviceContext* dev_ctx_;
// Event name // Event name
std::string name_; std::string name_;
// Need to distinguish name by op type, block_id, program_id and perhaps
// different kernel invocations within an op.
std::string full_name_;
}; };
// Return the event list of all threads. Asummed the returned value calls // Return the event list of all threads. Assumed the returned value calls
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread. // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std::vector<std::vector<Event>> GetAllEvents(); std::vector<std::vector<Event>> GetAllEvents();
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto2";
package paddle.platform.proto;
// One profiled event record.
// NOTE(review): the field meanings below are inferred from the field names
// only; the code that fills this message is not visible here — confirm
// against the producer before relying on them.
message Event {
// Label of the event.
optional string name = 1;
// Presumably the start timestamp in nanoseconds (per the `_ns` suffix);
// the clock source / epoch is not shown here — TODO confirm.
optional uint64 start_ns = 2;
// Presumably the end timestamp in nanoseconds, same clock as start_ns —
// TODO confirm.
optional uint64 end_ns = 3;
// Note: field number 4 is not assigned in this message.
// Presumably the id of the device the event ran on — TODO confirm.
optional uint32 device_id = 5;
// Presumably the id of the stream the event ran on — TODO confirm.
optional uint32 stream_id = 6;
}
// A collection of Event records plus an overall start/end pair.
// NOTE(review): presumably start_ns/end_ns bound the whole profiling
// session in nanoseconds — inferred from names only, confirm with the
// code that writes this message.
message Profile {
// All recorded events.
repeated Event events = 1;
// Presumably the profile start timestamp in nanoseconds — TODO confirm.
optional uint64 start_ns = 2;
// Presumably the profile end timestamp in nanoseconds — TODO confirm.
optional uint64 end_ns = 3;
}
\ No newline at end of file
...@@ -252,7 +252,7 @@ void BindVarDsec(py::module &m) { ...@@ -252,7 +252,7 @@ void BindVarDsec(py::module &m) {
.value("CHANNEL", proto::VarType::CHANNEL) .value("CHANNEL", proto::VarType::CHANNEL)
.value("PLACE_LIST", proto::VarType::PLACE_LIST) .value("PLACE_LIST", proto::VarType::PLACE_LIST)
.value("READER", proto::VarType::READER) .value("READER", proto::VarType::READER)
.value("NCCL_COM", proto::VarType::NCCL_COM); .value("RAW", proto::VarType::RAW);
} }
void BindOpDesc(py::module &m) { void BindOpDesc(py::module &m) {
......
...@@ -49,11 +49,6 @@ PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray); ...@@ -49,11 +49,6 @@ PYBIND11_MAKE_OPAQUE(paddle::framework::LoDTensorArray);
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
static size_t UniqueIntegerGenerator(const std::string &prefix) {
static std::unordered_map<std::string, std::atomic<size_t>> generators;
return generators[prefix].fetch_add(1);
}
bool IsCompiledWithCUDA() { bool IsCompiledWithCUDA() {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
return false; return false;
...@@ -410,7 +405,6 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -410,7 +405,6 @@ All parameter, weight, gradient are variables in Paddle.
(void (Executor::*)(const ProgramDesc &, Scope *, int, bool, bool)) & (void (Executor::*)(const ProgramDesc &, Scope *, int, bool, bool)) &
Executor::Run); Executor::Run);
m.def("unique_integer", UniqueIntegerGenerator);
m.def("init_gflags", framework::InitGflags); m.def("init_gflags", framework::InitGflags);
m.def("init_glog", framework::InitGLOG); m.def("init_glog", framework::InitGLOG);
m.def("init_devices", &framework::InitDevices); m.def("init_devices", &framework::InitDevices);
...@@ -465,6 +459,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -465,6 +459,7 @@ All parameter, weight, gradient are variables in Paddle.
.value("kDisabled", platform::ProfilerState::kDisabled) .value("kDisabled", platform::ProfilerState::kDisabled)
.value("kCPU", platform::ProfilerState::kCPU) .value("kCPU", platform::ProfilerState::kCPU)
.value("kCUDA", platform::ProfilerState::kCUDA) .value("kCUDA", platform::ProfilerState::kCUDA)
.value("kAll", platform::ProfilerState::kAll)
.export_values(); .export_values();
py::enum_<platform::EventSortingKey>(m, "EventSortingKey", py::arithmetic()) py::enum_<platform::EventSortingKey>(m, "EventSortingKey", py::arithmetic())
......
...@@ -58,7 +58,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" ...@@ -58,7 +58,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
| `WITH_AVX` | OFF | Set to "ON" to enable AVX support. | | `WITH_AVX` | OFF | Set to "ON" to enable AVX support. |
| `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_TESTING` | OFF | Build unit tests binaries. |
| `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. |
| `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. | | `WITH_GOLANG` | OFF | Build fault-tolerant parameter server written in go. |
| `WITH_SWIG_PY` | ON | Build with SWIG python API support. | | `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
| `WITH_C_API` | OFF | Build capi libraries for inference. | | `WITH_C_API` | OFF | Build capi libraries for inference. |
| `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. | | `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. |
......
...@@ -40,7 +40,7 @@ function cmake_gen() { ...@@ -40,7 +40,7 @@ function cmake_gen() {
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF}
-DWITH_MKL=${WITH_MKL:-ON} -DWITH_MKL=${WITH_MKL:-ON}
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DWITH_SWIG_PY=ON -DWITH_SWIG_PY=ON
-DWITH_C_API=${WITH_C_API:-OFF} -DWITH_C_API=${WITH_C_API:-OFF}
...@@ -49,6 +49,7 @@ function cmake_gen() { ...@@ -49,6 +49,7 @@ function cmake_gen() {
-DCUDNN_ROOT=/usr/ -DCUDNN_ROOT=/usr/
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON}
-DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_TESTING=${WITH_TESTING:-ON}
-DWITH_FAST_BUNDLE_TEST=ON
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
======================================== ========================================
EOF EOF
...@@ -64,7 +65,7 @@ EOF ...@@ -64,7 +65,7 @@ EOF
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \ -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \
-DWITH_MKL=${WITH_MKL:-ON} \ -DWITH_MKL=${WITH_MKL:-ON} \
-DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \ -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_C_API=${WITH_C_API:-OFF} \
...@@ -72,6 +73,7 @@ EOF ...@@ -72,6 +73,7 @@ EOF
-DCUDNN_ROOT=/usr/ \ -DCUDNN_ROOT=/usr/ \
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \
-DWITH_TESTING=${WITH_TESTING:-ON} \ -DWITH_TESTING=${WITH_TESTING:-ON} \
-DWITH_FAST_BUNDLE_TEST=ON \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
} }
...@@ -171,7 +173,7 @@ EOF ...@@ -171,7 +173,7 @@ EOF
EOF EOF
if [[ ${WITH_GPU} == "ON" ]]; then if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y libnccl-dev &&" NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&"
else else
NCCL_DEPS="" NCCL_DEPS=""
fi fi
......
...@@ -28,10 +28,9 @@ int main(int argc, char** argv) { ...@@ -28,10 +28,9 @@ int main(int argc, char** argv) {
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
new_argv.push_back( new_argv.push_back(
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory," strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
"warpctc_dir"));
#else #else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,warpctc_dir")); new_argv.push_back(strdup("--tryfromenv=use_pinned_memory"));
#endif #endif
int new_argc = static_cast<int>(new_argv.size()); int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data(); char** new_argv_address = new_argv.data();
......
...@@ -3,12 +3,14 @@ file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) ...@@ -3,12 +3,14 @@ file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)
file(GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py) file(GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py)
file(GLOB_RECURSE FLUID_PY_FILES ./paddle/fluid/ *.py)
set(PY_FILES paddle/__init__.py set(PY_FILES paddle/__init__.py
${TRAINER_PY_FILES} ${TRAINER_PY_FILES}
${HELPERS_PY_FILES} ${HELPERS_PY_FILES}
${UTILS_PY_FILES} ${UTILS_PY_FILES}
${V2_PY_FILES}) ${V2_PY_FILES}
${FLUID_PY_FILES})
add_custom_target(copy_paddle_master) add_custom_target(copy_paddle_master)
...@@ -43,10 +45,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -43,10 +45,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/fluid/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/fluid/core.so
DEPENDS paddle_pybind) DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so) add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/fluid/core.so)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
...@@ -72,7 +74,7 @@ if (WITH_TESTING) ...@@ -72,7 +74,7 @@ if (WITH_TESTING)
add_subdirectory(paddle/v2/tests) add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests) add_subdirectory(paddle/v2/plot/tests)
add_subdirectory(paddle/v2/fluid/tests) add_subdirectory(paddle/fluid/tests)
endif() endif()
endif() endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
......
...@@ -39,6 +39,7 @@ from concurrency import (Go, make_channel, channel_send, channel_recv, ...@@ -39,6 +39,7 @@ from concurrency import (Go, make_channel, channel_send, channel_recv,
import clip import clip
from memory_optimization_transpiler import memory_optimize from memory_optimization_transpiler import memory_optimize
import profiler import profiler
import unique_name
Tensor = LoDTensor Tensor = LoDTensor
...@@ -63,6 +64,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [ ...@@ -63,6 +64,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [
'DistributeTranspiler', 'DistributeTranspiler',
'memory_optimize', 'memory_optimize',
'profiler', 'profiler',
'unique_name',
] ]
......
...@@ -12,10 +12,11 @@ ...@@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.v2.fluid import framework as framework from paddle.fluid import framework as framework
from . import core from . import core
import collections import collections
import copy import copy
import unique_name
__all__ = [ __all__ = [
'append_backward', 'append_backward',
...@@ -391,7 +392,7 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map): ...@@ -391,7 +392,7 @@ def _rename_grad_(block, start_op_idx, grad_to_var, target_grad_map):
for name in op_desc.output_arg_names(): for name in op_desc.output_arg_names():
if block.desc.find_var(name.encode("ascii")): if block.desc.find_var(name.encode("ascii")):
new_name = "%s_%s" % (name, core.unique_integer(name)) new_name = unique_name.generate(name)
op_desc.rename_output(name, new_name) op_desc.rename_output(name, new_name)
var_map[name] = new_name var_map[name] = new_name
......
...@@ -26,7 +26,7 @@ A `scoped_function` will take a `function` as input. That function will be ...@@ -26,7 +26,7 @@ A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope. invoked in a new local scope.
""" """
import paddle.v2.fluid.core import paddle.fluid.core
import threading import threading
__tl_scope__ = threading.local() __tl_scope__ = threading.local()
...@@ -44,13 +44,13 @@ __all__ = [ ...@@ -44,13 +44,13 @@ __all__ = [
def get_cur_scope(): def get_cur_scope():
""" """
Get current scope. Get current scope.
:rtype: paddle.v2.fluid.core.Scope :rtype: paddle.fluid.core.Scope
""" """
cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None) cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None)
if cur_scope_stack is None: if cur_scope_stack is None:
__tl_scope__.cur_scope = list() __tl_scope__.cur_scope = list()
if len(__tl_scope__.cur_scope) == 0: if len(__tl_scope__.cur_scope) == 0:
__tl_scope__.cur_scope.append(paddle.v2.fluid.core.Scope()) __tl_scope__.cur_scope.append(paddle.fluid.core.Scope())
return __tl_scope__.cur_scope[-1] return __tl_scope__.cur_scope[-1]
......
...@@ -226,8 +226,7 @@ class DistributeTranspiler: ...@@ -226,8 +226,7 @@ class DistributeTranspiler:
rpc_client_var = program.global_block().create_var( rpc_client_var = program.global_block().create_var(
name="RPC_CLIENT_VAR", name="RPC_CLIENT_VAR",
persistable=True, persistable=True,
dtype='float32', # dtype and shape is not used in fact type=core.VarDesc.VarType.RAW)
shape=[0])
# create send_op # create send_op
program.global_block().append_op( program.global_block().append_op(
......
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
import numpy as np import numpy as np
import layers import layers
from framework import Program, unique_name, Variable, program_guard from framework import Program, Variable, program_guard
import unique_name
from layer_helper import LayerHelper from layer_helper import LayerHelper
__all__ = [ __all__ = [
...@@ -96,7 +97,7 @@ class Evaluator(object): ...@@ -96,7 +97,7 @@ class Evaluator(object):
""" """
state = self.helper.create_variable( state = self.helper.create_variable(
name="_".join([unique_name(self.helper.name), suffix]), name="_".join([unique_name.generate(self.helper.name), suffix]),
persistable=True, persistable=True,
dtype=dtype, dtype=dtype,
shape=shape) shape=shape)
......
...@@ -20,6 +20,7 @@ import numpy as np ...@@ -20,6 +20,7 @@ import numpy as np
import proto.framework_pb2 as framework_pb2 import proto.framework_pb2 as framework_pb2
from . import core from . import core
import unique_name
__all__ = [ __all__ = [
'Block', 'Block',
...@@ -47,20 +48,6 @@ def grad_var_name(var_name): ...@@ -47,20 +48,6 @@ def grad_var_name(var_name):
return var_name + GRAD_VAR_SUFFIX return var_name + GRAD_VAR_SUFFIX
def unique_name(prefix):
"""
Generate unique names with prefix
Args:
prefix(str): The prefix of return string
Returns(str): A unique string with the prefix
"""
uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)])
def convert_np_dtype_to_dtype_(np_dtype): def convert_np_dtype_to_dtype_(np_dtype):
""" """
Convert the data type in numpy to the data type in Paddle Convert the data type in numpy to the data type in Paddle
...@@ -175,7 +162,7 @@ class Variable(object): ...@@ -175,7 +162,7 @@ class Variable(object):
self.error_clip = error_clip self.error_clip = error_clip
if name is None: if name is None:
name = Variable._unique_var_name_() name = unique_name.generate('_generated_var')
is_new_var = False is_new_var = False
self.desc = self.block.desc.find_var(name) self.desc = self.block.desc.find_var(name)
...@@ -307,12 +294,6 @@ class Variable(object): ...@@ -307,12 +294,6 @@ class Variable(object):
def type(self): def type(self):
return self.desc.type() return self.desc.type()
@staticmethod
def _unique_var_name_():
prefix = "_generated_var"
uid = core.unique_integer(prefix) # unique during whole process.
return "_".join([prefix, str(uid)])
def set_error_clip(self, error_clip): def set_error_clip(self, error_clip):
self.error_clip = error_clip self.error_clip = error_clip
...@@ -766,13 +747,8 @@ class Block(object): ...@@ -766,13 +747,8 @@ class Block(object):
if not self.has_var(name): if not self.has_var(name):
raise ValueError("var %s is not in current" % name) raise ValueError("var %s is not in current" % name)
v = self.var(name) v = self.var(name)
stop_gradient = None
trainable = None
optimize_attr = None
regularizer = None
gradient_clip_attr = None
error_clip = None
if type(v) == Parameter: if type(v) == Parameter:
var_type = "Parameter"
stop_gradient = v.stop_gradient stop_gradient = v.stop_gradient
trainable = v.trainable trainable = v.trainable
optimize_attr = v.optimize_attr optimize_attr = v.optimize_attr
...@@ -780,15 +756,16 @@ class Block(object): ...@@ -780,15 +756,16 @@ class Block(object):
gradient_clip_attr = v.gradient_clip_attr gradient_clip_attr = v.gradient_clip_attr
error_clip = v.error_clip error_clip = v.error_clip
elif type(v) == Variable: elif type(v) == Variable:
var_type = "Variable"
error_clip = v.error_clip error_clip = v.error_clip
stop_gradient = v.stop_gradient stop_gradient = v.stop_gradient
else: else:
raise ValueError("unsupported var type: %s", type(v)) raise ValueError("unsupported var type: %s", type(v))
self.desc.rename_var(name, new_name) self.desc.rename_var(name, new_name)
# NOTE: v is destroyed by C++ after calling rename_var.
d = self.desc.find_var(new_name) d = self.desc.find_var(new_name)
var = None if var_type == "Parameter":
if type(v) == Parameter:
var = Parameter( var = Parameter(
self, self,
d.shape(), d.shape(),
...@@ -800,9 +777,10 @@ class Block(object): ...@@ -800,9 +777,10 @@ class Block(object):
regularizer=regularizer, regularizer=regularizer,
gradient_clip_attr=gradient_clip_attr, gradient_clip_attr=gradient_clip_attr,
error_clip=error_clip) error_clip=error_clip)
elif type(v) == Variable: elif var_type == "Variable":
var = Variable( var = Variable(
self, self,
type=v.type,
name=new_name, name=new_name,
error_clip=error_clip, error_clip=error_clip,
stop_gradient=stop_gradient) stop_gradient=stop_gradient)
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import os import os
from paddle.v2.fluid.evaluator import Evaluator from paddle.fluid.evaluator import Evaluator
from paddle.v2.fluid.framework import Program, Parameter, default_main_program, Variable from paddle.fluid.framework import Program, Parameter, default_main_program, Variable
from . import core from . import core
__all__ = [ __all__ = [
...@@ -68,7 +68,7 @@ def save_vars(executor, ...@@ -68,7 +68,7 @@ def save_vars(executor,
main_program=None, main_program=None,
vars=None, vars=None,
predicate=None, predicate=None,
save_file_name=None): filename=None):
""" """
Save variables to directory by executor. Save variables to directory by executor.
...@@ -80,8 +80,8 @@ def save_vars(executor, ...@@ -80,8 +80,8 @@ def save_vars(executor,
as a bool. If it returns true, the corresponding input variable will be saved. as a bool. If it returns true, the corresponding input variable will be saved.
:param vars: variables need to be saved. If vars is specified, program & predicate :param vars: variables need to be saved. If vars is specified, program & predicate
will be ignored will be ignored
:param save_file_name: The name of a single file that all vars are saved to. :param filename: The name of a single file that all vars are saved to.
If it is None, save variables to separate files. If it is None, save variables to separate files.
:return: None :return: None
""" """
...@@ -95,7 +95,7 @@ def save_vars(executor, ...@@ -95,7 +95,7 @@ def save_vars(executor,
executor, executor,
dirname=dirname, dirname=dirname,
vars=filter(predicate, main_program.list_vars()), vars=filter(predicate, main_program.list_vars()),
save_file_name=save_file_name) filename=filename)
else: else:
save_program = Program() save_program = Program()
save_block = save_program.global_block() save_block = save_program.global_block()
...@@ -103,7 +103,7 @@ def save_vars(executor, ...@@ -103,7 +103,7 @@ def save_vars(executor,
save_var_map = {} save_var_map = {}
for each_var in vars: for each_var in vars:
new_var = _clone_var_in_block_(save_block, each_var) new_var = _clone_var_in_block_(save_block, each_var)
if save_file_name is None: if filename is None:
save_block.append_op( save_block.append_op(
type='save', type='save',
inputs={'X': [new_var]}, inputs={'X': [new_var]},
...@@ -112,7 +112,7 @@ def save_vars(executor, ...@@ -112,7 +112,7 @@ def save_vars(executor,
else: else:
save_var_map[new_var.name] = new_var save_var_map[new_var.name] = new_var
if save_file_name is not None: if filename is not None:
save_var_list = [] save_var_list = []
for name in sorted(save_var_map.keys()): for name in sorted(save_var_map.keys()):
save_var_list.append(save_var_map[name]) save_var_list.append(save_var_map[name])
...@@ -121,12 +121,12 @@ def save_vars(executor, ...@@ -121,12 +121,12 @@ def save_vars(executor,
type='save_combine', type='save_combine',
inputs={'X': save_var_list}, inputs={'X': save_var_list},
outputs={}, outputs={},
attrs={'file_path': os.path.join(dirname, save_file_name)}) attrs={'file_path': os.path.join(dirname, filename)})
executor.run(save_program) executor.run(save_program)
def save_params(executor, dirname, main_program=None, save_file_name=None): def save_params(executor, dirname, main_program=None, filename=None):
""" """
Save all parameters to directory with executor. Save all parameters to directory with executor.
""" """
...@@ -136,11 +136,10 @@ def save_params(executor, dirname, main_program=None, save_file_name=None): ...@@ -136,11 +136,10 @@ def save_params(executor, dirname, main_program=None, save_file_name=None):
main_program=main_program, main_program=main_program,
vars=None, vars=None,
predicate=is_parameter, predicate=is_parameter,
save_file_name=save_file_name) filename=filename)
def save_persistables(executor, dirname, main_program=None, def save_persistables(executor, dirname, main_program=None, filename=None):
save_file_name=None):
""" """
Save all persistables to directory with executor. Save all persistables to directory with executor.
""" """
...@@ -150,7 +149,7 @@ def save_persistables(executor, dirname, main_program=None, ...@@ -150,7 +149,7 @@ def save_persistables(executor, dirname, main_program=None,
main_program=main_program, main_program=main_program,
vars=None, vars=None,
predicate=is_persistable, predicate=is_persistable,
save_file_name=save_file_name) filename=filename)
def load_vars(executor, def load_vars(executor,
...@@ -158,7 +157,7 @@ def load_vars(executor, ...@@ -158,7 +157,7 @@ def load_vars(executor,
main_program=None, main_program=None,
vars=None, vars=None,
predicate=None, predicate=None,
load_file_name=None): filename=None):
""" """
Load variables from directory by executor. Load variables from directory by executor.
...@@ -170,8 +169,8 @@ def load_vars(executor, ...@@ -170,8 +169,8 @@ def load_vars(executor,
as a bool. If it returns true, the corresponding input variable will be loaded. as a bool. If it returns true, the corresponding input variable will be loaded.
:param vars: variables need to be loaded. If vars is specified, program & :param vars: variables need to be loaded. If vars is specified, program &
predicate will be ignored predicate will be ignored
:param load_file_name: The name of the single file that all vars are loaded from. :param filename: The name of the single file that all vars are loaded from.
If it is None, load variables from separate files. If it is None, load variables from separate files.
:return: None :return: None
""" """
...@@ -185,7 +184,7 @@ def load_vars(executor, ...@@ -185,7 +184,7 @@ def load_vars(executor,
executor, executor,
dirname=dirname, dirname=dirname,
vars=filter(predicate, main_program.list_vars()), vars=filter(predicate, main_program.list_vars()),
load_file_name=load_file_name) filename=filename)
else: else:
load_prog = Program() load_prog = Program()
load_block = load_prog.global_block() load_block = load_prog.global_block()
...@@ -194,7 +193,7 @@ def load_vars(executor, ...@@ -194,7 +193,7 @@ def load_vars(executor,
for each_var in vars: for each_var in vars:
assert isinstance(each_var, Variable) assert isinstance(each_var, Variable)
new_var = _clone_var_in_block_(load_block, each_var) new_var = _clone_var_in_block_(load_block, each_var)
if load_file_name is None: if filename is None:
load_block.append_op( load_block.append_op(
type='load', type='load',
inputs={}, inputs={},
...@@ -203,7 +202,7 @@ def load_vars(executor, ...@@ -203,7 +202,7 @@ def load_vars(executor,
else: else:
load_var_map[new_var.name] = new_var load_var_map[new_var.name] = new_var
if load_file_name is not None: if filename is not None:
load_var_list = [] load_var_list = []
for name in sorted(load_var_map.keys()): for name in sorted(load_var_map.keys()):
load_var_list.append(load_var_map[name]) load_var_list.append(load_var_map[name])
...@@ -212,12 +211,12 @@ def load_vars(executor, ...@@ -212,12 +211,12 @@ def load_vars(executor,
type='load_combine', type='load_combine',
inputs={}, inputs={},
outputs={"Out": load_var_list}, outputs={"Out": load_var_list},
attrs={'file_path': os.path.join(dirname, load_file_name)}) attrs={'file_path': os.path.join(dirname, filename)})
executor.run(load_prog) executor.run(load_prog)
def load_params(executor, dirname, main_program=None, load_file_name=None): def load_params(executor, dirname, main_program=None, filename=None):
""" """
load all parameters from directory by executor. load all parameters from directory by executor.
""" """
...@@ -226,11 +225,10 @@ def load_params(executor, dirname, main_program=None, load_file_name=None): ...@@ -226,11 +225,10 @@ def load_params(executor, dirname, main_program=None, load_file_name=None):
dirname=dirname, dirname=dirname,
main_program=main_program, main_program=main_program,
predicate=is_parameter, predicate=is_parameter,
load_file_name=load_file_name) filename=filename)
def load_persistables(executor, dirname, main_program=None, def load_persistables(executor, dirname, main_program=None, filename=None):
load_file_name=None):
""" """
load all persistables from directory by executor. load all persistables from directory by executor.
""" """
...@@ -239,7 +237,7 @@ def load_persistables(executor, dirname, main_program=None, ...@@ -239,7 +237,7 @@ def load_persistables(executor, dirname, main_program=None,
dirname=dirname, dirname=dirname,
main_program=main_program, main_program=main_program,
predicate=is_persistable, predicate=is_persistable,
load_file_name=load_file_name) filename=filename)
def get_inference_program(target_vars, main_program=None): def get_inference_program(target_vars, main_program=None):
...@@ -299,7 +297,8 @@ def save_inference_model(dirname, ...@@ -299,7 +297,8 @@ def save_inference_model(dirname,
target_vars, target_vars,
executor, executor,
main_program=None, main_program=None,
save_file_name=None): model_filename=None,
params_filename=None):
""" """
Build a model especially for inference, Build a model especially for inference,
and save it to directory by the executor. and save it to directory by the executor.
...@@ -310,8 +309,11 @@ def save_inference_model(dirname, ...@@ -310,8 +309,11 @@ def save_inference_model(dirname,
:param executor: executor that save inference model :param executor: executor that save inference model
:param main_program: original program, which will be pruned to build the inference model. :param main_program: original program, which will be pruned to build the inference model.
Default default_main_program(). Default default_main_program().
:param save_file_name: The name of a single file that all parameters are saved to. :param model_filename: The name of file to save inference program.
If it is None, save parameters to separate files. If not specified, default filename `__model__` will be used.
:param params_filename: The name of file to save parameters.
It is used for the case that all parameters are saved in a single binary file.
If not specified, parameters are considered saved in separate files.
:return: None :return: None
""" """
...@@ -342,15 +344,19 @@ def save_inference_model(dirname, ...@@ -342,15 +344,19 @@ def save_inference_model(dirname,
prepend_feed_ops(inference_program, feeded_var_names) prepend_feed_ops(inference_program, feeded_var_names)
append_fetch_ops(inference_program, fetch_var_names) append_fetch_ops(inference_program, fetch_var_names)
if save_file_name == None: if model_filename is not None:
model_file_name = dirname + "/__model__" model_filename = os.path.basename(model_filename)
else: else:
model_file_name = dirname + "/__model_combined__" model_filename = "__model__"
model_filename = os.path.join(dirname, model_filename)
with open(model_file_name, "wb") as f: if params_filename is not None:
params_filename = os.path.basename(params_filename)
with open(model_filename, "wb") as f:
f.write(inference_program.desc.serialize_to_string()) f.write(inference_program.desc.serialize_to_string())
save_persistables(executor, dirname, inference_program, save_file_name) save_persistables(executor, dirname, inference_program, params_filename)
def get_feed_targets_names(program): def get_feed_targets_names(program):
...@@ -371,15 +377,21 @@ def get_fetch_targets_names(program): ...@@ -371,15 +377,21 @@ def get_fetch_targets_names(program):
return fetch_targets_names return fetch_targets_names
def load_inference_model(dirname, executor, load_file_name=None): def load_inference_model(dirname,
executor,
model_filename=None,
params_filename=None):
""" """
Load inference model from a directory Load inference model from a directory
:param dirname: directory path :param dirname: directory path
:param executor: executor that load inference model :param executor: executor that load inference model
:param load_file_name: The name of the single file that all parameters are loaded from. :param model_filename: The name of file to load inference program.
If it is None, load parameters from separate files. If not specified, default filename `__model__` will be used.
:param params_filename: The name of file to load parameters.
It is used for the case that all parameters are saved in a single binary file.
If not specified, parameters are considered saved in separate files.
:return: [program, feed_target_names, fetch_targets] :return: [program, feed_target_names, fetch_targets]
program: program especially for inference. program: program especially for inference.
feed_target_names: Names of variables that need to feed data feed_target_names: Names of variables that need to feed data
...@@ -388,16 +400,20 @@ def load_inference_model(dirname, executor, load_file_name=None): ...@@ -388,16 +400,20 @@ def load_inference_model(dirname, executor, load_file_name=None):
if not os.path.isdir(dirname): if not os.path.isdir(dirname):
raise ValueError("There is no directory named '%s'", dirname) raise ValueError("There is no directory named '%s'", dirname)
if load_file_name == None: if model_filename is not None:
model_file_name = dirname + "/__model__" model_filename = os.path.basename(model_filename)
else: else:
model_file_name = dirname + "/__model_combined__" model_filename = "__model__"
model_filename = os.path.join(dirname, model_filename)
if params_filename is not None:
params_filename = os.path.basename(params_filename)
with open(model_file_name, "rb") as f: with open(model_filename, "rb") as f:
program_desc_str = f.read() program_desc_str = f.read()
program = Program.parse_from_string(program_desc_str) program = Program.parse_from_string(program_desc_str)
load_persistables(executor, dirname, program, load_file_name) load_persistables(executor, dirname, program, params_filename)
feed_target_names = get_feed_targets_names(program) feed_target_names = get_feed_targets_names(program)
fetch_target_names = get_fetch_targets_names(program) fetch_target_names = get_fetch_targets_names(program)
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
import copy import copy
import itertools import itertools
from framework import Variable, Parameter, default_main_program, default_startup_program, \ from framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating
unique_name, dtype_is_floating import unique_name
from paddle.v2.fluid.initializer import Constant, Xavier from paddle.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr, WeightNormParamAttr from param_attr import ParamAttr, WeightNormParamAttr
...@@ -27,7 +27,7 @@ class LayerHelper(object): ...@@ -27,7 +27,7 @@ class LayerHelper(object):
self.layer_type = layer_type self.layer_type = layer_type
name = self.kwargs.get('name', None) name = self.kwargs.get('name', None)
if name is None: if name is None:
self.kwargs['name'] = unique_name(self.layer_type) self.kwargs['name'] = unique_name.generate(self.layer_type)
@property @property
def name(self): def name(self):
...@@ -117,17 +117,20 @@ class LayerHelper(object): ...@@ -117,17 +117,20 @@ class LayerHelper(object):
block=self.startup_program.global_block()): block=self.startup_program.global_block()):
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name(".".join([self.name, 'weight_norm_norm'])), name=unique_name.generate(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
abs_out = block.create_var( abs_out = block.create_var(
name=unique_name(".".join([self.name, 'weight_norm_abs'])), name=unique_name.generate(".".join(
[self.name, 'weight_norm_abs'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
block.append_op( block.append_op(
type='abs', inputs={'X': x}, outputs={'Out': abs_out}) type='abs', inputs={'X': x}, outputs={'Out': abs_out})
pow_out = block.create_var( pow_out = block.create_var(
name=unique_name(".".join([self.name, 'weight_norm_pow'])), name=unique_name.generate(".".join(
[self.name, 'weight_norm_pow'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
block.append_op( block.append_op(
...@@ -136,7 +139,8 @@ class LayerHelper(object): ...@@ -136,7 +139,8 @@ class LayerHelper(object):
outputs={'Out': pow_out}, outputs={'Out': pow_out},
attrs={'factor': float(p)}) attrs={'factor': float(p)})
sum_out = block.create_var( sum_out = block.create_var(
name=unique_name(".".join([self.name, 'weight_norm_sum'])), name=unique_name.generate(".".join(
[self.name, 'weight_norm_sum'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
block.append_op( block.append_op(
...@@ -161,7 +165,7 @@ class LayerHelper(object): ...@@ -161,7 +165,7 @@ class LayerHelper(object):
block=self.startup_program.global_block()): block=self.startup_program.global_block()):
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name(".".join( name=unique_name.generate(".".join(
[self.name, 'weight_norm_reshape'])), [self.name, 'weight_norm_reshape'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -178,7 +182,7 @@ class LayerHelper(object): ...@@ -178,7 +182,7 @@ class LayerHelper(object):
block=self.startup_program.global_block()): block=self.startup_program.global_block()):
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name(".".join( name=unique_name.generate(".".join(
[self.name, 'weight_norm_transpose'])), [self.name, 'weight_norm_transpose'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
...@@ -196,7 +200,8 @@ class LayerHelper(object): ...@@ -196,7 +200,8 @@ class LayerHelper(object):
"""Computes the norm over all dimensions except dim""" """Computes the norm over all dimensions except dim"""
if out is None: if out is None:
out = block.create_var( out = block.create_var(
name=unique_name(".".join([self.name, 'weight_norm_norm'])), name=unique_name.generate(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype, dtype=dtype,
persistable=False) persistable=False)
if dim is None: if dim is None:
...@@ -286,7 +291,7 @@ class LayerHelper(object): ...@@ -286,7 +291,7 @@ class LayerHelper(object):
assert isinstance(attr, ParamAttr) assert isinstance(attr, ParamAttr)
suffix = 'b' if is_bias else 'w' suffix = 'b' if is_bias else 'w'
if attr.name is None: if attr.name is None:
attr.name = unique_name(".".join([self.name, suffix])) attr.name = unique_name.generate(".".join([self.name, suffix]))
if default_initializer is None and attr.initializer is None: if default_initializer is None and attr.initializer is None:
if is_bias: if is_bias:
...@@ -316,7 +321,7 @@ class LayerHelper(object): ...@@ -316,7 +321,7 @@ class LayerHelper(object):
def create_tmp_variable(self, dtype, stop_gradient=False): def create_tmp_variable(self, dtype, stop_gradient=False):
return self.main_program.current_block().create_var( return self.main_program.current_block().create_var(
name=unique_name(".".join([self.name, 'tmp'])), name=unique_name.generate(".".join([self.name, 'tmp'])),
dtype=dtype, dtype=dtype,
persistable=False, persistable=False,
stop_gradient=stop_gradient) stop_gradient=stop_gradient)
......
...@@ -428,7 +428,8 @@ class StaticRNN(object): ...@@ -428,7 +428,8 @@ class StaticRNN(object):
raise ValueError( raise ValueError(
"if init is None, memory at least need shape and batch_ref") "if init is None, memory at least need shape and batch_ref")
parent_block = self.parent_block() parent_block = self.parent_block()
var_name = unique_name("@".join([self.helper.name, "memory_boot"])) var_name = unique_name.generate("@".join(
[self.helper.name, "memory_boot"]))
boot_var = parent_block.create_var( boot_var = parent_block.create_var(
name=var_name, name=var_name,
shape=shape, shape=shape,
...@@ -450,7 +451,7 @@ class StaticRNN(object): ...@@ -450,7 +451,7 @@ class StaticRNN(object):
return self.memory(init=boot_var) return self.memory(init=boot_var)
else: else:
pre_mem = self.helper.create_variable( pre_mem = self.helper.create_variable(
name=unique_name("@".join([self.helper.name, "mem"])), name=unique_name.generate("@".join([self.helper.name, "mem"])),
dtype=init.dtype, dtype=init.dtype,
shape=init.shape) shape=init.shape)
self.memories[pre_mem.name] = StaticRNNMemoryLink( self.memories[pre_mem.name] = StaticRNNMemoryLink(
...@@ -652,7 +653,8 @@ class While(object): ...@@ -652,7 +653,8 @@ class While(object):
parent_block.append_op( parent_block.append_op(
type='while', type='while',
inputs={ inputs={
'X': [parent_block.var(x_name) for x_name in x_name_list], 'X':
[parent_block.var_recursive(x_name) for x_name in x_name_list],
'Condition': [self.cond_var] 'Condition': [self.cond_var]
}, },
outputs={'Out': out_vars, outputs={'Out': out_vars,
...@@ -709,7 +711,7 @@ def lod_rank_table(x, level=0): ...@@ -709,7 +711,7 @@ def lod_rank_table(x, level=0):
helper = LayerHelper("lod_rank_table", **locals()) helper = LayerHelper("lod_rank_table", **locals())
table = helper.create_variable( table = helper.create_variable(
type=core.VarDesc.VarType.LOD_RANK_TABLE, type=core.VarDesc.VarType.LOD_RANK_TABLE,
name=unique_name("lod_rank_table")) name=unique_name.generate("lod_rank_table"))
helper.append_op( helper.append_op(
type='lod_rank_table', type='lod_rank_table',
inputs={'X': x}, inputs={'X': x},
...@@ -807,7 +809,7 @@ def lod_tensor_to_array(x, table): ...@@ -807,7 +809,7 @@ def lod_tensor_to_array(x, table):
""" """
helper = LayerHelper("lod_tensor_to_array", **locals()) helper = LayerHelper("lod_tensor_to_array", **locals())
array = helper.create_variable( array = helper.create_variable(
name=unique_name("lod_tensor_to_array"), name=unique_name.generate("lod_tensor_to_array"),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=x.dtype) dtype=x.dtype)
helper.append_op( helper.append_op(
...@@ -1264,11 +1266,11 @@ class IfElse(object): ...@@ -1264,11 +1266,11 @@ class IfElse(object):
if id(x) not in self.input_table: if id(x) not in self.input_table:
parent_block = self.parent_block() parent_block = self.parent_block()
out_true = parent_block.create_var( out_true = parent_block.create_var(
name=unique_name('ifelse_input' + self.helper.name), name=unique_name.generate('ifelse_input' + self.helper.name),
dtype=x.dtype) dtype=x.dtype)
out_false = parent_block.create_var( out_false = parent_block.create_var(
name=unique_name('ifelse_input' + self.helper.name), name=unique_name.generate('ifelse_input' + self.helper.name),
dtype=x.dtype) dtype=x.dtype)
parent_block.append_op( parent_block.append_op(
type='split_lod_tensor', type='split_lod_tensor',
...@@ -1310,7 +1312,8 @@ class IfElse(object): ...@@ -1310,7 +1312,8 @@ class IfElse(object):
raise TypeError("Each output should be a variable") raise TypeError("Each output should be a variable")
# create outside tensor # create outside tensor
outside_out = parent_block.create_var( outside_out = parent_block.create_var(
name=unique_name("_".join([self.helper.name, 'output'])), name=unique_name.generate("_".join(
[self.helper.name, 'output'])),
dtype=each_out.dtype) dtype=each_out.dtype)
out_table.append(outside_out) out_table.append(outside_out)
...@@ -1373,7 +1376,7 @@ class DynamicRNN(object): ...@@ -1373,7 +1376,7 @@ class DynamicRNN(object):
parent_block = self._parent_block_() parent_block = self._parent_block_()
if self.lod_rank_table is None: if self.lod_rank_table is None:
self.lod_rank_table = parent_block.create_var( self.lod_rank_table = parent_block.create_var(
name=unique_name('lod_rank_table'), name=unique_name.generate('lod_rank_table'),
type=core.VarDesc.VarType.LOD_RANK_TABLE) type=core.VarDesc.VarType.LOD_RANK_TABLE)
self.lod_rank_table.stop_gradient = True self.lod_rank_table.stop_gradient = True
parent_block.append_op( parent_block.append_op(
...@@ -1381,7 +1384,8 @@ class DynamicRNN(object): ...@@ -1381,7 +1384,8 @@ class DynamicRNN(object):
inputs={"X": x}, inputs={"X": x},
outputs={"Out": self.lod_rank_table}) outputs={"Out": self.lod_rank_table})
self.max_seq_len = parent_block.create_var( self.max_seq_len = parent_block.create_var(
name=unique_name('dynamic_rnn_max_seq_len'), dtype='int64') name=unique_name.generate('dynamic_rnn_max_seq_len'),
dtype='int64')
self.max_seq_len.stop_gradient = False self.max_seq_len.stop_gradient = False
parent_block.append_op( parent_block.append_op(
type='max_sequence_len', type='max_sequence_len',
...@@ -1395,7 +1399,7 @@ class DynamicRNN(object): ...@@ -1395,7 +1399,7 @@ class DynamicRNN(object):
outputs={'Out': self.cond}) outputs={'Out': self.cond})
input_array = parent_block.create_var( input_array = parent_block.create_var(
name=unique_name('dynamic_rnn_input_array'), name=unique_name.generate('dynamic_rnn_input_array'),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=x.dtype) dtype=x.dtype)
self.input_array.append((input_array, x.dtype)) self.input_array.append((input_array, x.dtype))
...@@ -1416,7 +1420,7 @@ class DynamicRNN(object): ...@@ -1416,7 +1420,7 @@ class DynamicRNN(object):
"static_input() must be called after step_input().") "static_input() must be called after step_input().")
parent_block = self._parent_block_() parent_block = self._parent_block_()
x_reordered = parent_block.create_var( x_reordered = parent_block.create_var(
name=unique_name("dynamic_rnn_static_input_reordered"), name=unique_name.generate("dynamic_rnn_static_input_reordered"),
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
dtype=x.dtype) dtype=x.dtype)
parent_block.append_op( parent_block.append_op(
...@@ -1478,7 +1482,7 @@ class DynamicRNN(object): ...@@ -1478,7 +1482,7 @@ class DynamicRNN(object):
'invoked before ' 'invoked before '
'memory(init=init, need_reordered=True, ...).') 'memory(init=init, need_reordered=True, ...).')
init_reordered = parent_block.create_var( init_reordered = parent_block.create_var(
name=unique_name('dynamic_rnn_mem_init_reordered'), name=unique_name.generate('dynamic_rnn_mem_init_reordered'),
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
dtype=init.dtype) dtype=init.dtype)
parent_block.append_op( parent_block.append_op(
...@@ -1490,7 +1494,7 @@ class DynamicRNN(object): ...@@ -1490,7 +1494,7 @@ class DynamicRNN(object):
outputs={'Out': [init_reordered]}) outputs={'Out': [init_reordered]})
init_tensor = init_reordered init_tensor = init_reordered
mem_array = parent_block.create_var( mem_array = parent_block.create_var(
name=unique_name('dynamic_rnn_mem_array'), name=unique_name.generate('dynamic_rnn_mem_array'),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=init.dtype) dtype=init.dtype)
parent_block.append_op( parent_block.append_op(
...@@ -1510,9 +1514,10 @@ class DynamicRNN(object): ...@@ -1510,9 +1514,10 @@ class DynamicRNN(object):
) )
parent_block = self._parent_block_() parent_block = self._parent_block_()
init = parent_block.create_var( init = parent_block.create_var(
name=unique_name('mem_init'), dtype=dtype) name=unique_name.generate('mem_init'), dtype=dtype)
arr, dtype = self.input_array[0] arr, dtype = self.input_array[0]
in0 = parent_block.create_var(name=unique_name('in0'), dtype=dtype) in0 = parent_block.create_var(
name=unique_name.generate('in0'), dtype=dtype)
parent_block.append_op( parent_block.append_op(
type='read_from_array', type='read_from_array',
inputs={'X': [arr], inputs={'X': [arr],
...@@ -1551,7 +1556,7 @@ class DynamicRNN(object): ...@@ -1551,7 +1556,7 @@ class DynamicRNN(object):
parent_block = self._parent_block_() parent_block = self._parent_block_()
for each in outputs: for each in outputs:
outside_array = parent_block.create_var( outside_array = parent_block.create_var(
name=unique_name("_".join( name=unique_name.generate("_".join(
[self.helper.name, "output_array", each.name])), [self.helper.name, "output_array", each.name])),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=each.dtype) dtype=each.dtype)
......
...@@ -16,6 +16,7 @@ All layers just related to the detection neural network. ...@@ -16,6 +16,7 @@ All layers just related to the detection neural network.
""" """
from layer_function_generator import generate_layer_fn from layer_function_generator import generate_layer_fn
from layer_function_generator import autodoc
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
import tensor import tensor
import ops import ops
...@@ -28,6 +29,7 @@ __all__ = [ ...@@ -28,6 +29,7 @@ __all__ = [
'target_assign', 'target_assign',
'detection_output', 'detection_output',
'ssd_loss', 'ssd_loss',
'detection_map',
] ]
__auto__ = [ __auto__ = [
...@@ -132,7 +134,48 @@ def detection_output(scores, ...@@ -132,7 +134,48 @@ def detection_output(scores,
return nmsed_outs return nmsed_outs
def bipartite_match(dist_matrix, name=None): @autodoc()
def detection_map(detect_res,
label,
pos_count=None,
true_pos=None,
false_pos=None,
overlap_threshold=0.3,
evaluate_difficult=True,
ap_type='integral'):
helper = LayerHelper("detection_map", **locals())
map_out = helper.create_tmp_variable(dtype='float32')
accum_pos_count_out = helper.create_tmp_variable(dtype='int32')
accum_true_pos_out = helper.create_tmp_variable(dtype='float32')
accum_false_pos_out = helper.create_tmp_variable(dtype='float32')
helper.append_op(
type="detection_map",
inputs={
'Label': label,
'DetectRes': detect_res,
'PosCount': pos_count,
'TruePos': true_pos,
'FalsePos': false_pos
},
outputs={
'MAP': map_out,
'AccumPosCount': accum_pos_count_out,
'AccumTruePos': accum_true_pos_out,
'AccumFalsePos': accum_false_pos_out
},
attrs={
'overlap_threshold': overlap_threshold,
'evaluate_difficult': evaluate_difficult,
'ap_type': ap_type
})
return map_out, accum_pos_count_out, accum_true_pos_out, accum_false_pos_out
def bipartite_match(dist_matrix,
match_type=None,
dist_threshold=None,
name=None):
""" """
**Bipartite matchint operator** **Bipartite matchint operator**
...@@ -164,6 +207,11 @@ def bipartite_match(dist_matrix, name=None): ...@@ -164,6 +207,11 @@ def bipartite_match(dist_matrix, name=None):
This tensor can contain LoD information to represent a batch of This tensor can contain LoD information to represent a batch of
inputs. One instance of this batch can contain different numbers of inputs. One instance of this batch can contain different numbers of
entities. entities.
match_type(string|None): The type of matching method, should be
'bipartite' or 'per_prediction', 'bipartite' by defalut.
dist_threshold(float|None): If `match_type` is 'per_prediction',
this threshold is to determine the extra matching bboxes based
on the maximum distance, 0.5 by defalut.
Returns: Returns:
match_indices(Variable): A 2-D Tensor with shape [N, M] in int type. match_indices(Variable): A 2-D Tensor with shape [N, M] in int type.
N is the batch size. If match_indices[i][j] is -1, it N is the batch size. If match_indices[i][j] is -1, it
...@@ -183,6 +231,10 @@ def bipartite_match(dist_matrix, name=None): ...@@ -183,6 +231,10 @@ def bipartite_match(dist_matrix, name=None):
helper.append_op( helper.append_op(
type='bipartite_match', type='bipartite_match',
inputs={'DistMat': dist_matrix}, inputs={'DistMat': dist_matrix},
attrs={
'match_type': match_type,
'dist_threshold': dist_threshold,
},
outputs={ outputs={
'ColToRowMatchIndices': match_indices, 'ColToRowMatchIndices': match_indices,
'ColToRowMatchDist': match_distance 'ColToRowMatchDist': match_distance
...@@ -276,6 +328,7 @@ def ssd_loss(location, ...@@ -276,6 +328,7 @@ def ssd_loss(location,
conf_loss_weight=1.0, conf_loss_weight=1.0,
match_type='per_prediction', match_type='per_prediction',
mining_type='max_negative', mining_type='max_negative',
normalize=True,
sample_size=None): sample_size=None):
""" """
**Multi-box loss layer for object dection algorithm of SSD** **Multi-box loss layer for object dection algorithm of SSD**
...@@ -324,18 +377,20 @@ def ssd_loss(location, ...@@ -324,18 +377,20 @@ def ssd_loss(location,
`overlap_threshold` to determine the extra matching bboxes when `overlap_threshold` to determine the extra matching bboxes when
finding matched boxes. 0.5 by default. finding matched boxes. 0.5 by default.
neg_pos_ratio (float): The ratio of the negative boxes to the positive neg_pos_ratio (float): The ratio of the negative boxes to the positive
boxes, used only when mining_type is max_negative, 3.0 by defalut. boxes, used only when mining_type is 'max_negative', 3.0 by defalut.
neg_overlap (float): The negative overlap upper bound for the unmatched neg_overlap (float): The negative overlap upper bound for the unmatched
predictions. Use only when mining_type is max_negative, predictions. Use only when mining_type is 'max_negative',
0.5 by default. 0.5 by default.
sample_size (int): The max sample size of negative box, used only when
mining_type is hard_example.
loc_loss_weight (float): Weight for localization loss, 1.0 by default. loc_loss_weight (float): Weight for localization loss, 1.0 by default.
conf_loss_weight (float): Weight for confidence loss, 1.0 by default. conf_loss_weight (float): Weight for confidence loss, 1.0 by default.
match_type (str): The type of matching method during training, should match_type (str): The type of matching method during training, should
be 'bipartite' or 'per_prediction'. be 'bipartite' or 'per_prediction', 'per_prediction' by defalut.
mining_type (str): The hard example mining type, should be 'hard_example' mining_type (str): The hard example mining type, should be 'hard_example'
or 'max_negative', now only support `max_negative`. or 'max_negative', now only support `max_negative`.
normalize (bool): Whether to normalize the SSD loss by the total number
of output locations, True by defalut.
sample_size (int): The max sample size of negative box, used only when
mining_type is 'hard_example'.
Returns: Returns:
Variable: The weighted sum of the localization loss and confidence loss, Variable: The weighted sum of the localization loss and confidence loss,
...@@ -381,7 +436,8 @@ def ssd_loss(location, ...@@ -381,7 +436,8 @@ def ssd_loss(location,
# 1.1 Compute IOU similarity between ground-truth boxes and prior boxes. # 1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
iou = iou_similarity(x=gt_box, y=prior_box) iou = iou_similarity(x=gt_box, y=prior_box)
# 1.2 Compute matched boundding box by bipartite matching algorithm. # 1.2 Compute matched boundding box by bipartite matching algorithm.
matched_indices, matched_dist = bipartite_match(iou) matched_indices, matched_dist = bipartite_match(iou, match_type,
overlap_threshold)
# 2. Compute confidence for mining hard examples # 2. Compute confidence for mining hard examples
# 2.1. Get the target label based on matched indices # 2.1. Get the target label based on matched indices
...@@ -440,10 +496,15 @@ def ssd_loss(location, ...@@ -440,10 +496,15 @@ def ssd_loss(location,
# 5.1 Compute confidence loss. # 5.1 Compute confidence loss.
target_label = __reshape_to_2d(target_label) target_label = __reshape_to_2d(target_label)
target_label = tensor.cast(x=target_label, dtype='int64') target_label = tensor.cast(x=target_label, dtype='int64')
conf_loss = nn.softmax_with_cross_entropy(confidence, target_label) conf_loss = nn.softmax_with_cross_entropy(confidence, target_label)
target_conf_weight = __reshape_to_2d(target_conf_weight) target_conf_weight = __reshape_to_2d(target_conf_weight)
conf_loss = conf_loss * target_conf_weight conf_loss = conf_loss * target_conf_weight
# the target_label and target_conf_weight do not have gradient.
target_label.stop_gradient = True
target_conf_weight.stop_gradient = True
# 5.2 Compute regression loss. # 5.2 Compute regression loss.
location = __reshape_to_2d(location) location = __reshape_to_2d(location)
target_bbox = __reshape_to_2d(target_bbox) target_bbox = __reshape_to_2d(target_bbox)
...@@ -452,8 +513,19 @@ def ssd_loss(location, ...@@ -452,8 +513,19 @@ def ssd_loss(location,
target_loc_weight = __reshape_to_2d(target_loc_weight) target_loc_weight = __reshape_to_2d(target_loc_weight)
loc_loss = loc_loss * target_loc_weight loc_loss = loc_loss * target_loc_weight
# the target_bbox and target_loc_weight do not have gradient.
target_bbox.stop_gradient = True
target_loc_weight.stop_gradient = True
# 5.3 Compute overall weighted loss. # 5.3 Compute overall weighted loss.
loss = conf_loss_weight * conf_loss + loc_loss_weight * loc_loss loss = conf_loss_weight * conf_loss + loc_loss_weight * loc_loss
# reshape to [N, Np], N is the batch size and Np is the prior box number.
loss = ops.reshape(x=loss, shape=[-1, num_prior])
loss = nn.reduce_sum(loss, dim=1, keep_dim=True)
if normalize:
normalizer = nn.reduce_sum(target_loc_weight)
loss = loss / normalizer
return loss return loss
......
...@@ -25,7 +25,8 @@ __all__ = ['get_places'] ...@@ -25,7 +25,8 @@ __all__ = ['get_places']
@autodoc() @autodoc()
def get_places(device_count=None, device_type=None): def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals()) helper = LayerHelper('get_places', **locals())
out_places = helper.create_variable(name=unique_name(helper.name + ".out")) out_places = helper.create_variable(
name=unique_name.generate(helper.name + ".out"))
attrs = dict() attrs = dict()
if device_count is not None: if device_count is not None:
attrs['device_count'] = int(device_count) attrs['device_count'] = int(device_count)
......
...@@ -130,7 +130,7 @@ def generate_layer_fn(op_type): ...@@ -130,7 +130,7 @@ def generate_layer_fn(op_type):
o_name = not_intermediate_outputs[0].name o_name = not_intermediate_outputs[0].name
intermediate_output_names = [output.name for output in intermediate_outputs] intermediate_output_names = [output.name for output in intermediate_outputs]
def infer_and_check_dtype(op_proto, **kwargs): def infer_and_check_dtype(op_proto, *args, **kwargs):
""" """
This function performs the sanity check for dtype and This function performs the sanity check for dtype and
instance type. instance type.
...@@ -141,6 +141,10 @@ def generate_layer_fn(op_type): ...@@ -141,6 +141,10 @@ def generate_layer_fn(op_type):
val = kwargs.pop(name, []) val = kwargs.pop(name, [])
if not isinstance(val, list) and not isinstance(val, tuple): if not isinstance(val, list) and not isinstance(val, tuple):
val = [val] val = [val]
if len(val) == 0:
val = [args[0]]
args = args[1:]
for each in val: for each in val:
if not isinstance(each, Variable): if not isinstance(each, Variable):
raise ValueError("input of {0} must be variable".format( raise ValueError("input of {0} must be variable".format(
...@@ -155,10 +159,10 @@ def generate_layer_fn(op_type): ...@@ -155,10 +159,10 @@ def generate_layer_fn(op_type):
return dtype return dtype
def func(**kwargs): def func(*args, **kwargs):
helper = LayerHelper(op_type, **kwargs) helper = LayerHelper(op_type, **kwargs)
dtype = infer_and_check_dtype(op_proto, **kwargs) dtype = infer_and_check_dtype(op_proto, *args, **kwargs)
inputs = dict() inputs = dict()
for ipt in op_proto.inputs: for ipt in op_proto.inputs:
...@@ -166,6 +170,9 @@ def generate_layer_fn(op_type): ...@@ -166,6 +170,9 @@ def generate_layer_fn(op_type):
val = kwargs.pop(name, []) val = kwargs.pop(name, [])
if not isinstance(val, list) and not isinstance(val, tuple): if not isinstance(val, list) and not isinstance(val, tuple):
val = [val] val = [val]
if len(val) == 0 and len(args) != 0:
val = args[0]
args = args[1:]
inputs[ipt.name] = val inputs[ipt.name] = val
outputs = dict() outputs = dict()
......
...@@ -21,7 +21,7 @@ __all__ = ['monkey_patch_variable'] ...@@ -21,7 +21,7 @@ __all__ = ['monkey_patch_variable']
def monkey_patch_variable(): def monkey_patch_variable():
def unique_tmp_name(): def unique_tmp_name():
return unique_name("tmp") return unique_name.generate("tmp")
def safe_get_dtype(var): def safe_get_dtype(var):
try: try:
...@@ -157,7 +157,9 @@ def monkey_patch_variable(): ...@@ -157,7 +157,9 @@ def monkey_patch_variable():
("__eq__", "equal", False), ("__eq__", "equal", False),
("__ne__", "not_equal", False), ("__ne__", "not_equal", False),
("__lt__", "less_than", False), ("__lt__", "less_than", False),
("__le__", "less_equal", False)): ("__le__", "less_equal", False),
("__gt__", "greater_than", False),
("__ge__", "greater_equal", False)):
setattr(Variable, method_name, setattr(Variable, method_name,
_elemwise_method_creator_(method_name, op_type, reverse)) _elemwise_method_creator_(method_name, op_type, reverse))
......
...@@ -21,6 +21,7 @@ from ..framework import Variable ...@@ -21,6 +21,7 @@ from ..framework import Variable
from ..param_attr import ParamAttr from ..param_attr import ParamAttr
from layer_function_generator import autodoc from layer_function_generator import autodoc
from tensor import concat from tensor import concat
import utils
__all__ = [ __all__ = [
'fc', 'fc',
...@@ -1103,8 +1104,8 @@ def sequence_conv(input, ...@@ -1103,8 +1104,8 @@ def sequence_conv(input,
def conv2d(input, def conv2d(input,
num_filters, num_filters,
filter_size, filter_size,
stride=None, stride=1,
padding=None, padding=0,
groups=None, groups=None,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
...@@ -1217,12 +1218,10 @@ def conv2d(input, ...@@ -1217,12 +1218,10 @@ def conv2d(input,
raise ValueError("num_channels must be divisible by groups.") raise ValueError("num_channels must be divisible by groups.")
num_filter_channels = num_channels / groups num_filter_channels = num_channels / groups
if isinstance(filter_size, int): filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
filter_size = [filter_size, filter_size] stride = utils.convert_to_list(stride, 2, 'stride')
if isinstance(stride, int): padding = utils.convert_to_list(padding, 2, 'padding')
stride = [stride, stride]
if isinstance(padding, int):
padding = [padding, padding]
if not isinstance(use_cudnn, bool): if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False") raise ValueError("use_cudnn should be True or False")
...@@ -1397,10 +1396,10 @@ def sequence_last_step(input): ...@@ -1397,10 +1396,10 @@ def sequence_last_step(input):
def pool2d(input, def pool2d(input,
pool_size, pool_size=-1,
pool_type, pool_type="max",
pool_stride=None, pool_stride=1,
pool_padding=None, pool_padding=0,
global_pooling=False, global_pooling=False,
use_cudnn=True, use_cudnn=True,
name=None): name=None):
...@@ -1408,20 +1407,20 @@ def pool2d(input, ...@@ -1408,20 +1407,20 @@ def pool2d(input,
This function adds the operator for pooling in 2 dimensions, using the This function adds the operator for pooling in 2 dimensions, using the
pooling configurations mentioned in input parameters. pooling configurations mentioned in input parameters.
""" """
if pool_padding is None:
pool_padding = [0, 0]
if pool_stride is None:
pool_stride = [1, 1]
if pool_type not in ["max", "avg"]: if pool_type not in ["max", "avg"]:
raise ValueError( raise ValueError(
"Unknown pool_type: '%s'. It can only be 'max' or 'avg'.", "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
str(pool_type)) str(pool_type))
if isinstance(pool_size, int):
pool_size = [pool_size, pool_size] if global_pooling is False and pool_size == -1:
if isinstance(pool_stride, int): raise ValueError(
pool_stride = [pool_stride, pool_stride] "When the global_pooling is False, pool_size must be passed "
if isinstance(pool_padding, int): "and be a valid value. Received pool_size: " + str(pool_size))
pool_padding = [pool_padding, pool_padding]
pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
pool_padding = utils.convert_to_list(pool_padding, 2, 'pool_padding')
pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride')
if not isinstance(use_cudnn, bool): if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False") raise ValueError("use_cudnn should be True or False")
...@@ -1484,21 +1483,21 @@ def batch_norm(input, ...@@ -1484,21 +1483,21 @@ def batch_norm(input,
bias = helper.create_parameter( bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True) attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
mean = helper.create_global_variable( mean = helper.create_parameter(
name=moving_mean_name, attr=ParamAttr(
dtype=input.dtype, name=moving_mean_name, initializer=Constant(0.0), trainable=False),
shape=param_shape, shape=param_shape,
persistable=True, dtype=input.dtype)
stop_gradient=True) mean.stop_gradient = True
helper.set_variable_initializer(var=mean, initializer=Constant(0.0))
variance = helper.create_global_variable( variance = helper.create_parameter(
name=moving_variance_name, attr=ParamAttr(
dtype=input.dtype, name=moving_variance_name,
initializer=Constant(1.0),
trainable=False),
shape=param_shape, shape=param_shape,
persistable=True, dtype=input.dtype)
stop_gradient=True) variance.stop_gradient = True
helper.set_variable_initializer(var=variance, initializer=Constant(1.0))
# create output # create output
# mean and mean_out share the same memory # mean and mean_out share the same memory
...@@ -1650,9 +1649,9 @@ def conv2d_transpose(input, ...@@ -1650,9 +1649,9 @@ def conv2d_transpose(input,
num_filters, num_filters,
output_size=None, output_size=None,
filter_size=None, filter_size=None,
padding=None, padding=0,
stride=None, stride=1,
dilation=None, dilation=1,
param_attr=None, param_attr=None,
use_cudnn=True, use_cudnn=True,
name=None): name=None):
...@@ -1748,26 +1747,12 @@ def conv2d_transpose(input, ...@@ -1748,26 +1747,12 @@ def conv2d_transpose(input,
raise TypeError("Input of conv2d_transpose must be Variable") raise TypeError("Input of conv2d_transpose must be Variable")
input_channel = input.shape[1] input_channel = input.shape[1]
op_attr = dict() padding = utils.convert_to_list(padding, 2, 'padding')
stride = utils.convert_to_list(stride, 2, 'stride')
if isinstance(padding, int): dilation = utils.convert_to_list(dilation, 2, 'dilation')
op_attr['paddings'] = [padding, padding]
elif padding is not None:
op_attr['paddings'] = padding
if isinstance(stride, int):
op_attr['strides'] = [stride, stride]
elif stride is not None:
op_attr['strides'] = stride
if isinstance(dilation, int):
op_attr['dilations'] = [dilation, dilation]
elif dilation is not None:
op_attr['dilations'] = dilation
if not isinstance(use_cudnn, bool): if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False") raise ValueError("use_cudnn should be True or False")
op_attr['use_cudnn'] = use_cudnn
if filter_size is None: if filter_size is None:
if output_size is None: if output_size is None:
...@@ -1775,10 +1760,6 @@ def conv2d_transpose(input, ...@@ -1775,10 +1760,6 @@ def conv2d_transpose(input,
if isinstance(output_size, int): if isinstance(output_size, int):
output_size = [output_size, output_size] output_size = [output_size, output_size]
padding = op_attr.get('paddings', [0, 0])
stride = op_attr.get('strides', [1, 1])
dilation = op_attr.get('dilations', [1, 1])
h_in = input.shape[2] h_in = input.shape[2]
w_in = input.shape[3] w_in = input.shape[3]
...@@ -1787,9 +1768,9 @@ def conv2d_transpose(input, ...@@ -1787,9 +1768,9 @@ def conv2d_transpose(input,
filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 * filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 *
padding[1] - 1) / dilation[1] + 1 padding[1] - 1) / dilation[1] + 1
filter_size = [filter_size_h, filter_size_w] filter_size = [filter_size_h, filter_size_w]
else:
elif isinstance(filter_size, int): filter_size = utils.convert_to_list(filter_size, 2,
filter_size = [filter_size, filter_size] 'conv2d_transpose.filter_size')
filter_shape = [input_channel, num_filters] + filter_size filter_shape = [input_channel, num_filters] + filter_size
img_filter = helper.create_parameter( img_filter = helper.create_parameter(
...@@ -1801,7 +1782,12 @@ def conv2d_transpose(input, ...@@ -1801,7 +1782,12 @@ def conv2d_transpose(input,
inputs={'Input': [input], inputs={'Input': [input],
'Filter': [img_filter]}, 'Filter': [img_filter]},
outputs={'Output': out}, outputs={'Output': out},
attrs=op_attr) attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'use_cudnn': use_cudnn
})
return out return out
......
...@@ -160,8 +160,8 @@ def sums(input, out=None): ...@@ -160,8 +160,8 @@ def sums(input, out=None):
a0 = layers.array_read(array=tmp, i=i) a0 = layers.array_read(array=tmp, i=i)
i = layers.increment(x=i) i = layers.increment(x=i)
a1 = layers.array_read(array=tmp, i=i) a1 = layers.array_read(array=tmp, i=i)
mean_a0 = layers.mean(x=a0) mean_a0 = layers.mean(a0)
mean_a1 = layers.mean(x=a1) mean_a1 = layers.mean(a1)
a_sum = layers.sums(input=[mean_a0, mean_a1]) a_sum = layers.sums(input=[mean_a0, mean_a1])
""" """
helper = LayerHelper('sum', **locals()) helper = LayerHelper('sum', **locals())
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def convert_to_list(value, n, name, dtype=int):
    """
    Normalize a scalar or an iterable of numbers into a list of length ``n``.

    A scalar that is an instance of ``dtype`` is repeated ``n`` times. Any
    other value is materialized with ``list(value)``, must contain exactly
    ``n`` elements, and every element must be accepted by ``dtype(...)``.
    The elements are validated only; they are returned unchanged (they are
    not cast to ``dtype``).

    Arguments:
        value: The value to validate and convert. Could be an int, or any
            iterable of ints.
        n: The size of the list to be returned.
        name: The name of the argument being validated, e.g. "stride" or
            "filter_size". This is only used to format error messages.
        dtype: The numerical type of the elements of the returned list.
            Defaults to the builtin ``int``. (The former default ``np.int``
            was merely an alias of ``int`` and no longer exists in recent
            NumPy releases, which made this module fail on import.)

    Returns:
        A list of ``n`` elements.

    Raises:
        ValueError: If ``value`` is neither a ``dtype`` scalar nor an
            iterable, if its length is not ``n``, or if one of its elements
            cannot be converted by ``dtype``.
    """
    # Scalar case: broadcast the single value to all n positions.
    if isinstance(value, dtype):
        return [value, ] * n

    # Iterable case: materialize first so that generators are consumed once.
    try:
        value_list = list(value)
    except TypeError:
        raise ValueError("The " + name +
                         "'s type must be list or tuple. Received: " + str(
                             value))

    if len(value_list) != n:
        raise ValueError("The " + name + "'s length must be " + str(n) +
                         ". Received: " + str(value))

    for single_value in value_list:
        try:
            # The conversion result is discarded on purpose: this is a
            # validity probe only, the original element is what is returned.
            dtype(single_value)
        except (ValueError, TypeError):
            raise ValueError(
                "The " + name + "'s type must be a list or tuple of " + str(
                    n) + " " + str(dtype) + " . Received: " + str(
                        value) + " "
                "including element " + str(single_value) + " of type" + " "
                + str(type(single_value)))
    return value_list
...@@ -17,8 +17,8 @@ import json ...@@ -17,8 +17,8 @@ import json
import logging import logging
from collections import defaultdict from collections import defaultdict
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import paddle.fluid.proto.framework_pb2 as framework_pb2
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import paddle.fluid.proto.framework_pb2 as framework_pb2
def get_all_op_protos(): def get_all_op_protos():
......
...@@ -17,7 +17,8 @@ from collections import defaultdict ...@@ -17,7 +17,8 @@ from collections import defaultdict
import framework import framework
import layers import layers
from backward import append_backward from backward import append_backward
from framework import unique_name, program_guard from framework import program_guard
import unique_name
from initializer import Constant from initializer import Constant
from layer_helper import LayerHelper from layer_helper import LayerHelper
from regularizer import append_regularization_ops from regularizer import append_regularization_ops
...@@ -35,10 +36,18 @@ class Optimizer(object): ...@@ -35,10 +36,18 @@ class Optimizer(object):
""" """
def __init__(self, learning_rate, global_step=None, regularization=None): def __init__(self, learning_rate, global_step=None, regularization=None):
assert learning_rate is not None if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable):
raise TypeError("learning rate should be float or Variable")
self._global_step = global_step self._global_step = global_step
self.regularization = regularization self.regularization = regularization
self._global_learning_rate = learning_rate self._learning_rate = learning_rate
# each program should have a independent learning rate
# program -> Variable(learning_rate)
self._learning_rate_map = dict()
if isinstance(self._learning_rate, framework.Variable):
self._learning_rate_map[framework.default_main_program(
)] = self._learning_rate
# Dictionary of accumulators. Some optimizer subclasses need to # Dictionary of accumulators. Some optimizer subclasses need to
# allocate and manage extra variables associated with the parameters # allocate and manage extra variables associated with the parameters
# to train. These variables are called accumulators. # to train. These variables are called accumulators.
...@@ -47,26 +56,33 @@ class Optimizer(object): ...@@ -47,26 +56,33 @@ class Optimizer(object):
self.helper = None self.helper = None
def _create_global_learning_rate(self): def _create_global_learning_rate(self):
if isinstance(self._global_learning_rate, float): lr = self.global_learning_rate()
self._global_learning_rate = layers.create_global_var(
name=unique_name("learning_rate"), if isinstance(lr, framework.Variable):
shape=[1], return
value=float(self._global_learning_rate), else:
dtype='float32', if not isinstance(self._learning_rate, float):
persistable=True) raise TypeError(
"learning rate variable is create outside optimizer,"
if not isinstance(self._global_learning_rate, framework.Variable): "can not create new learning rate variable for new program")
raise ValueError("learning rate should be a Variable, "
"actual type is %s", # create learning rate in the current main program
type(self._global_learning_rate)) self._learning_rate_map[framework.default_main_program(
)] = layers.create_global_var(
@property name=unique_name.generate("learning_rate"),
def global_learning_rate(self): shape=[1],
value=float(self._learning_rate),
dtype='float32',
persistable=True)
def global_learning_rate(self, program=None):
""" """
get global decayed learning rate get global decayed learning rate
:return: :return:
""" """
return self._global_learning_rate if program is None:
program = framework.default_main_program()
return self._learning_rate_map.get(program, None)
def _append_optimize_op(self, block, param_and_grad): def _append_optimize_op(self, block, param_and_grad):
""" append optimize operator to block and return all the added optimize_op """ append optimize operator to block and return all the added optimize_op
...@@ -77,7 +93,7 @@ class Optimizer(object): ...@@ -77,7 +93,7 @@ class Optimizer(object):
# create learning rate variable for every parameter # create learning rate variable for every parameter
param = param_and_grad[0] param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate'] param_lr = param.optimize_attr['learning_rate']
return self._global_learning_rate * param_lr return self.global_learning_rate() * param_lr
def _create_accumulators(self, block, parameters): def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters """Create all accumulators needed by the parameters
...@@ -118,7 +134,7 @@ class Optimizer(object): ...@@ -118,7 +134,7 @@ class Optimizer(object):
assert isinstance(self.helper, LayerHelper) assert isinstance(self.helper, LayerHelper)
var = self.helper.create_global_variable( var = self.helper.create_global_variable(
name=unique_name(name), name=unique_name.generate(name),
persistable=True, persistable=True,
dtype=dtype or param.dtype, dtype=dtype or param.dtype,
type=param.type, type=param.type,
...@@ -379,7 +395,7 @@ class AdamOptimizer(Optimizer): ...@@ -379,7 +395,7 @@ class AdamOptimizer(Optimizer):
# Create beta1 and beta2 power tensors # Create beta1 and beta2 power tensors
beta_shape = [1] beta_shape = [1]
self._beta1_pow_acc = self.helper.create_global_variable( self._beta1_pow_acc = self.helper.create_global_variable(
name=unique_name('beta1_pow_acc'), name=unique_name.generate('beta1_pow_acc'),
dtype='float32', dtype='float32',
shape=beta_shape, shape=beta_shape,
lod_level=0, lod_level=0,
...@@ -388,7 +404,7 @@ class AdamOptimizer(Optimizer): ...@@ -388,7 +404,7 @@ class AdamOptimizer(Optimizer):
self._beta1_pow_acc, initializer=Constant(self._beta1)) self._beta1_pow_acc, initializer=Constant(self._beta1))
self._beta2_pow_acc = self.helper.create_global_variable( self._beta2_pow_acc = self.helper.create_global_variable(
name=unique_name('beta2_pow_acc'), name=unique_name.generate('beta2_pow_acc'),
dtype='float32', dtype='float32',
shape=beta_shape, shape=beta_shape,
lod_level=0, lod_level=0,
...@@ -481,7 +497,7 @@ class AdamaxOptimizer(Optimizer): ...@@ -481,7 +497,7 @@ class AdamaxOptimizer(Optimizer):
# Create beta1 power accumulator tensor # Create beta1 power accumulator tensor
beta_shape = [1] beta_shape = [1]
self._beta1_pow_acc = self.helper.create_global_variable( self._beta1_pow_acc = self.helper.create_global_variable(
name=unique_name('beta1_pow_acc'), name=unique_name.generate('beta1_pow_acc'),
dtype='float32', dtype='float32',
shape=beta_shape, shape=beta_shape,
lod_level=0, lod_level=0,
......
...@@ -97,9 +97,14 @@ def profiler(state, sorted_key=None): ...@@ -97,9 +97,14 @@ def profiler(state, sorted_key=None):
The `ave` means sorting by the average execution time. The `ave` means sorting by the average execution time.
""" """
if state not in ['CPU', 'GPU']: if state not in ['CPU', 'GPU', "All"]:
raise ValueError("The state must be 'CPU' or 'GPU'.") raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
prof_state = core.ProfilerState.kCUDA if state == "GPU" else core.ProfilerState.kCPU if state == "GPU":
prof_state = core.ProfilerState.kCUDA
elif state == "CPU":
prof_state = core.ProfilerState.kCPU
else:
prof_state = core.ProfilerState.kAll
core.enable_profiler(prof_state) core.enable_profiler(prof_state)
yield yield
......
...@@ -7,5 +7,4 @@ endforeach() ...@@ -7,5 +7,4 @@ endforeach()
add_subdirectory(unittests) add_subdirectory(unittests)
add_subdirectory(book) add_subdirectory(book)
add_subdirectory(book_distribute)
add_subdirectory(book_memory_optimization) add_subdirectory(book_memory_optimization)
...@@ -14,15 +14,15 @@ ...@@ -14,15 +14,15 @@
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import contextlib import contextlib
import math import math
import sys import sys
import unittest import unittest
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
dict_size = 30000 dict_size = 30000
source_dict_dim = target_dict_dim = dict_size source_dict_dim = target_dict_dim = dict_size
...@@ -147,7 +147,7 @@ def seq_to_seq_net(): ...@@ -147,7 +147,7 @@ def seq_to_seq_net():
label = fluid.layers.data( label = fluid.layers.data(
name='label_sequence', shape=[1], dtype='int64', lod_level=1) name='label_sequence', shape=[1], dtype='int64', lod_level=1)
cost = fluid.layers.cross_entropy(input=prediction, label=label) cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
return avg_cost, prediction return avg_cost, prediction
...@@ -228,32 +228,34 @@ def infer(use_cuda, save_dirname=None): ...@@ -228,32 +228,34 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc, inference_scope = fluid.core.Scope()
# the feed_target_names (the names of variables that will be feeded with fluid.scope_guard(inference_scope):
# data using feed operators), and the fetch_targets (variables that # Use fluid.io.load_inference_model to obtain the inference program desc,
# we want to obtain data from using fetch operators). # the feed_target_names (the names of variables that will be feeded
[inference_program, feed_target_names, # data using feed operators), and the fetch_targets (variables that
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
lod = [0, 4, 10] fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
word_data = create_random_lodtensor(lod, place, low=0, high=1)
trg_word = create_random_lodtensor(lod, place, low=0, high=1) lod = [0, 4, 10]
word_data = create_random_lodtensor(lod, place, low=0, high=1)
# Construct feed as a dictionary of {feed_target_name: feed_target_data} trg_word = create_random_lodtensor(lod, place, low=0, high=1)
# and results will contain a list of data corresponding to fetch_targets.
assert feed_target_names[0] == 'source_sequence' # Construct feed as a dictionary of {feed_target_name: feed_target_data}
assert feed_target_names[1] == 'target_sequence' # and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program, assert feed_target_names[0] == 'source_sequence'
feed={ assert feed_target_names[1] == 'target_sequence'
feed_target_names[0]: word_data, results = exe.run(inference_program,
feed_target_names[1]: trg_word, feed={
}, feed_target_names[0]: word_data,
fetch_list=fetch_targets, feed_target_names[1]: trg_word,
return_numpy=False) },
print(results[0].lod()) fetch_list=fetch_targets,
np_data = np.array(results[0]) return_numpy=False)
print("Inference shape: ", np_data.shape) print(results[0].lod())
print("Inference results: ", np_data) np_data = np.array(results[0])
print("Inference shape: ", np_data.shape)
print("Inference results: ", np_data)
def main(use_cuda): def main(use_cuda):
......
...@@ -13,15 +13,16 @@ ...@@ -13,15 +13,16 @@
# limitations under the License. # limitations under the License.
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import contextlib import contextlib
import numpy import numpy
import unittest import unittest
import math import math
import sys import sys
import os
def train(use_cuda, save_dirname): def train(use_cuda, save_dirname, is_local):
x = fluid.layers.data(name='x', shape=[13], dtype='float32') x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None) y_predict = fluid.layers.fc(input=x, size=1, act=None)
...@@ -29,10 +30,10 @@ def train(use_cuda, save_dirname): ...@@ -29,10 +30,10 @@ def train(use_cuda, save_dirname):
y = fluid.layers.data(name='y', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y) cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost) optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20 BATCH_SIZE = 20
...@@ -42,27 +43,57 @@ def train(use_cuda, save_dirname): ...@@ -42,27 +43,57 @@ def train(use_cuda, save_dirname):
batch_size=BATCH_SIZE) batch_size=BATCH_SIZE)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) def train_loop(main_program):
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
PASS_NUM = 100 exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
for data in train_reader(): PASS_NUM = 100
avg_loss_value, = exe.run(fluid.default_main_program(), for pass_id in range(PASS_NUM):
feed=feeder.feed(data), for data in train_reader():
fetch_list=[avg_cost]) avg_loss_value, = exe.run(main_program,
print(avg_loss_value) feed=feeder.feed(data),
if avg_loss_value[0] < 10.0: fetch_list=[avg_cost])
if save_dirname is not None: print(avg_loss_value)
fluid.io.save_inference_model(save_dirname, ['x'], if avg_loss_value[0] < 10.0:
[y_predict], exe) if save_dirname is not None:
return fluid.io.save_inference_model(save_dirname, ['x'],
if math.isnan(float(avg_loss_value)): [y_predict], exe)
sys.exit("got NaN loss, training failed.") return
raise AssertionError("Fit a line cost is too large, {0:2.2}".format( if math.isnan(float(avg_loss_value)):
avg_loss_value[0])) sys.exit("got NaN loss, training failed.")
raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
avg_loss_value[0]))
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda, save_dirname=None): def infer(use_cuda, save_dirname=None):
...@@ -72,33 +103,36 @@ def infer(use_cuda, save_dirname=None): ...@@ -72,33 +103,36 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc, inference_scope = fluid.core.Scope()
# the feed_target_names (the names of variables that will be feeded with fluid.scope_guard(inference_scope):
# data using feed operators), and the fetch_targets (variables that # Use fluid.io.load_inference_model to obtain the inference program desc,
# we want to obtain data from using fetch operators). # the feed_target_names (the names of variables that will be feeded
[inference_program, feed_target_names, # data using feed operators), and the fetch_targets (variables that
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
# The input's dimension should be 2-D and the second dim is 13 fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
# The input data should be >= 0
batch_size = 10 # The input's dimension should be 2-D and the second dim is 13
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") # The input data should be >= 0
assert feed_target_names[0] == 'x' batch_size = 10
results = exe.run(inference_program, tensor_x = numpy.random.uniform(0, 10,
feed={feed_target_names[0]: tensor_x}, [batch_size, 13]).astype("float32")
fetch_list=fetch_targets) assert feed_target_names[0] == 'x'
print("infer shape: ", results[0].shape) results = exe.run(inference_program,
print("infer results: ", results[0]) feed={feed_target_names[0]: tensor_x},
fetch_list=fetch_targets)
print("infer shape: ", results[0].shape)
def main(use_cuda): print("infer results: ", results[0])
def main(use_cuda, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
# Directory for saving the trained model # Directory for saving the trained model
save_dirname = "fit_a_line.inference.model" save_dirname = "fit_a_line.inference.model"
train(use_cuda, save_dirname) train(use_cuda, save_dirname, is_local)
infer(use_cuda, save_dirname) infer(use_cuda, save_dirname)
......
...@@ -15,12 +15,13 @@ ...@@ -15,12 +15,13 @@
from __future__ import print_function from __future__ import print_function
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import contextlib import contextlib
import math import math
import sys import sys
import numpy import numpy
import unittest import unittest
import os
def resnet_cifar10(input, depth=32): def resnet_cifar10(input, depth=32):
...@@ -92,7 +93,7 @@ def vgg16_bn_drop(input): ...@@ -92,7 +93,7 @@ def vgg16_bn_drop(input):
return fc2 return fc2
def train(net_type, use_cuda, save_dirname): def train(net_type, use_cuda, save_dirname, is_local):
classdim = 10 classdim = 10
data_shape = [3, 32, 32] data_shape = [3, 32, 32]
...@@ -110,14 +111,14 @@ def train(net_type, use_cuda, save_dirname): ...@@ -110,14 +111,14 @@ def train(net_type, use_cuda, save_dirname):
predict = fluid.layers.fc(input=net, size=classdim, act='softmax') predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label) acc = fluid.layers.accuracy(input=predict, label=label)
# Test program # Test program
test_program = fluid.default_main_program().clone() test_program = fluid.default_main_program().clone()
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(avg_cost) optimize_ops, params_grads = optimizer.minimize(avg_cost)
BATCH_SIZE = 128 BATCH_SIZE = 128
PASS_NUM = 1 PASS_NUM = 1
...@@ -133,38 +134,68 @@ def train(net_type, use_cuda, save_dirname): ...@@ -133,38 +134,68 @@ def train(net_type, use_cuda, save_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe.run(fluid.default_startup_program())
def train_loop(main_program):
loss = 0.0 exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM): loss = 0.0
for batch_id, data in enumerate(train_reader()): for pass_id in range(PASS_NUM):
exe.run(feed=feeder.feed(data)) for batch_id, data in enumerate(train_reader()):
exe.run(main_program, feed=feeder.feed(data))
if (batch_id % 10) == 0:
acc_list = [] if (batch_id % 10) == 0:
avg_loss_list = [] acc_list = []
for tid, test_data in enumerate(test_reader()): avg_loss_list = []
loss_t, acc_t = exe.run(program=test_program, for tid, test_data in enumerate(test_reader()):
feed=feeder.feed(test_data), loss_t, acc_t = exe.run(program=test_program,
fetch_list=[avg_cost, acc]) feed=feeder.feed(test_data),
if math.isnan(float(loss_t)): fetch_list=[avg_cost, acc])
sys.exit("got NaN loss, training failed.") if math.isnan(float(loss_t)):
acc_list.append(float(acc_t)) sys.exit("got NaN loss, training failed.")
avg_loss_list.append(float(loss_t)) acc_list.append(float(acc_t))
break # Use 1 segment for speeding up CI avg_loss_list.append(float(loss_t))
break # Use 1 segment for speeding up CI
acc_value = numpy.array(acc_list).mean()
avg_loss_value = numpy.array(avg_loss_list).mean() acc_value = numpy.array(acc_list).mean()
avg_loss_value = numpy.array(avg_loss_list).mean()
print(
'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. print(
format(pass_id, batch_id + 1, 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
float(avg_loss_value), float(acc_value))) format(pass_id, batch_id + 1,
float(avg_loss_value), float(acc_value)))
if acc_value > 0.01: # Low threshold for speeding up CI
fluid.io.save_inference_model(save_dirname, ["pixel"], if acc_value > 0.01: # Low threshold for speeding up CI
[predict], exe) fluid.io.save_inference_model(save_dirname, ["pixel"],
return [predict], exe)
return
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda, save_dirname=None): def infer(use_cuda, save_dirname=None):
...@@ -174,32 +205,36 @@ def infer(use_cuda, save_dirname=None): ...@@ -174,32 +205,36 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc, inference_scope = fluid.core.Scope()
# the feed_target_names (the names of variables that will be feeded with fluid.scope_guard(inference_scope):
# data using feed operators), and the fetch_targets (variables that # Use fluid.io.load_inference_model to obtain the inference program desc,
# we want to obtain data from using fetch operators). # the feed_target_names (the names of variables that will be feeded
[inference_program, feed_target_names, # data using feed operators), and the fetch_targets (variables that
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
# The input's dimension of conv should be 4-D or 5-D. fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
tensor_img = numpy.random.rand(1, 3, 32, 32).astype("float32")
# The input's dimension of conv should be 4-D or 5-D.
# Construct feed as a dictionary of {feed_target_name: feed_target_data} # Use normilized image pixels as input data, which should be in the range [0, 1.0].
# and results will contain a list of data corresponding to fetch_targets. batch_size = 1
results = exe.run(inference_program, tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets) # Construct feed as a dictionary of {feed_target_name: feed_target_data}
print("infer results: ", results[0]) # and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img},
def main(net_type, use_cuda): fetch_list=fetch_targets)
print("infer results: ", results[0])
def main(net_type, use_cuda, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
# Directory for saving the trained model # Directory for saving the trained model
save_dirname = "image_classification_" + net_type + ".inference.model" save_dirname = "image_classification_" + net_type + ".inference.model"
train(net_type, use_cuda, save_dirname) train(net_type, use_cuda, save_dirname, is_local)
infer(use_cuda, save_dirname) infer(use_cuda, save_dirname)
......
...@@ -17,16 +17,17 @@ import math ...@@ -17,16 +17,17 @@ import math
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05 import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid as fluid import paddle.fluid as fluid
from paddle.v2.fluid.initializer import init_on_cpu from paddle.fluid.initializer import init_on_cpu
import contextlib import contextlib
import time import time
import unittest import unittest
import os
word_dict, verb_dict, label_dict = conll05.get_dict() word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict) word_dict_len = len(word_dict)
label_dict_len = len(label_dict) label_dict_len = len(label_dict)
pred_len = len(verb_dict) pred_dict_len = len(verb_dict)
mark_dict_len = 2 mark_dict_len = 2
word_dim = 32 word_dim = 32
...@@ -53,7 +54,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, ...@@ -53,7 +54,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
# 8 features # 8 features
predicate_embedding = fluid.layers.embedding( predicate_embedding = fluid.layers.embedding(
input=predicate, input=predicate,
size=[pred_len, word_dim], size=[pred_dict_len, word_dim],
dtype='float32', dtype='float32',
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
param_attr='vemb') param_attr='vemb')
...@@ -138,7 +139,7 @@ def create_random_lodtensor(lod, place, low, high): ...@@ -138,7 +139,7 @@ def create_random_lodtensor(lod, place, low, high):
return res return res
def train(use_cuda, save_dirname=None): def train(use_cuda, save_dirname=None, is_local=True):
# define network topology # define network topology
word = fluid.layers.data( word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1) name='word_data', shape=[1], dtype='int64', lod_level=1)
...@@ -164,7 +165,7 @@ def train(use_cuda, save_dirname=None): ...@@ -164,7 +165,7 @@ def train(use_cuda, save_dirname=None):
label=target, label=target,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name='crfw', learning_rate=mix_hidden_lr)) name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(x=crf_cost) avg_cost = fluid.layers.mean(crf_cost)
# TODO(qiao) # TODO(qiao)
# check other optimizers and check why out will be NAN # check other optimizers and check why out will be NAN
...@@ -178,7 +179,7 @@ def train(use_cuda, save_dirname=None): ...@@ -178,7 +179,7 @@ def train(use_cuda, save_dirname=None):
decay_rate=0.5, decay_rate=0.5,
staircase=True), staircase=True),
global_step=global_step) global_step=global_step)
sgd_optimizer.minimize(avg_cost) optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
# TODO(qiao) # TODO(qiao)
# add dependency track and move this config before optimizer # add dependency track and move this config before optimizer
...@@ -204,44 +205,78 @@ def train(use_cuda, save_dirname=None): ...@@ -204,44 +205,78 @@ def train(use_cuda, save_dirname=None):
place=place) place=place)
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) def train_loop(main_program):
exe.run(fluid.default_startup_program())
embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor()
embedding_param.set( embedding_param = fluid.global_scope().find_var(
load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) embedding_name).get_tensor()
embedding_param.set(
start_time = time.time() load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
batch_id = 0 place)
for pass_id in xrange(PASS_NUM):
chunk_evaluator.reset(exe) start_time = time.time()
for data in train_data(): batch_id = 0
cost, precision, recall, f1_score = exe.run( for pass_id in xrange(PASS_NUM):
fluid.default_main_program(), chunk_evaluator.reset(exe)
feed=feeder.feed(data), for data in train_data():
fetch_list=[avg_cost] + chunk_evaluator.metrics) cost, precision, recall, f1_score = exe.run(
pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval( main_program,
exe) feed=feeder.feed(data),
fetch_list=[avg_cost] + chunk_evaluator.metrics)
if batch_id % 10 == 0: pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
print("avg_cost:" + str(cost) + " precision:" + str( exe)
precision) + " recall:" + str(recall) + " f1_score:" + str(
f1_score) + " pass_precision:" + str( if batch_id % 10 == 0:
pass_precision) + " pass_recall:" + str(pass_recall) print("avg_cost:" + str(cost) + " precision:" + str(
+ " pass_f1_score:" + str(pass_f1_score)) precision) + " recall:" + str(recall) + " f1_score:" +
if batch_id != 0: str(f1_score) + " pass_precision:" + str(
print("second per batch: " + str((time.time() - start_time) pass_precision) + " pass_recall:" + str(
/ batch_id)) pass_recall) + " pass_f1_score:" + str(
# Set the threshold low to speed up the CI test pass_f1_score))
if float(pass_precision) > 0.05: if batch_id != 0:
if save_dirname is not None: print("second per batch: " + str((time.time(
fluid.io.save_inference_model(save_dirname, [ ) - start_time) / batch_id))
'word_data', 'verb_data', 'ctx_n2_data', # Set the threshold low to speed up the CI test
'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data', if float(pass_precision) > 0.05:
'ctx_p2_data', 'mark_data' if save_dirname is not None:
], [feature_out], exe) # TODO(liuyiqun): Change the target to crf_decode
return fluid.io.save_inference_model(save_dirname, [
'word_data', 'verb_data', 'ctx_n2_data',
batch_id = batch_id + 1 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
'ctx_p2_data', 'mark_data'
], [feature_out], exe)
return
batch_id = batch_id + 1
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda, save_dirname=None): def infer(use_cuda, save_dirname=None):
...@@ -251,61 +286,70 @@ def infer(use_cuda, save_dirname=None): ...@@ -251,61 +286,70 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc, inference_scope = fluid.core.Scope()
# the feed_target_names (the names of variables that will be feeded with fluid.scope_guard(inference_scope):
# data using feed operators), and the fetch_targets (variables that # Use fluid.io.load_inference_model to obtain the inference program desc,
# we want to obtain data from using fetch operators). # the feed_target_names (the names of variables that will be feeded
[inference_program, feed_target_names, # data using feed operators), and the fetch_targets (variables that
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
lod = [0, 4, 10] fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
ts_word = create_random_lodtensor(lod, place, low=0, high=1)
ts_pred = create_random_lodtensor(lod, place, low=0, high=1) lod = [0, 4, 10]
ts_ctx_n2 = create_random_lodtensor(lod, place, low=0, high=1) word = create_random_lodtensor(
ts_ctx_n1 = create_random_lodtensor(lod, place, low=0, high=1) lod, place, low=0, high=word_dict_len - 1)
ts_ctx_0 = create_random_lodtensor(lod, place, low=0, high=1) pred = create_random_lodtensor(
ts_ctx_p1 = create_random_lodtensor(lod, place, low=0, high=1) lod, place, low=0, high=pred_dict_len - 1)
ts_ctx_p2 = create_random_lodtensor(lod, place, low=0, high=1) ctx_n2 = create_random_lodtensor(
ts_mark = create_random_lodtensor(lod, place, low=0, high=1) lod, place, low=0, high=word_dict_len - 1)
ctx_n1 = create_random_lodtensor(
# Construct feed as a dictionary of {feed_target_name: feed_target_data} lod, place, low=0, high=word_dict_len - 1)
# and results will contain a list of data corresponding to fetch_targets. ctx_0 = create_random_lodtensor(
assert feed_target_names[0] == 'word_data' lod, place, low=0, high=word_dict_len - 1)
assert feed_target_names[1] == 'verb_data' ctx_p1 = create_random_lodtensor(
assert feed_target_names[2] == 'ctx_n2_data' lod, place, low=0, high=word_dict_len - 1)
assert feed_target_names[3] == 'ctx_n1_data' ctx_p2 = create_random_lodtensor(
assert feed_target_names[4] == 'ctx_0_data' lod, place, low=0, high=word_dict_len - 1)
assert feed_target_names[5] == 'ctx_p1_data' mark = create_random_lodtensor(
assert feed_target_names[6] == 'ctx_p2_data' lod, place, low=0, high=mark_dict_len - 1)
assert feed_target_names[7] == 'mark_data'
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
results = exe.run(inference_program, # and results will contain a list of data corresponding to fetch_targets.
feed={ assert feed_target_names[0] == 'word_data'
feed_target_names[0]: ts_word, assert feed_target_names[1] == 'verb_data'
feed_target_names[1]: ts_pred, assert feed_target_names[2] == 'ctx_n2_data'
feed_target_names[2]: ts_ctx_n2, assert feed_target_names[3] == 'ctx_n1_data'
feed_target_names[3]: ts_ctx_n1, assert feed_target_names[4] == 'ctx_0_data'
feed_target_names[4]: ts_ctx_0, assert feed_target_names[5] == 'ctx_p1_data'
feed_target_names[5]: ts_ctx_p1, assert feed_target_names[6] == 'ctx_p2_data'
feed_target_names[6]: ts_ctx_p2, assert feed_target_names[7] == 'mark_data'
feed_target_names[7]: ts_mark
}, results = exe.run(inference_program,
fetch_list=fetch_targets, feed={
return_numpy=False) feed_target_names[0]: word,
print(results[0].lod()) feed_target_names[1]: pred,
np_data = np.array(results[0]) feed_target_names[2]: ctx_n2,
print("Inference Shape: ", np_data.shape) feed_target_names[3]: ctx_n1,
print("Inference results: ", np_data) feed_target_names[4]: ctx_0,
feed_target_names[5]: ctx_p1,
feed_target_names[6]: ctx_p2,
def main(use_cuda): feed_target_names[7]: mark
},
fetch_list=fetch_targets,
return_numpy=False)
print(results[0].lod())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
def main(use_cuda, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
# Directory for saving the trained model # Directory for saving the trained model
save_dirname = "label_semantic_roles.inference.model" save_dirname = "label_semantic_roles.inference.model"
train(use_cuda, save_dirname) train(use_cuda, save_dirname, is_local)
infer(use_cuda, save_dirname) infer(use_cuda, save_dirname)
......
...@@ -15,11 +15,12 @@ import contextlib ...@@ -15,11 +15,12 @@ import contextlib
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.layers as pd import paddle.fluid.layers as pd
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import unittest import unittest
import os
dict_size = 30000 dict_size = 30000
source_dict_dim = target_dict_dim = dict_size source_dict_dim = target_dict_dim = dict_size
...@@ -168,7 +169,7 @@ def to_lodtensor(data, place): ...@@ -168,7 +169,7 @@ def to_lodtensor(data, place):
return res return res
def train_main(use_cuda, is_sparse): def train_main(use_cuda, is_sparse, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
...@@ -178,10 +179,10 @@ def train_main(use_cuda, is_sparse): ...@@ -178,10 +179,10 @@ def train_main(use_cuda, is_sparse):
label = pd.data( label = pd.data(
name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
cost = pd.cross_entropy(input=rnn_out, label=label) cost = pd.cross_entropy(input=rnn_out, label=label)
avg_cost = pd.mean(x=cost) avg_cost = pd.mean(cost)
optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
optimizer.minimize(avg_cost) optimize_ops, params_grads = optimizer.minimize(avg_cost)
train_data = paddle.batch( train_data = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -190,27 +191,57 @@ def train_main(use_cuda, is_sparse): ...@@ -190,27 +191,57 @@ def train_main(use_cuda, is_sparse):
exe = Executor(place) exe = Executor(place)
exe.run(framework.default_startup_program()) def train_loop(main_program):
exe.run(framework.default_startup_program())
batch_id = 0
for pass_id in xrange(1): batch_id = 0
for data in train_data(): for pass_id in xrange(1):
word_data = to_lodtensor(map(lambda x: x[0], data), place) for data in train_data():
trg_word = to_lodtensor(map(lambda x: x[1], data), place) word_data = to_lodtensor(map(lambda x: x[0], data), place)
trg_word_next = to_lodtensor(map(lambda x: x[2], data), place) trg_word = to_lodtensor(map(lambda x: x[1], data), place)
outs = exe.run(framework.default_main_program(), trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
feed={ outs = exe.run(main_program,
'src_word_id': word_data, feed={
'target_language_word': trg_word, 'src_word_id': word_data,
'target_language_next_word': trg_word_next 'target_language_word': trg_word,
}, 'target_language_next_word': trg_word_next
fetch_list=[avg_cost]) },
avg_cost_val = np.array(outs[0]) fetch_list=[avg_cost])
print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) + avg_cost_val = np.array(outs[0])
" avg_cost=" + str(avg_cost_val)) print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
if batch_id > 3: " avg_cost=" + str(avg_cost_val))
break if batch_id > 3:
batch_id += 1 break
batch_id += 1
if is_local:
train_loop(framework.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def decode_main(use_cuda, is_sparse): def decode_main(use_cuda, is_sparse):
......
...@@ -13,34 +13,14 @@ ...@@ -13,34 +13,14 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import argparse import argparse
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle.v2 as paddle
import sys import sys
import numpy import numpy
import unittest import unittest
import math import math
import sys import sys
import os
def parse_arg():
parser = argparse.ArgumentParser()
parser.add_argument(
"nn_type",
help="The neural network type, in ['mlp', 'conv']",
type=str,
choices=['mlp', 'conv'])
parser.add_argument(
"--parallel",
help='Run in parallel or not',
default=False,
action="store_true")
parser.add_argument(
"--use_cuda",
help="Run the program by using CUDA",
default=False,
action="store_true")
return parser.parse_args()
BATCH_SIZE = 64 BATCH_SIZE = 64
...@@ -48,7 +28,7 @@ BATCH_SIZE = 64 ...@@ -48,7 +28,7 @@ BATCH_SIZE = 64
def loss_net(hidden, label): def loss_net(hidden, label):
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(x=loss) avg_loss = fluid.layers.mean(loss)
acc = fluid.layers.accuracy(input=prediction, label=label) acc = fluid.layers.accuracy(input=prediction, label=label)
return prediction, avg_loss, acc return prediction, avg_loss, acc
...@@ -78,7 +58,13 @@ def conv_net(img, label): ...@@ -78,7 +58,13 @@ def conv_net(img, label):
return loss_net(conv_pool_2, label) return loss_net(conv_pool_2, label)
def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename): def train(nn_type,
use_cuda,
parallel,
save_dirname=None,
model_filename=None,
params_filename=None,
is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
...@@ -101,20 +87,19 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename): ...@@ -101,20 +87,19 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename):
avg_loss, acc = pd() avg_loss, acc = pd()
# get mean loss and acc through every devices. # get mean loss and acc through every devices.
avg_loss = fluid.layers.mean(x=avg_loss) avg_loss = fluid.layers.mean(avg_loss)
acc = fluid.layers.mean(x=acc) acc = fluid.layers.mean(acc)
else: else:
prediction, avg_loss, acc = net_conf(img, label) prediction, avg_loss, acc = net_conf(img, label)
test_program = fluid.default_main_program().clone() test_program = fluid.default_main_program().clone()
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(avg_loss) optimize_ops, params_grads = optimizer.minimize(avg_loss)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -124,88 +109,133 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename): ...@@ -124,88 +109,133 @@ def train(nn_type, use_cuda, parallel, save_dirname, save_param_filename):
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(feed_list=[img, label], place=place) feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
PASS_NUM = 100 def train_loop(main_program):
for pass_id in range(PASS_NUM): exe.run(fluid.default_startup_program())
for batch_id, data in enumerate(train_reader()):
# train a mini-batch, fetch nothing PASS_NUM = 100
exe.run(feed=feeder.feed(data)) for pass_id in range(PASS_NUM):
if (batch_id + 1) % 10 == 0: for batch_id, data in enumerate(train_reader()):
acc_set = [] # train a mini-batch, fetch nothing
avg_loss_set = [] exe.run(main_program, feed=feeder.feed(data))
for test_data in test_reader(): if (batch_id + 1) % 10 == 0:
acc_np, avg_loss_np = exe.run(program=test_program, acc_set = []
feed=feeder.feed(test_data), avg_loss_set = []
fetch_list=[acc, avg_loss]) for test_data in test_reader():
acc_set.append(float(acc_np)) acc_np, avg_loss_np = exe.run(
avg_loss_set.append(float(avg_loss_np)) program=test_program,
# get test acc and loss feed=feeder.feed(test_data),
acc_val = numpy.array(acc_set).mean() fetch_list=[acc, avg_loss])
avg_loss_val = numpy.array(avg_loss_set).mean() acc_set.append(float(acc_np))
if float(acc_val) > 0.2: # Smaller value to increase CI speed avg_loss_set.append(float(avg_loss_np))
if save_dirname is not None: # get test acc and loss
fluid.io.save_inference_model( acc_val = numpy.array(acc_set).mean()
save_dirname, ["img"], [prediction], avg_loss_val = numpy.array(avg_loss_set).mean()
exe, if float(acc_val
save_file_name=save_param_filename) ) > 0.2: # Smaller value to increase CI speed
return if save_dirname is not None:
else: fluid.io.save_inference_model(
print( save_dirname, ["img"], [prediction],
'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. exe,
format(pass_id, batch_id + 1, model_filename=model_filename,
float(avg_loss_val), float(acc_val))) params_filename=params_filename)
if math.isnan(float(avg_loss_val)): return
sys.exit("got NaN loss, training failed.") else:
raise AssertionError("Loss of recognize digits is too large") print(
'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
format(pass_id, batch_id + 1,
def infer(use_cuda, save_dirname=None, param_filename=None): float(avg_loss_val), float(acc_val)))
if math.isnan(float(avg_loss_val)):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Loss of recognize digits is too large")
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
pserver_endpoints = os.getenv("PSERVERS")
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda,
save_dirname=None,
model_filename=None,
params_filename=None):
if save_dirname is None: if save_dirname is None:
return return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc, inference_scope = fluid.core.Scope()
# the feed_target_names (the names of variables that will be feeded with fluid.scope_guard(inference_scope):
# data using feed operators), and the fetch_targets (variables that # Use fluid.io.load_inference_model to obtain the inference program desc,
# we want to obtain data from using fetch operators). # the feed_target_names (the names of variables that will be feeded
[inference_program, feed_target_names, fetch_targets # data using feed operators), and the fetch_targets (variables that
] = fluid.io.load_inference_model(save_dirname, exe, param_filename) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
# The input's dimension of conv should be 4-D or 5-D. fetch_targets] = fluid.io.load_inference_model(
# Use normilized image pixels as input data, which should be in the range [-1.0, 1.0]. save_dirname, exe, model_filename, params_filename)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0, # The input's dimension of conv should be 4-D or 5-D.
[batch_size, 1, 28, 28]).astype("float32") # Use normilized image pixels as input data, which should be in the range [-1.0, 1.0].
batch_size = 1
# Construct feed as a dictionary of {feed_target_name: feed_target_data} tensor_img = numpy.random.uniform(
# and results will contain a list of data corresponding to fetch_targets. -1.0, 1.0, [batch_size, 1, 28, 28]).astype("float32")
results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img}, # Construct feed as a dictionary of {feed_target_name: feed_target_data}
fetch_list=fetch_targets) # and results will contain a list of data corresponding to fetch_targets.
print("infer results: ", results[0]) results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
print("infer results: ", results[0])
def main(use_cuda, parallel, nn_type, combine): def main(use_cuda, parallel, nn_type, combine):
save_dirname = None
model_filename = None
params_filename = None
if not use_cuda and not parallel: if not use_cuda and not parallel:
save_dirname = "recognize_digits_" + nn_type + ".inference.model" save_dirname = "recognize_digits_" + nn_type + ".inference.model"
save_filename = None
if combine == True: if combine == True:
save_filename = "__params_combined__" model_filename = "__model_combined__"
else: params_filename = "__params_combined__"
save_dirname = None
save_filename = None
# call train() with is_local argument to run distributed train
train( train(
nn_type=nn_type, nn_type=nn_type,
use_cuda=use_cuda, use_cuda=use_cuda,
parallel=parallel, parallel=parallel,
save_dirname=save_dirname, save_dirname=save_dirname,
save_param_filename=save_filename) model_filename=model_filename,
params_filename=params_filename)
infer( infer(
use_cuda=use_cuda, use_cuda=use_cuda,
save_dirname=save_dirname, save_dirname=save_dirname,
param_filename=save_filename) model_filename=model_filename,
params_filename=params_filename)
class TestRecognizeDigits(unittest.TestCase): class TestRecognizeDigits(unittest.TestCase):
......
...@@ -14,14 +14,15 @@ ...@@ -14,14 +14,15 @@
import math import math
import sys import sys
import os
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.fluid.nets as nets
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
IS_SPARSE = True IS_SPARSE = True
USE_GPU = False USE_GPU = False
...@@ -147,24 +148,23 @@ def model(): ...@@ -147,24 +148,23 @@ def model():
label = layers.data(name='score', shape=[1], dtype='float32') label = layers.data(name='score', shape=[1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label) square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(x=square_cost) avg_cost = layers.mean(square_cost)
return scale_infer, avg_cost return scale_infer, avg_cost
def train(use_cuda, save_dirname): def train(use_cuda, save_dirname, is_local=True):
scale_infer, avg_cost = model() scale_infer, avg_cost = model()
# test program # test program
test_program = fluid.default_main_program().clone() test_program = fluid.default_main_program().clone()
sgd_optimizer = SGDOptimizer(learning_rate=0.2) sgd_optimizer = SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(avg_cost) optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(framework.default_startup_program())
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -212,36 +212,69 @@ def train(use_cuda, save_dirname): ...@@ -212,36 +212,69 @@ def train(use_cuda, save_dirname):
feed_tensors[key] = tensor feed_tensors[key] = tensor
return feed_tensors return feed_tensors
PASS_NUM = 100 def train_loop(main_program):
for pass_id in range(PASS_NUM): exe.run(framework.default_startup_program())
for batch_id, data in enumerate(train_reader()):
# train a mini-batch PASS_NUM = 100
outs = exe.run(program=fluid.default_main_program(), for pass_id in range(PASS_NUM):
feed=func_feed(feeding, data), for batch_id, data in enumerate(train_reader()):
fetch_list=[avg_cost]) # train a mini-batch
out = np.array(outs[0]) outs = exe.run(program=main_program,
if (batch_id + 1) % 10 == 0: feed=func_feed(feeding, data),
avg_cost_set = [] fetch_list=[avg_cost])
for test_data in test_reader(): out = np.array(outs[0])
avg_cost_np = exe.run(program=test_program, if (batch_id + 1) % 10 == 0:
feed=func_feed(feeding, test_data), avg_cost_set = []
fetch_list=[avg_cost]) for test_data in test_reader():
avg_cost_set.append(avg_cost_np[0]) avg_cost_np = exe.run(
break # test only 1 segment for speeding up CI program=test_program,
feed=func_feed(feeding, test_data),
# get test avg_cost fetch_list=[avg_cost])
test_avg_cost = np.array(avg_cost_set).mean() avg_cost_set.append(avg_cost_np[0])
if test_avg_cost < 6.0: break # test only 1 segment for speeding up CI
# if avg_cost less than 6.0, we think our code is good.
if save_dirname is not None: # get test avg_cost
fluid.io.save_inference_model(save_dirname, [ test_avg_cost = np.array(avg_cost_set).mean()
"user_id", "gender_id", "age_id", "job_id", if test_avg_cost < 6.0:
"movie_id", "category_id", "movie_title" # if avg_cost less than 6.0, we think our code is good.
], [scale_infer], exe) if save_dirname is not None:
return fluid.io.save_inference_model(save_dirname, [
"user_id", "gender_id", "age_id", "job_id",
if math.isnan(float(out[0])): "movie_id", "category_id", "movie_title"
sys.exit("got NaN loss, training failed.") ], [scale_infer], exe)
return
if math.isnan(float(out[0])):
sys.exit("got NaN loss, training failed.")
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(use_cuda, save_dirname=None): def infer(use_cuda, save_dirname=None):
...@@ -251,13 +284,6 @@ def infer(use_cuda, save_dirname=None): ...@@ -251,13 +284,6 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
def create_lod_tensor(data, lod=None): def create_lod_tensor(data, lod=None):
tensor = fluid.LoDTensor() tensor = fluid.LoDTensor()
if lod is None: if lod is None:
...@@ -275,44 +301,53 @@ def infer(use_cuda, save_dirname=None): ...@@ -275,44 +301,53 @@ def infer(use_cuda, save_dirname=None):
tensor.set(flattened_data, place) tensor.set(flattened_data, place)
return tensor return tensor
# Use the first data from paddle.dataset.movielens.test() as input inference_scope = fluid.core.Scope()
assert feed_target_names[0] == "user_id" with fluid.scope_guard(inference_scope):
user_id = create_lod_tensor([[1]]) # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded
assert feed_target_names[1] == "gender_id" # data using feed operators), and the fetch_targets (variables that
gender_id = create_lod_tensor([[1]]) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
assert feed_target_names[2] == "age_id" fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
age_id = create_lod_tensor([[0]])
# Use the first data from paddle.dataset.movielens.test() as input
assert feed_target_names[3] == "job_id" assert feed_target_names[0] == "user_id"
job_id = create_lod_tensor([[10]]) user_id = create_lod_tensor([[1]])
assert feed_target_names[4] == "movie_id" assert feed_target_names[1] == "gender_id"
movie_id = create_lod_tensor([[783]]) gender_id = create_lod_tensor([[1]])
assert feed_target_names[5] == "category_id" assert feed_target_names[2] == "age_id"
category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]]) age_id = create_lod_tensor([[0]])
assert feed_target_names[6] == "movie_title" assert feed_target_names[3] == "job_id"
movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]], job_id = create_lod_tensor([[10]])
[[0, 5]])
assert feed_target_names[4] == "movie_id"
# Construct feed as a dictionary of {feed_target_name: feed_target_data} movie_id = create_lod_tensor([[783]])
# and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program, assert feed_target_names[5] == "category_id"
feed={ category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
feed_target_names[0]: user_id,
feed_target_names[1]: gender_id, assert feed_target_names[6] == "movie_title"
feed_target_names[2]: age_id, movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
feed_target_names[3]: job_id, [[0, 5]])
feed_target_names[4]: movie_id,
feed_target_names[5]: category_id, # Construct feed as a dictionary of {feed_target_name: feed_target_data}
feed_target_names[6]: movie_title # and results will contain a list of data corresponding to fetch_targets.
}, results = exe.run(inference_program,
fetch_list=fetch_targets, feed={
return_numpy=False) feed_target_names[0]: user_id,
print("inferred score: ", np.array(results[0])) feed_target_names[1]: gender_id,
feed_target_names[2]: age_id,
feed_target_names[3]: job_id,
feed_target_names[4]: movie_id,
feed_target_names[5]: category_id,
feed_target_names[6]: movie_title
},
fetch_list=fetch_targets,
return_numpy=False)
print("inferred score: ", np.array(results[0]))
def main(use_cuda): def main(use_cuda):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,12 +14,13 @@ ...@@ -14,12 +14,13 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle.v2 as paddle
import contextlib import contextlib
import math import math
import numpy as np import numpy as np
import sys import sys
import os
def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
...@@ -42,7 +43,47 @@ def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, ...@@ -42,7 +43,47 @@ def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
size=class_dim, size=class_dim,
act="softmax") act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label) cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, accuracy, prediction
def dyn_rnn_lstm(data, label, input_dim, class_dim=2, emb_dim=32,
lstm_size=128):
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True)
sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
rnn = fluid.layers.DynamicRNN()
with rnn.block():
word = rnn.step_input(sentence)
prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
def gate_common(ipt, hidden, size):
gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
return gate0 + gate1
forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
lstm_size))
cell = forget_gate * prev_cell + input_gate * cell_gate
hidden = output_gate * fluid.layers.tanh(x=cell)
rnn.update_memory(prev_cell, cell)
rnn.update_memory(prev_hidden, hidden)
rnn.output(hidden)
last = fluid.layers.sequence_last_step(rnn())
prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label) accuracy = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, accuracy, prediction return avg_cost, accuracy, prediction
...@@ -79,7 +120,7 @@ def stacked_lstm_net(data, ...@@ -79,7 +120,7 @@ def stacked_lstm_net(data,
size=class_dim, size=class_dim,
act='softmax') act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label) cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label) accuracy = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, accuracy, prediction return avg_cost, accuracy, prediction
...@@ -92,7 +133,12 @@ def create_random_lodtensor(lod, place, low, high): ...@@ -92,7 +133,12 @@ def create_random_lodtensor(lod, place, low, high):
return res return res
def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): def train(word_dict,
net_method,
use_cuda,
parallel=False,
save_dirname=None,
is_local=True):
BATCH_SIZE = 128 BATCH_SIZE = 128
PASS_NUM = 5 PASS_NUM = 5
dict_dim = len(word_dict) dict_dim = len(word_dict)
...@@ -118,13 +164,13 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): ...@@ -118,13 +164,13 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
pd.write_output(acc) pd.write_output(acc)
cost, acc = pd() cost, acc = pd()
cost = fluid.layers.mean(x=cost) cost = fluid.layers.mean(cost)
acc_out = fluid.layers.mean(x=acc) acc_out = fluid.layers.mean(acc)
prediction = None prediction = None
assert save_dirname is None assert save_dirname is None
adagrad = fluid.optimizer.Adagrad(learning_rate=0.002) adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
adagrad.minimize(cost) optimize_ops, params_grads = adagrad.minimize(cost)
train_data = paddle.batch( train_data = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -134,55 +180,88 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): ...@@ -134,55 +180,88 @@ def train(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
exe = fluid.Executor(place) exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[data, label], place=place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program()) def train_loop(main_program):
exe.run(fluid.default_startup_program())
for pass_id in xrange(PASS_NUM):
for data in train_data(): for pass_id in xrange(PASS_NUM):
cost_val, acc_val = exe.run(fluid.default_main_program(), for data in train_data():
feed=feeder.feed(data), cost_val, acc_val = exe.run(main_program,
fetch_list=[cost, acc_out]) feed=feeder.feed(data),
print("cost=" + str(cost_val) + " acc=" + str(acc_val)) fetch_list=[cost, acc_out])
if cost_val < 0.4 and acc_val > 0.8: print("cost=" + str(cost_val) + " acc=" + str(acc_val))
if save_dirname is not None: if cost_val < 0.4 and acc_val > 0.8:
fluid.io.save_inference_model(save_dirname, ["words"], if save_dirname is not None:
prediction, exe) fluid.io.save_inference_model(save_dirname, ["words"],
return prediction, exe)
if math.isnan(float(cost_val)): return
sys.exit("got NaN loss, training failed.") if math.isnan(float(cost_val)):
raise AssertionError("Cost is too large for {0}".format( sys.exit("got NaN loss, training failed.")
net_method.__name__)) raise AssertionError("Cost is too large for {0}".format(
net_method.__name__))
def infer(use_cuda, save_dirname=None): if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def infer(word_dict, use_cuda, save_dirname=None):
if save_dirname is None: if save_dirname is None:
return return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc, inference_scope = fluid.core.Scope()
# the feed_target_names (the names of variables that will be feeded with fluid.scope_guard(inference_scope):
# data using feed operators), and the fetch_targets (variables that # Use fluid.io.load_inference_model to obtain the inference program desc,
# we want to obtain data from using fetch operators). # the feed_target_names (the names of variables that will be feeded
[inference_program, feed_target_names, # data using feed operators), and the fetch_targets (variables that
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) # we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
lod = [0, 4, 10] fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
word_dict = paddle.dataset.imdb.word_dict()
tensor_words = create_random_lodtensor( word_dict_len = len(word_dict)
lod, place, low=0, high=len(word_dict) - 1)
lod = [0, 4, 10]
# Construct feed as a dictionary of {feed_target_name: feed_target_data} tensor_words = create_random_lodtensor(
# and results will contain a list of data corresponding to fetch_targets. lod, place, low=0, high=word_dict_len - 1)
assert feed_target_names[0] == "words"
results = exe.run(inference_program, # Construct feed as a dictionary of {feed_target_name: feed_target_data}
feed={feed_target_names[0]: tensor_words}, # and results will contain a list of data corresponding to fetch_targets.
fetch_list=fetch_targets, assert feed_target_names[0] == "words"
return_numpy=False) results = exe.run(inference_program,
print(results[0].lod()) feed={feed_target_names[0]: tensor_words},
np_data = np.array(results[0]) fetch_list=fetch_targets,
print("Inference Shape: ", np_data.shape) return_numpy=False)
print("Inference results: ", np_data) print(results[0].lod())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
print("Inference results: ", np_data)
def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None): def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
...@@ -218,7 +297,7 @@ class TestUnderstandSentiment(unittest.TestCase): ...@@ -218,7 +297,7 @@ class TestUnderstandSentiment(unittest.TestCase):
self.word_dict, self.word_dict,
net_method=convolution_net, net_method=convolution_net,
use_cuda=False, use_cuda=False,
save_dirname="understand_sentiment.inference.model") save_dirname="understand_sentiment_conv.inference.model")
def test_conv_cpu_parallel(self): def test_conv_cpu_parallel(self):
with self.new_program_scope(): with self.new_program_scope():
...@@ -231,7 +310,11 @@ class TestUnderstandSentiment(unittest.TestCase): ...@@ -231,7 +310,11 @@ class TestUnderstandSentiment(unittest.TestCase):
@unittest.skip(reason="make CI faster") @unittest.skip(reason="make CI faster")
def test_stacked_lstm_cpu(self): def test_stacked_lstm_cpu(self):
with self.new_program_scope(): with self.new_program_scope():
main(self.word_dict, net_method=stacked_lstm_net, use_cuda=False) main(
self.word_dict,
net_method=stacked_lstm_net,
use_cuda=False,
save_dirname="understand_sentiment_stacked_lstm.inference.model")
def test_stacked_lstm_cpu_parallel(self): def test_stacked_lstm_cpu_parallel(self):
with self.new_program_scope(): with self.new_program_scope():
...@@ -247,7 +330,7 @@ class TestUnderstandSentiment(unittest.TestCase): ...@@ -247,7 +330,7 @@ class TestUnderstandSentiment(unittest.TestCase):
self.word_dict, self.word_dict,
net_method=convolution_net, net_method=convolution_net,
use_cuda=True, use_cuda=True,
save_dirname="understand_sentiment.inference.model") save_dirname="understand_sentiment_conv.inference.model")
def test_conv_gpu_parallel(self): def test_conv_gpu_parallel(self):
with self.new_program_scope(): with self.new_program_scope():
...@@ -260,7 +343,11 @@ class TestUnderstandSentiment(unittest.TestCase): ...@@ -260,7 +343,11 @@ class TestUnderstandSentiment(unittest.TestCase):
@unittest.skip(reason="make CI faster") @unittest.skip(reason="make CI faster")
def test_stacked_lstm_gpu(self): def test_stacked_lstm_gpu(self):
with self.new_program_scope(): with self.new_program_scope():
main(self.word_dict, net_method=stacked_lstm_net, use_cuda=True) main(
self.word_dict,
net_method=stacked_lstm_net,
use_cuda=True,
save_dirname="understand_sentiment_stacked_lstm.inference.model")
def test_stacked_lstm_gpu_parallel(self): def test_stacked_lstm_gpu_parallel(self):
with self.new_program_scope(): with self.new_program_scope():
...@@ -270,6 +357,23 @@ class TestUnderstandSentiment(unittest.TestCase): ...@@ -270,6 +357,23 @@ class TestUnderstandSentiment(unittest.TestCase):
use_cuda=True, use_cuda=True,
parallel=True) parallel=True)
@unittest.skip(reason='make CI faster')
def test_dynrnn_lstm_gpu(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=dyn_rnn_lstm,
use_cuda=True,
parallel=False)
def test_dynrnn_lstm_gpu_parallel(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=dyn_rnn_lstm,
use_cuda=True,
parallel=True)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# # Licensed under the Apache License, Version 2.0 (the "License"); #
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
...@@ -12,7 +13,7 @@ ...@@ -12,7 +13,7 @@
# limitations under the License. # limitations under the License.
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import unittest import unittest
import os import os
import numpy as np import numpy as np
...@@ -21,6 +22,7 @@ import sys ...@@ -21,6 +22,7 @@ import sys
def create_random_lodtensor(lod, place, low, high): def create_random_lodtensor(lod, place, low, high):
# The range of data elements is [low, high]
data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64") data = np.random.random_integers(low, high, [lod[-1], 1]).astype("int64")
res = fluid.LoDTensor() res = fluid.LoDTensor()
res.set(data, place) res.set(data, place)
...@@ -28,54 +30,7 @@ def create_random_lodtensor(lod, place, low, high): ...@@ -28,54 +30,7 @@ def create_random_lodtensor(lod, place, low, high):
return res return res
def infer(use_cuda, save_dirname=None): def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) - 1
# Setup input, by creating 4 words, and setting up lod required for
# lookup_table_op
lod = [0, 1]
first_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
second_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
third_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
fourth_word = create_random_lodtensor(lod, place, low=0, high=dict_size)
assert feed_target_names[0] == 'firstw'
assert feed_target_names[1] == 'secondw'
assert feed_target_names[2] == 'thirdw'
assert feed_target_names[3] == 'forthw'
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program,
feed={
feed_target_names[0]: first_word,
feed_target_names[1]: second_word,
feed_target_names[2]: third_word,
feed_target_names[3]: fourth_word
},
fetch_list=fetch_targets,
return_numpy=False)
print(results[0].lod())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
print("Inference results: ", np_data)
def train(use_cuda, is_sparse, parallel, save_dirname):
PASS_NUM = 100 PASS_NUM = 100
EMBED_SIZE = 32 EMBED_SIZE = 32
HIDDEN_SIZE = 256 HIDDEN_SIZE = 256
...@@ -118,7 +73,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname): ...@@ -118,7 +73,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
size=dict_size, size=dict_size,
act='softmax') act='softmax')
cost = fluid.layers.cross_entropy(input=predict_word, label=words[4]) cost = fluid.layers.cross_entropy(input=predict_word, label=words[4])
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
return avg_cost, predict_word return avg_cost, predict_word
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
...@@ -130,7 +85,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname): ...@@ -130,7 +85,7 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
if not parallel: if not is_parallel:
avg_cost, predict_word = __network__( avg_cost, predict_word = __network__(
[first_word, second_word, third_word, forth_word, next_word]) [first_word, second_word, third_word, forth_word, next_word])
else: else:
...@@ -143,10 +98,10 @@ def train(use_cuda, is_sparse, parallel, save_dirname): ...@@ -143,10 +98,10 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
])) ]))
pd.write_output(avg_cost) pd.write_output(avg_cost)
avg_cost = fluid.layers.mean(x=pd()) avg_cost = fluid.layers.mean(pd())
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost) optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
...@@ -157,30 +112,116 @@ def train(use_cuda, is_sparse, parallel, save_dirname): ...@@ -157,30 +112,116 @@ def train(use_cuda, is_sparse, parallel, save_dirname):
feed_list=[first_word, second_word, third_word, forth_word, next_word], feed_list=[first_word, second_word, third_word, forth_word, next_word],
place=place) place=place)
exe.run(fluid.default_startup_program()) def train_loop(main_program):
exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
for data in train_reader():
avg_cost_np = exe.run(main_program,
feed=feeder.feed(data),
fetch_list=[avg_cost])
if avg_cost_np[0] < 5.0:
if save_dirname is not None:
fluid.io.save_inference_model(save_dirname, [
'firstw', 'secondw', 'thirdw', 'forthw'
], [predict_word], exe)
return
if math.isnan(float(avg_cost_np[0])):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(
optimize_ops,
params_grads,
trainer_id,
pservers=pserver_endpoints,
trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint,
pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
for pass_id in range(PASS_NUM):
for data in train_reader():
avg_cost_np = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost])
if avg_cost_np[0] < 5.0:
if save_dirname is not None:
fluid.io.save_inference_model(save_dirname, [
'firstw', 'secondw', 'thirdw', 'forthw'
], [predict_word], exe)
return
if math.isnan(float(avg_cost_np[0])):
sys.exit("got NaN loss, training failed.")
raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0])) def infer(use_cuda, save_dirname=None):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
def main(use_cuda, is_sparse, parallel): inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
# Setup inputs, by creating 4 words, the lod of which should be [0, 1]
lod = [0, 1]
first_word = create_random_lodtensor(
lod, place, low=0, high=dict_size - 1)
second_word = create_random_lodtensor(
lod, place, low=0, high=dict_size - 1)
third_word = create_random_lodtensor(
lod, place, low=0, high=dict_size - 1)
fourth_word = create_random_lodtensor(
lod, place, low=0, high=dict_size - 1)
assert feed_target_names[0] == 'firstw'
assert feed_target_names[1] == 'secondw'
assert feed_target_names[2] == 'thirdw'
assert feed_target_names[3] == 'forthw'
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program,
feed={
feed_target_names[0]: first_word,
feed_target_names[1]: second_word,
feed_target_names[2]: third_word,
feed_target_names[3]: fourth_word
},
fetch_list=fetch_targets,
return_numpy=False)
print(results[0].lod())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
def main(use_cuda, is_sparse, is_parallel):
if use_cuda and not fluid.core.is_compiled_with_cuda(): if use_cuda and not fluid.core.is_compiled_with_cuda():
return return
save_dirname = "word2vec.inference.model"
train(use_cuda, is_sparse, parallel, save_dirname) if not is_parallel:
save_dirname = "word2vec.inference.model"
else:
save_dirname = None
train(use_cuda, is_sparse, is_parallel, save_dirname)
infer(use_cuda, save_dirname) infer(use_cuda, save_dirname)
...@@ -193,10 +234,10 @@ class W2VTest(unittest.TestCase): ...@@ -193,10 +234,10 @@ class W2VTest(unittest.TestCase):
pass pass
def inject_test_method(use_cuda, is_sparse, parallel): def inject_test_method(use_cuda, is_sparse, is_parallel):
fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse" fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse"
if is_sparse else "dense", "parallel" if is_sparse else "dense", "parallel"
if parallel else "normal") if is_parallel else "normal")
def __impl__(*args, **kwargs): def __impl__(*args, **kwargs):
prog = fluid.Program() prog = fluid.Program()
...@@ -204,10 +245,12 @@ def inject_test_method(use_cuda, is_sparse, parallel): ...@@ -204,10 +245,12 @@ def inject_test_method(use_cuda, is_sparse, parallel):
scope = fluid.core.Scope() scope = fluid.core.Scope()
with fluid.scope_guard(scope): with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog): with fluid.program_guard(prog, startup_prog):
main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel) main(
use_cuda=use_cuda,
is_sparse=is_sparse,
is_parallel=is_parallel)
# run only 2 cases: use_cuda is either True or False if use_cuda and is_sparse:
if is_sparse == False and parallel == False:
fn = __impl__ fn = __impl__
else: else:
# skip the other test when on CI server # skip the other test when on CI server
...@@ -219,8 +262,8 @@ def inject_test_method(use_cuda, is_sparse, parallel): ...@@ -219,8 +262,8 @@ def inject_test_method(use_cuda, is_sparse, parallel):
for use_cuda in (False, True): for use_cuda in (False, True):
for is_sparse in (False, True): for is_sparse in (False, True):
for parallel in (False, True): for is_parallel in (False, True):
inject_test_method(use_cuda, is_sparse, parallel) inject_test_method(use_cuda, is_sparse, is_parallel)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import math import math
import sys import sys
...@@ -30,7 +30,7 @@ y_predict = fluid.layers.fc(input=x, size=1, act=None) ...@@ -30,7 +30,7 @@ y_predict = fluid.layers.fc(input=x, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y) cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
......
...@@ -17,7 +17,7 @@ from __future__ import print_function ...@@ -17,7 +17,7 @@ from __future__ import print_function
import sys import sys
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import math import math
import sys import sys
...@@ -117,7 +117,7 @@ else: ...@@ -117,7 +117,7 @@ else:
predict = fluid.layers.fc(input=net, size=classdim, act='softmax') predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001)
opts = optimizer.minimize(avg_cost) opts = optimizer.minimize(avg_cost)
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import math import math
import sys import sys
...@@ -100,7 +100,7 @@ def main(): ...@@ -100,7 +100,7 @@ def main():
label = layers.data( label = layers.data(
name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
cost = layers.cross_entropy(input=rnn_out, label=label) cost = layers.cross_entropy(input=rnn_out, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
......
...@@ -20,7 +20,7 @@ import matplotlib ...@@ -20,7 +20,7 @@ import matplotlib
import numpy import numpy
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
matplotlib.use('Agg') matplotlib.use('Agg')
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -96,7 +96,7 @@ def main(): ...@@ -96,7 +96,7 @@ def main():
x=D(img), x=D(img),
label=fluid.layers.data( label=fluid.layers.data(
name='label', shape=[1], dtype='float32')) name='label', shape=[1], dtype='float32'))
d_loss = fluid.layers.mean(x=d_loss) d_loss = fluid.layers.mean(d_loss)
with fluid.program_guard(dg_program, startup_program): with fluid.program_guard(dg_program, startup_program):
noise = fluid.layers.data( noise = fluid.layers.data(
...@@ -107,7 +107,7 @@ def main(): ...@@ -107,7 +107,7 @@ def main():
x=D(g_img), x=D(g_img),
label=fluid.layers.fill_constant_batch_size_like( label=fluid.layers.fill_constant_batch_size_like(
input=noise, dtype='float32', shape=[-1, 1], value=1.0)) input=noise, dtype='float32', shape=[-1, 1], value=1.0))
dg_loss = fluid.layers.mean(x=dg_loss) dg_loss = fluid.layers.mean(dg_loss)
opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE) opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
......
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
class TestRoutineOp(unittest.TestCase): class TestRoutineOp(unittest.TestCase):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
class TestCSPFramework(unittest.TestCase): class TestCSPFramework(unittest.TestCase):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
prog = fluid.framework.Program() prog = fluid.framework.Program()
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid as fluid import paddle.fluid as fluid
def test_converter(): def test_converter():
......
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
import unittest import unittest
...@@ -145,5 +145,43 @@ class TestMultiBoxHead(unittest.TestCase): ...@@ -145,5 +145,43 @@ class TestMultiBoxHead(unittest.TestCase):
return mbox_locs, mbox_confs, box, var return mbox_locs, mbox_confs, box, var
class TestDetectionMAP(unittest.TestCase):
    """Builds the detection_map layer twice in one program and checks its outputs."""

    def test_detection_map(self):
        # Both calls to layers.detection_map below are made on the same two
        # data layers, inside the same Program.
        program = Program()
        with program_guard(program):
            detect_res = layers.data(
                name='detect_res',
                shape=[10, 6],
                append_batch_size=False,
                dtype='float32')
            label = layers.data(
                name='label',
                shape=[10, 6],
                append_batch_size=False,
                dtype='float32')

            # First construction: four outputs, none of which may be None.
            (map_out, accum_pos_count_out, accum_true_pos_out,
             accum_false_pos_out) = layers.detection_map(
                 detect_res=detect_res, label=label)
            first_outputs = (map_out, accum_pos_count_out, accum_true_pos_out,
                             accum_false_pos_out)
            for out in first_outputs:
                self.assertIsNotNone(out)
            self.assertEqual(map_out.shape, (1, ))

            # Second construction: map_out is deliberately rebound to the
            # result of the second call, as in the original test.
            (map_out, accum_pos_count_out2, accum_true_pos_out2,
             accum_false_pos_out2) = layers.detection_map(
                 detect_res=detect_res, label=label)
            second_outputs = (map_out, accum_pos_count_out2,
                              accum_true_pos_out2, accum_false_pos_out2)
            for out in second_outputs:
                self.assertIsNotNone(out)
            self.assertEqual(map_out.shape, (1, ))

            # The accumulator outputs of both calls must agree in shape.
            for first, second in zip(first_outputs[1:], second_outputs[1:]):
                self.assertEqual(first.shape, second.shape)
        print(str(program))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
BATCH_SIZE = 128 BATCH_SIZE = 128
CLIP_MAX = 2e-6 CLIP_MAX = 2e-6
...@@ -33,7 +33,7 @@ with fluid.program_guard(main_program=prog): ...@@ -33,7 +33,7 @@ with fluid.program_guard(main_program=prog):
label = fluid.layers.data(name='y', shape=[1], dtype='int64') label = fluid.layers.data(name='y', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
prog_clip = prog.clone() prog_clip = prog.clone()
prog_clip.block(0).var(hidden1.name).set_error_clip( prog_clip.block(0).var(hidden1.name).set_error_clip(
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid as fluid import paddle.fluid as fluid
BATCH_SIZE = 128 BATCH_SIZE = 128
CLIP = 1 CLIP = 1
...@@ -30,7 +30,7 @@ with fluid.program_guard(main_program=prog): ...@@ -30,7 +30,7 @@ with fluid.program_guard(main_program=prog):
label = fluid.layers.data(name='y', shape=[1], dtype='int64') label = fluid.layers.data(name='y', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
prog_clip = prog.clone() prog_clip = prog.clone()
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard, default_main_program, default_startup_program from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.optimizer import MomentumOptimizer from paddle.fluid.optimizer import MomentumOptimizer
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2 as paddle import paddle.v2 as paddle
import unittest import unittest
import numpy as np import numpy as np
...@@ -56,7 +56,7 @@ class TestMNISTIfElseOp(unittest.TestCase): ...@@ -56,7 +56,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
prob = layers.merge_lod_tensor( prob = layers.merge_lod_tensor(
in_true=true_out, in_false=false_out, mask=cond, x=image) in_true=true_out, in_false=false_out, mask=cond, x=image)
loss = layers.cross_entropy(input=prob, label=label) loss = layers.cross_entropy(input=prob, label=label)
avg_loss = layers.mean(x=loss) avg_loss = layers.mean(loss)
optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
optimizer.minimize(avg_loss, startup_prog) optimizer.minimize(avg_loss, startup_prog)
...@@ -113,7 +113,7 @@ class TestMNISTIfElseOp(unittest.TestCase): ...@@ -113,7 +113,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
prob = ie() prob = ie()
loss = layers.cross_entropy(input=prob[0], label=label) loss = layers.cross_entropy(input=prob[0], label=label)
avg_loss = layers.mean(x=loss) avg_loss = layers.mean(loss)
optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
optimizer.minimize(avg_loss, startup_prog) optimizer.minimize(avg_loss, startup_prog)
......
...@@ -16,9 +16,9 @@ import unittest ...@@ -16,9 +16,9 @@ import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid as fluid import paddle.fluid as fluid
class TestPythonOperatorOverride(unittest.TestCase): class TestPythonOperatorOverride(unittest.TestCase):
......
...@@ -41,6 +41,7 @@ list(REMOVE_ITEM TEST_OPS test_while_op) ...@@ -41,6 +41,7 @@ list(REMOVE_ITEM TEST_OPS test_while_op)
list(REMOVE_ITEM TEST_OPS test_lod_array_length_op) list(REMOVE_ITEM TEST_OPS test_lod_array_length_op)
list(REMOVE_ITEM TEST_OPS test_reorder_lod_tensor) list(REMOVE_ITEM TEST_OPS test_reorder_lod_tensor)
list(REMOVE_ITEM TEST_OPS test_profiler) list(REMOVE_ITEM TEST_OPS test_profiler)
list(REMOVE_ITEM TEST_OPS test_nvprof)
list(REMOVE_ITEM TEST_OPS test_normalization_wrapper) list(REMOVE_ITEM TEST_OPS test_normalization_wrapper)
list(REMOVE_ITEM TEST_OPS test_executor_and_mul) list(REMOVE_ITEM TEST_OPS test_executor_and_mul)
list(REMOVE_ITEM TEST_OPS test_assign_value_op) list(REMOVE_ITEM TEST_OPS test_assign_value_op)
...@@ -75,6 +76,7 @@ py_test_modules(test_while_op MODULES test_while_op) ...@@ -75,6 +76,7 @@ py_test_modules(test_while_op MODULES test_while_op)
py_test_modules(test_lod_array_length_op MODULES test_lod_array_length_op) py_test_modules(test_lod_array_length_op MODULES test_lod_array_length_op)
py_test_modules(test_reorder_lod_tensor MODULES test_reorder_lod_tensor) py_test_modules(test_reorder_lod_tensor MODULES test_reorder_lod_tensor)
py_test_modules(test_profiler MODULES test_profiler) py_test_modules(test_profiler MODULES test_profiler)
py_test_modules(test_nvprof MODULES test_nvprof)
py_test_modules(test_normalization_wrapper MODULES test_normalization_wrapper) py_test_modules(test_normalization_wrapper MODULES test_normalization_wrapper)
py_test_modules(test_executor_and_mul MODULES test_executor_and_mul) py_test_modules(test_executor_and_mul MODULES test_executor_and_mul)
py_test_modules(test_assign_value_op MODULES test_assign_value_op) py_test_modules(test_assign_value_op MODULES test_assign_value_op)
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid as fluid import paddle.fluid as fluid
__all__ = ['many_times', 'prog_scope'] __all__ = ['many_times', 'prog_scope']
......
...@@ -16,12 +16,12 @@ import unittest ...@@ -16,12 +16,12 @@ import unittest
import numpy as np import numpy as np
import random import random
import itertools import itertools
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import collections import collections
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.framework import Program, OpProtoHolder from paddle.fluid.framework import Program, OpProtoHolder
def randomize_probability(batch_size, class_num, dtype='float32'): def randomize_probability(batch_size, class_num, dtype='float32'):
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
from op_test import OpTest from op_test import OpTest
import math import math
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
from paddle.v2.fluid import core from paddle.fluid import core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
class TestAdamOp1(OpTest): class TestAdamOp1(OpTest):
......
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program from paddle.fluid.framework import default_main_program
import numpy import numpy
...@@ -49,15 +49,15 @@ class TestArrayReadWrite(unittest.TestCase): ...@@ -49,15 +49,15 @@ class TestArrayReadWrite(unittest.TestCase):
i = layers.increment(x=i) i = layers.increment(x=i)
a2 = layers.array_read(array=arr, i=i) a2 = layers.array_read(array=arr, i=i)
mean_a0 = layers.mean(x=a0) mean_a0 = layers.mean(a0)
mean_a1 = layers.mean(x=a1) mean_a1 = layers.mean(a1)
mean_a2 = layers.mean(x=a2) mean_a2 = layers.mean(a2)
a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2]) a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])
mean_x0 = layers.mean(x=x[0]) mean_x0 = layers.mean(x[0])
mean_x1 = layers.mean(x=x[1]) mean_x1 = layers.mean(x[1])
mean_x2 = layers.mean(x=x[2]) mean_x2 = layers.mean(x[2])
x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2]) x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])
......
...@@ -12,12 +12,12 @@ ...@@ -12,12 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import op_test import op_test
import numpy import numpy
import unittest import unittest
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
class TestAssignValueOp(op_test.OpTest): class TestAssignValueOp(op_test.OpTest):
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
from paddle.v2.fluid.framework import grad_var_name from paddle.fluid.framework import grad_var_name
def get_backward_op(scope, op, no_grad_set): def get_backward_op(scope, op, no_grad_set):
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
class TestBeamSearchDecodeOp(unittest.TestCase): class TestBeamSearchDecodeOp(unittest.TestCase):
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import logging import logging
from paddle.v2.fluid.op import Operator, DynamicRecurrentOp from paddle.fluid.op import Operator, DynamicRecurrentOp
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
...@@ -46,7 +46,20 @@ def bipartite_match(distance, match_indices, match_dist): ...@@ -46,7 +46,20 @@ def bipartite_match(distance, match_indices, match_dist):
idx += 1 idx += 1
def argmax_match(distance, match_indices, match_dist, threshold):
    """Fill still-unmatched columns with their best row, if it clears a threshold.

    For every column ``j`` whose entry in ``match_indices`` is still ``-1``,
    the row with the largest value in ``distance[:, j]`` among the rows whose
    value is ``>= threshold`` is written to ``match_indices[j]`` and that
    value is written to ``match_dist[j]``. Columns that are already matched,
    or that have no value reaching ``threshold``, are left untouched.

    Args:
        distance (numpy.array): 2-D array indexed as ``[row, column]``.
        match_indices (numpy.array): 1-D array with one entry per column of
            ``distance``; updated in place.
        match_dist (numpy.array): 1-D array with one entry per column of
            ``distance``; updated in place.
        threshold (float): minimum value a row must reach to be selected.

    Returns:
        None. Results are written into ``match_indices`` and ``match_dist``.
    """
    # Unpacking keeps the implicit "distance must be 2-D" check of the
    # original; the row count itself is not needed.
    _, num_cols = distance.shape
    # range() instead of xrange() so the helper also runs on Python 3.
    for j in range(num_cols):
        if match_indices[j] != -1:
            # Column already has a match; never overwrite it.
            continue
        col_dist = distance[:, j]
        indices = np.argwhere(col_dist >= threshold).flatten()
        if len(indices) < 1:
            continue
        # argmax is taken over the filtered values, so map it back to the
        # original row index through `indices`.
        best_row = indices[np.argmax(col_dist[indices])]
        match_indices[j] = best_row
        match_dist[j] = col_dist[best_row]
def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None):
"""Bipartite Matching algorithm for batch input. """Bipartite Matching algorithm for batch input.
Arg: Arg:
distance (numpy.array) : The distance of two entries with shape [M, N]. distance (numpy.array) : The distance of two entries with shape [M, N].
...@@ -59,6 +72,9 @@ def batch_bipartite_match(distance, lod): ...@@ -59,6 +72,9 @@ def batch_bipartite_match(distance, lod):
for i in range(len(lod) - 1): for i in range(len(lod) - 1):
bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :],
match_dist[i, :]) match_dist[i, :])
if match_type == 'per_prediction':
argmax_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :],
match_dist[i, :], dist_threshold)
return match_indices, match_dist return match_indices, match_dist
...@@ -71,8 +87,8 @@ class TestBipartiteMatchOpWithLoD(OpTest): ...@@ -71,8 +87,8 @@ class TestBipartiteMatchOpWithLoD(OpTest):
self.inputs = {'DistMat': (dist, lod)} self.inputs = {'DistMat': (dist, lod)}
self.outputs = { self.outputs = {
'ColToRowMatchIndices': (match_indices), 'ColToRowMatchIndices': match_indices,
'ColToRowMatchDist': (match_dist), 'ColToRowMatchDist': match_dist,
} }
def test_check_output(self): def test_check_output(self):
...@@ -96,5 +112,27 @@ class TestBipartiteMatchOpWithoutLoD(OpTest): ...@@ -96,5 +112,27 @@ class TestBipartiteMatchOpWithoutLoD(OpTest):
self.check_output() self.check_output()
class TestBipartiteMatchOpWithPerPredictionType(OpTest):
    """Checks bipartite_match output with match_type 'per_prediction'."""

    def setUp(self):
        match_type = 'per_prediction'
        dist_threshold = 0.5

        # Three LoD segments over 23 rows: [0, 5), [5, 11), [11, 23).
        lod = [[0, 5, 11, 23]]
        dist = np.random.random((23, 237)).astype('float32')

        # Reference result from the NumPy implementation in this file.
        expected_indices, expected_dist = batch_bipartite_match(
            dist, lod[0], match_type, dist_threshold)

        self.op_type = 'bipartite_match'
        self.inputs = {'DistMat': (dist, lod)}
        self.attrs = {
            'match_type': match_type,
            'dist_threshold': dist_threshold,
        }
        self.outputs = {
            'ColToRowMatchIndices': expected_indices,
            'ColToRowMatchDist': expected_dist,
        }

    def test_check_output(self):
        self.check_output()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer import paddle.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import calc_gradient from paddle.fluid.backward import calc_gradient
class TestCalcGradient(unittest.TestCase): class TestCalcGradient(unittest.TestCase):
...@@ -26,7 +26,7 @@ class TestCalcGradient(unittest.TestCase): ...@@ -26,7 +26,7 @@ class TestCalcGradient(unittest.TestCase):
x = layers.create_parameter(dtype="float32", shape=[5, 10]) x = layers.create_parameter(dtype="float32", shape=[5, 10])
y = layers.create_parameter(dtype="float32", shape=[10, 8]) y = layers.create_parameter(dtype="float32", shape=[10, 8])
mul_out = layers.mul(x=x, y=y) mul_out = layers.mul(x=x, y=y)
mean_out = layers.mean(x=mul_out) mean_out = layers.mean(mul_out)
a = calc_gradient(mean_out, mul_out) a = calc_gradient(mean_out, mul_out)
b = calc_gradient(mean_out, x) b = calc_gradient(mean_out, x)
place = fluid.CPUPlace() place = fluid.CPUPlace()
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import op_test import op_test
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class TestCastOp(op_test.OpTest): class TestCastOp(op_test.OpTest):
......
...@@ -38,7 +38,10 @@ def create_test_class(op_type, typename, callback): ...@@ -38,7 +38,10 @@ def create_test_class(op_type, typename, callback):
for _type_name in {'float32', 'float64', 'int32', 'int64'}: for _type_name in {'float32', 'float64', 'int32', 'int64'}:
create_test_class('less_than', _type_name, lambda _a, _b: _a < _b) create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b) create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
create_test_class('equal', _type_name, lambda _a, _b: _a == _b) create_test_class('equal', _type_name, lambda _a, _b: _a == _b)
create_test_class('not_equal', _type_name, lambda _a, _b: _a != _b)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -13,10 +13,10 @@ ...@@ -13,10 +13,10 @@
# limitations under the License. # limitations under the License.
import logging import logging
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
from paddle.v2.fluid.op import Operator, CondOp from paddle.fluid.op import Operator, CondOp
class PySimpleCond(object): class PySimpleCond(object):
......
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.framework import default_startup_program, default_main_program from paddle.fluid.framework import default_startup_program, default_main_program
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
import numpy import numpy
...@@ -39,7 +39,7 @@ class ConditionalBlock(unittest.TestCase): ...@@ -39,7 +39,7 @@ class ConditionalBlock(unittest.TestCase):
outs = exe.run(feed={'X': x}, fetch_list=[out])[0] outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
print outs print outs
loss = layers.mean(x=out) loss = layers.mean(out)
append_backward(loss=loss) append_backward(loss=loss)
outs = exe.run( outs = exe.run(
feed={'X': x}, feed={'X': x},
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
class ConditionalBlock(unittest.TestCase): class ConditionalBlock(unittest.TestCase):
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
...@@ -210,6 +210,19 @@ class TestWithDilation(TestConv2dOp): ...@@ -210,6 +210,19 @@ class TestWithDilation(TestConv2dOp):
self.groups = 3 self.groups = 3
class TestWithInput1x1Filter1x1(TestConv2dOp):
    """conv2d case with a 1x1 spatial input, a 1x1 filter and 3 groups."""

    def init_test_case(self):
        self.pad = [0, 0]
        self.stride = [1, 1]
        self.input_size = [2, 3, 1, 1]  # NCHW
        # The channel count must split evenly across the groups.
        assert np.mod(self.input_size[1], self.groups) == 0
        # Floor division keeps the per-group channel count an integer on
        # Python 3 as well; "/" would put a float into filter_size there.
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 1, 1]

    def init_group(self):
        self.groups = 3
#----------------Conv2dCUDNN---------------- #----------------Conv2dCUDNN----------------
class TestCUDNN(TestConv2dOp): class TestCUDNN(TestConv2dOp):
def init_op_type(self): def init_op_type(self):
...@@ -241,6 +254,12 @@ class TestCUDNNWith1x1(TestWith1x1): ...@@ -241,6 +254,12 @@ class TestCUDNNWith1x1(TestWith1x1):
self.op_type = "conv2d" self.op_type = "conv2d"
class TestCUDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
    """Same case as TestWithInput1x1Filter1x1, with use_cudnn set to True."""

    def init_op_type(self):
        # The op type stays "conv2d"; only the use_cudnn flag is switched on.
        self.op_type = "conv2d"
        self.use_cudnn = True
class TestDepthwiseConv(TestConv2dOp): class TestDepthwiseConv(TestConv2dOp):
def init_test_case(self): def init_test_case(self):
self.pad = [1, 1] self.pad = [1, 1]
...@@ -265,7 +284,8 @@ class TestDepthwiseConv2(TestConv2dOp): ...@@ -265,7 +284,8 @@ class TestDepthwiseConv2(TestConv2dOp):
self.op_type = "depthwise_conv2d" self.op_type = "depthwise_conv2d"
# cudnn v5 does not support dilation conv. # Please Don't remove the following code.
# Currently, CI uses cuDNN v5.0, which does not support dilation conv.
# class TestCUDNNWithDilation(TestWithDilation): # class TestCUDNNWithDilation(TestWithDilation):
# def init_op_type(self): # def init_op_type(self):
# self.op_type = "conv_cudnn" # self.op_type = "conv_cudnn"
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
...@@ -200,7 +200,8 @@ class TestCUDNNWithStride(TestWithStride): ...@@ -200,7 +200,8 @@ class TestCUDNNWithStride(TestWithStride):
self.op_type = "conv2d_transpose" self.op_type = "conv2d_transpose"
# #cudnn v5 does not support dilation conv. # Please Don't remove the following code.
# Currently, CI uses cuDNN v5.0, which does not support dilation conv.
# class TestCUDNNWithDilation(TestWithDilation): # class TestCUDNNWithDilation(TestWithDilation):
# def init_test_case(self): # def init_test_case(self):
# self.pad = [1, 1] # self.pad = [1, 1]
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
...@@ -200,6 +200,22 @@ class TestWith1x1(TestConv3dOp): ...@@ -200,6 +200,22 @@ class TestWith1x1(TestConv3dOp):
self.groups = 3 self.groups = 3
class TestWithInput1x1Filter1x1(TestConv3dOp):
    """conv3d case with a 1x1x1 spatial input, a 1x1x1 filter and 3 groups."""

    def init_test_case(self):
        self.pad = [0, 0, 0]
        self.stride = [1, 1, 1]
        self.input_size = [2, 3, 1, 1, 1]  # NCDHW
        # The channel count must split evenly across the groups.
        assert np.mod(self.input_size[1], self.groups) == 0
        # Floor division keeps the per-group channel count an integer on
        # Python 3 as well; "/" would put a float into filter_size there.
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 1, 1, 1]

    def init_dilation(self):
        self.dilations = [1, 1, 1]

    def init_group(self):
        self.groups = 3
class TestWithDilation(TestConv3dOp): class TestWithDilation(TestConv3dOp):
def init_test_case(self): def init_test_case(self):
self.pad = [0, 0, 0] self.pad = [0, 0, 0]
...@@ -240,6 +256,12 @@ class TestWith1x1CUDNN(TestWith1x1): ...@@ -240,6 +256,12 @@ class TestWith1x1CUDNN(TestWith1x1):
self.op_type = "conv3d" self.op_type = "conv3d"
class TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
    """Same case as TestWithInput1x1Filter1x1, with use_cudnn set to True."""

    def init_op_type(self):
        # The op type stays "conv3d"; only the use_cudnn flag is switched on.
        self.op_type = "conv3d"
        self.use_cudnn = True
# FIXME(typhoonzero): find a way to determine if # FIXME(typhoonzero): find a way to determine if
# using cudnn > 6 in python # using cudnn > 6 in python
# class TestWithDilationCUDNN(TestWithDilation): # class TestWithDilationCUDNN(TestWithDilation):
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
...@@ -207,7 +207,8 @@ class TestCUDNNWithStride(TestWithStride): ...@@ -207,7 +207,8 @@ class TestCUDNNWithStride(TestWithStride):
self.op_type = "conv3d_transpose" self.op_type = "conv3d_transpose"
# #cudnn v5 does not support dilation conv. # Please Don't remove the following code.
# Currently, CI uses cuDNN v5.0, which does not support dilated convolution.
# class TestCUDNNWithDilation(TestWithDilation): # class TestCUDNNWithDilation(TestWithDilation):
# def init_test_case(self): # def init_test_case(self):
# self.pad = [1, 1, 1] # self.pad = [1, 1, 1]
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
class TestDocString(unittest.TestCase): class TestDocString(unittest.TestCase):
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.v2.fluid.default_scope_funcs import * from paddle.fluid.default_scope_funcs import *
import unittest import unittest
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2 as paddle import paddle.v2 as paddle
import unittest import unittest
import numpy import numpy
...@@ -81,7 +81,7 @@ class TestDynRNN(unittest.TestCase): ...@@ -81,7 +81,7 @@ class TestDynRNN(unittest.TestCase):
logits = fluid.layers.fc(input=last, size=1, act=None) logits = fluid.layers.fc(input=last, size=1, act=None)
loss = fluid.layers.sigmoid_cross_entropy_with_logits( loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label) x=logits, label=label)
loss = fluid.layers.mean(x=loss) loss = fluid.layers.mean(loss)
sgd = fluid.optimizer.SGD(1e-4) sgd = fluid.optimizer.SGD(1e-4)
sgd.minimize(loss=loss) sgd.minimize(loss=loss)
cpu = fluid.CPUPlace() cpu = fluid.CPUPlace()
...@@ -119,7 +119,7 @@ class TestDynRNN(unittest.TestCase): ...@@ -119,7 +119,7 @@ class TestDynRNN(unittest.TestCase):
label = fluid.layers.data(name='label', shape=[1], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='float32')
loss = fluid.layers.sigmoid_cross_entropy_with_logits( loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label) x=logits, label=label)
loss = fluid.layers.mean(x=loss) loss = fluid.layers.mean(loss)
sgd = fluid.optimizer.Adam(1e-3) sgd = fluid.optimizer.Adam(1e-3)
sgd.minimize(loss=loss) sgd.minimize(loss=loss)
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import numpy import numpy
import random import random
import collections import collections
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import unittest import unittest
from decorators import * from decorators import *
...@@ -272,7 +272,7 @@ class TestSimpleMul(SeedFixedTestCase): ...@@ -272,7 +272,7 @@ class TestSimpleMul(SeedFixedTestCase):
out = rnn() out = rnn()
out = fluid.layers.sequence_pool(out, pool_type='last') out = fluid.layers.sequence_pool(out, pool_type='last')
loss = fluid.layers.mean(x=out) loss = fluid.layers.mean(out)
fluid.backward.append_backward(loss) fluid.backward.append_backward(loss)
cpu = fluid.CPUPlace() cpu = fluid.CPUPlace()
...@@ -348,7 +348,7 @@ class TestSimpleMulWithMemory(SeedFixedTestCase): ...@@ -348,7 +348,7 @@ class TestSimpleMulWithMemory(SeedFixedTestCase):
out = rnn() out = rnn()
last = fluid.layers.sequence_pool(input=out, pool_type='last') last = fluid.layers.sequence_pool(input=out, pool_type='last')
loss = fluid.layers.mean(x=last) loss = fluid.layers.mean(last)
fluid.backward.append_backward(loss) fluid.backward.append_backward(loss)
cpu = fluid.CPUPlace() cpu = fluid.CPUPlace()
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
import unittest import unittest
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid as fluid import paddle.fluid as fluid
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
from paddle.v2.fluid.framework import Program, switch_main_program from paddle.fluid.framework import Program, switch_main_program
import bisect import bisect
import numpy as np import numpy as np
...@@ -125,7 +125,7 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -125,7 +125,7 @@ class TestDyRnnStaticInput(unittest.TestCase):
return static_input_step_outs return static_input_step_outs
last = fluid.layers.sequence_pool(input=rnn(), pool_type='last') last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
loss = fluid.layers.mean(x=last) loss = fluid.layers.mean(last)
append_backward(loss) append_backward(loss)
static_input_grad = self._program.global_block().var( static_input_grad = self._program.global_block().var(
framework.grad_var_name('static_input_tensor')) framework.grad_var_name('static_input_tensor'))
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
import unittest import unittest
import numpy import numpy
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.layers import mul, data from paddle.fluid.layers import mul, data
class TestExecutor(unittest.TestCase): class TestExecutor(unittest.TestCase):
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import op_test import op_test
import numpy import numpy
import unittest import unittest
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class TestFillOp(OpTest): class TestFillOp(OpTest):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
from paddle.v2.fluid.framework import Program from paddle.fluid.framework import Program
class TestDebugStringFramework(unittest.TestCase): class TestDebugStringFramework(unittest.TestCase):
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
import unittest import unittest
import numpy import numpy
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
class TestGaussianRandomOp(unittest.TestCase): class TestGaussianRandomOp(unittest.TestCase):
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import decorators import decorators
import unittest import unittest
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.nets as nets import paddle.fluid.nets as nets
from paddle.v2.fluid.framework import Program from paddle.fluid.framework import Program
def conv_block(input, num_filter, groups, dropouts): def conv_block(input, num_filter, groups, dropouts):
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class TestInferShape(unittest.TestCase): class TestInferShape(unittest.TestCase):
......
...@@ -15,13 +15,13 @@ ...@@ -15,13 +15,13 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.executor as executor import paddle.fluid.executor as executor
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.optimizer as optimizer import paddle.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
from paddle.v2.fluid.io import save_inference_model, load_inference_model from paddle.fluid.io import save_inference_model, load_inference_model
class TestBook(unittest.TestCase): class TestBook(unittest.TestCase):
...@@ -38,7 +38,7 @@ class TestBook(unittest.TestCase): ...@@ -38,7 +38,7 @@ class TestBook(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None) y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y) cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost, init_program) sgd_optimizer.minimize(avg_cost, init_program)
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
import numpy as np import numpy as np
import unittest import unittest
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.initializer as initializer import paddle.fluid.initializer as initializer
DELTA = 0.00001 DELTA = 0.00001
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import unittest import unittest
import numpy as np import numpy as np
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
import paddle.v2.fluid.core as core import paddle.fluid.core as core
def create_tensor(scope, name, np_data): def create_tensor(scope, name, np_data):
......
...@@ -16,9 +16,9 @@ import numpy as np ...@@ -16,9 +16,9 @@ import numpy as np
from operator import mul from operator import mul
from op_test import OpTest from op_test import OpTest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
from paddle.v2.fluid.framework import grad_var_name from paddle.fluid.framework import grad_var_name
np.random.random(123) np.random.random(123)
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.fluid.nets as nets
from paddle.v2.fluid.framework import Program, program_guard, default_main_program from paddle.fluid.framework import Program, program_guard, default_main_program
from paddle.v2.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
import decorators import decorators
...@@ -30,7 +30,7 @@ class TestBook(unittest.TestCase): ...@@ -30,7 +30,7 @@ class TestBook(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None) y_predict = layers.fc(input=x, size=1, act=None)
y = layers.data(name='y', shape=[1], dtype='float32') y = layers.data(name='y', shape=[1], dtype='float32')
cost = layers.square_error_cost(input=y_predict, label=y) cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(cost)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
program.append_backward(avg_cost) program.append_backward(avg_cost)
...@@ -49,7 +49,7 @@ class TestBook(unittest.TestCase): ...@@ -49,7 +49,7 @@ class TestBook(unittest.TestCase):
act='softmax', act='softmax',
param_attr=["sftmax.w1", "sftmax.w2"]) param_attr=["sftmax.w1", "sftmax.w2"])
cost = layers.cross_entropy(input=predict, label=label) cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(cost)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
print(str(program)) print(str(program))
...@@ -92,7 +92,7 @@ class TestBook(unittest.TestCase): ...@@ -92,7 +92,7 @@ class TestBook(unittest.TestCase):
predict = layers.fc(input=conv_pool_2, size=10, act="softmax") predict = layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = layers.cross_entropy(input=predict, label=label) cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(cost)
program.append_backward(avg_cost) program.append_backward(avg_cost)
...@@ -140,7 +140,7 @@ class TestBook(unittest.TestCase): ...@@ -140,7 +140,7 @@ class TestBook(unittest.TestCase):
size=dict_size, size=dict_size,
act='softmax') act='softmax')
cost = layers.cross_entropy(input=predict_word, label=next_word) cost = layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(cost)
self.assertIsNotNone(avg_cost) self.assertIsNotNone(avg_cost)
print(str(program)) print(str(program))
...@@ -287,7 +287,7 @@ class TestBook(unittest.TestCase): ...@@ -287,7 +287,7 @@ class TestBook(unittest.TestCase):
num_total_classes=dict_size, num_total_classes=dict_size,
param_attr='nce.w', param_attr='nce.w',
bias_attr='nce.b') bias_attr='nce.b')
avg_loss = layers.mean(x=loss) avg_loss = layers.mean(loss)
self.assertIsNotNone(avg_loss) self.assertIsNotNone(avg_loss)
print(str(default_main_program())) print(str(default_main_program()))
......
...@@ -17,10 +17,10 @@ import unittest ...@@ -17,10 +17,10 @@ import unittest
import math import math
import copy import copy
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.learning_rate_decay as lr_decay import paddle.fluid.learning_rate_decay as lr_decay
def exponential_decay(learning_rate, def exponential_decay(learning_rate,
......
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy import numpy
......
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.v2.fluid.layers import lod_rank_table, data from paddle.fluid.layers import lod_rank_table, data
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy import numpy
import unittest import unittest
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy import numpy
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy import numpy
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
...@@ -182,7 +182,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): ...@@ -182,7 +182,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
array = layers.lod_tensor_to_array(x, table) array = layers.lod_tensor_to_array(x, table)
result = layers.array_to_lod_tensor(array, table) result = layers.array_to_lod_tensor(array, table)
mean = layers.mean(x=result) mean = layers.mean(result)
append_backward(mean) append_backward(mean)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import unittest import unittest
import decorators import decorators
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import numpy import numpy
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.optimizer as optimizer import paddle.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
from paddle.v2.fluid.memory_optimization_transpiler import memory_optimize from paddle.fluid.memory_optimization_transpiler import memory_optimize
class TestControlFlowGraph(unittest.TestCase): class TestControlFlowGraph(unittest.TestCase):
...@@ -29,7 +29,7 @@ class TestControlFlowGraph(unittest.TestCase): ...@@ -29,7 +29,7 @@ class TestControlFlowGraph(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None) y_predict = layers.fc(input=x, size=1, act=None)
y = layers.data(name='y', shape=[1], dtype='float32') y = layers.data(name='y', shape=[1], dtype='float32')
cost = layers.square_error_cost(input=y_predict, label=y) cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(x=cost) avg_cost = layers.mean(cost)
opt = optimizer.SGD(learning_rate=0.001) opt = optimizer.SGD(learning_rate=0.001)
opt = opt.minimize(avg_cost) opt = opt.minimize(avg_cost)
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
import unittest import unittest
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.layers as layers
import paddle.fluid.core as core
class TestNVProf(unittest.TestCase):
    # Smoke test: runs a small conv2d program on CUDAPlace(0) inside
    # profiler.cuda_profiler, then removes the file whose name was handed
    # to the profiler.

    def test_nvprof(self):
        # Returns without doing anything on a build compiled without CUDA.
        if not fluid.core.is_compiled_with_cuda():
            return

        num_runs = 8
        batch_shape = [4, 3, 28, 28]
        profile_path = 'cuda_profiler.txt'

        image = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
        # The conv2d result is not read; the call is made to add the layer
        # to the default program that is executed below.
        layers.conv2d(image, 20, 3, stride=[1, 1], padding=[1, 1])

        executor = fluid.Executor(fluid.CUDAPlace(0))
        executor.run(fluid.default_startup_program())

        with profiler.cuda_profiler(profile_path, 'csv'):
            for _ in range(num_runs):
                batch = np.random.random(batch_shape).astype('float32')
                executor.run(fluid.default_main_program(),
                             feed={'data': batch})

        # NOTE(review): placed after the with-block; the flattened source
        # does not show the original nesting -- confirm.
        os.remove(profile_path)
# Run this module's test cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -16,10 +16,10 @@ import unittest ...@@ -16,10 +16,10 @@ import unittest
import numpy as np import numpy as np
import math import math
from op_test import OpTest from op_test import OpTest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
from paddle.v2.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
class TestOneHotOp(OpTest): class TestOneHotOp(OpTest):
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class TestOpSupportGPU(unittest.TestCase): class TestOpSupportGPU(unittest.TestCase):
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import unittest import unittest
import paddle.v2.fluid.op as op import paddle.fluid.op as op
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import paddle.fluid.proto.framework_pb2 as framework_pb2
class TestGetAllProtos(unittest.TestCase): class TestGetAllProtos(unittest.TestCase):
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.framework import Program, default_startup_program from paddle.fluid.framework import Program, default_startup_program
main_program = default_startup_program() main_program = default_startup_program()
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
import unittest import unittest
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer import paddle.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
class TestOptimizer(unittest.TestCase): class TestOptimizer(unittest.TestCase):
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import numpy import numpy
...@@ -127,7 +127,7 @@ class BaseParallelForTest(unittest.TestCase): ...@@ -127,7 +127,7 @@ class BaseParallelForTest(unittest.TestCase):
data = next(generator) data = next(generator)
loss = generator.send(data) loss = generator.send(data)
self.assertIsNotNone(loss) self.assertIsNotNone(loss)
avg_loss = fluid.layers.mean(x=loss) avg_loss = fluid.layers.mean(loss)
fluid.backward.append_backward(loss=avg_loss) fluid.backward.append_backward(loss=avg_loss)
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -170,7 +170,7 @@ class ParallelOpTest(BaseParallelForTest): ...@@ -170,7 +170,7 @@ class ParallelOpTest(BaseParallelForTest):
x = fluid.layers.data(shape=[784], dtype='float32', name='img') x = fluid.layers.data(shape=[784], dtype='float32', name='img')
x = yield x x = yield x
hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
loss = fluid.layers.mean(x=hidden) loss = fluid.layers.mean(hidden)
yield loss yield loss
def test_simple_fc(self): def test_simple_fc(self):
...@@ -200,7 +200,7 @@ class ParallelOpTestMultipleInput(BaseParallelForTest): ...@@ -200,7 +200,7 @@ class ParallelOpTestMultipleInput(BaseParallelForTest):
hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
hidden2 = fluid.layers.fc(input=hidden1, size=200, param_attr='fc2.w') hidden2 = fluid.layers.fc(input=hidden1, size=200, param_attr='fc2.w')
hidden3 = fluid.layers.fc(input=hidden2, size=200, param_attr='fc3.w') hidden3 = fluid.layers.fc(input=hidden2, size=200, param_attr='fc3.w')
loss = fluid.layers.mean(x=hidden3) loss = fluid.layers.mean(hidden3)
yield loss yield loss
def test_simple_fc(self): def test_simple_fc(self):
......
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
from paddle.v2.fluid.framework import default_main_program from paddle.fluid.framework import default_main_program
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import paddle.v2.fluid.io as io import paddle.fluid.io as io
from paddle.v2.fluid.initializer import ConstantInitializer from paddle.fluid.initializer import ConstantInitializer
import numpy as np import numpy as np
main_program = default_main_program() main_program = default_main_program()
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
from paddle.v2.fluid.framework import switch_main_program from paddle.fluid.framework import switch_main_program
from paddle.v2.fluid.framework import Program from paddle.fluid.framework import Program
import numpy as np import numpy as np
...@@ -35,7 +35,7 @@ class TestPrintOpCPU(unittest.TestCase): ...@@ -35,7 +35,7 @@ class TestPrintOpCPU(unittest.TestCase):
x.stop_gradient = False x.stop_gradient = False
printed = layers.Print(input=x, **kargs) printed = layers.Print(input=x, **kargs)
if only_forward: return printed if only_forward: return printed
loss = layers.mean(x=printed) loss = layers.mean(printed)
append_backward(loss=loss) append_backward(loss=loss)
return loss return loss
......
...@@ -15,34 +15,16 @@ ...@@ -15,34 +15,16 @@
import unittest import unittest
import os import os
import numpy as np import numpy as np
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.profiler as profiler import paddle.fluid.profiler as profiler
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class TestProfiler(unittest.TestCase): class TestProfiler(unittest.TestCase):
def test_nvprof(self):
if not fluid.core.is_compiled_with_cuda():
return
epoc = 8
dshape = [4, 3, 28, 28]
data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output_file = 'cuda_profiler.txt'
with profiler.cuda_profiler(output_file, 'csv') as nvprof:
for i in range(epoc):
input = np.random.random(dshape).astype('float32')
exe.run(fluid.default_main_program(), feed={'data': input})
os.remove(output_file)
def net_profiler(self, state): def net_profiler(self, state):
if state == 'GPU' and not core.is_compiled_with_cuda(): enable_if_gpu = state == 'GPU' or state == "All"
if enable_if_gpu and not core.is_compiled_with_cuda():
return return
startup_program = fluid.Program() startup_program = fluid.Program()
main_program = fluid.Program() main_program = fluid.Program()
...@@ -54,7 +36,7 @@ class TestProfiler(unittest.TestCase): ...@@ -54,7 +36,7 @@ class TestProfiler(unittest.TestCase):
predict = fluid.layers.fc(input=hidden2, size=10, act='softmax') predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
label = fluid.layers.data(name='y', shape=[1], dtype='int64') label = fluid.layers.data(name='y', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(cost)
accuracy = fluid.evaluator.Accuracy(input=predict, label=label) accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9) optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
...@@ -85,6 +67,9 @@ class TestProfiler(unittest.TestCase): ...@@ -85,6 +67,9 @@ class TestProfiler(unittest.TestCase):
def test_cuda_profiler(self): def test_cuda_profiler(self):
self.net_profiler('GPU') self.net_profiler('GPU')
def test_all_profiler(self):
self.net_profiler('All')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
from paddle.v2.fluid.framework import Program, default_main_program, program_guard, grad_var_name from paddle.fluid.framework import Program, default_main_program, program_guard, grad_var_name
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
main_program = default_main_program() main_program = default_main_program()
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import paddle.fluid.proto.framework_pb2 as framework_pb2
import unittest import unittest
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class TestOpDesc(unittest.TestCase): class TestOpDesc(unittest.TestCase):
......
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.framework import Program, grad_var_name from paddle.fluid.framework import Program, grad_var_name
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class PyRNNBase(object): class PyRNNBase(object):
...@@ -127,7 +127,7 @@ class RecurrentOpTest1(unittest.TestCase): ...@@ -127,7 +127,7 @@ class RecurrentOpTest1(unittest.TestCase):
self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) self.output = layers.mean(self.create_rnn_op(), **self.p_info)
def create_rnn_op(self): def create_rnn_op(self):
x = layers.data( x = layers.data(
...@@ -261,7 +261,7 @@ class RecurrentOpTest2(RecurrentOpTest1): ...@@ -261,7 +261,7 @@ class RecurrentOpTest2(RecurrentOpTest1):
self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) self.output = layers.mean(self.create_rnn_op(), **self.p_info)
def create_rnn_op(self): def create_rnn_op(self):
x = layers.data( x = layers.data(
...@@ -360,7 +360,7 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): ...@@ -360,7 +360,7 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1):
self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3( self.py_rnn = RecurrentOpMultipleMemoryTest.PySimpleRNN3(
self.input_shape, self.output_shape) self.input_shape, self.output_shape)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) self.output = layers.mean(self.create_rnn_op(), **self.p_info)
def create_rnn_op(self): def create_rnn_op(self):
x = layers.data( x = layers.data(
...@@ -444,7 +444,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): ...@@ -444,7 +444,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1):
self.output_shape = (self.sent_len, self.batch_size, self.input_dim) self.output_shape = (self.sent_len, self.batch_size, self.input_dim)
self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape, self.py_rnn = RecurrentOpNoMemBootTest.PySimpleRNN4(self.input_shape,
self.output_shape) self.output_shape)
self.output = layers.mean(x=self.create_rnn_op(), **self.p_info) self.output = layers.mean(self.create_rnn_op(), **self.p_info)
print self.main_program print self.main_program
def create_rnn_op(self): def create_rnn_op(self):
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import numpy import numpy
from multiprocessing import Process from multiprocessing import Process
import os, sys import os, sys
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import decorators import decorators
...@@ -22,7 +22,7 @@ class TestRegistry(unittest.TestCase): ...@@ -22,7 +22,7 @@ class TestRegistry(unittest.TestCase):
@decorators.prog_scope() @decorators.prog_scope()
def test_registry_layer(self): def test_registry_layer(self):
x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32') x = fluid.layers.data(name='X', shape=[10, 10], dtype='float32')
output = fluid.layers.mean(x=x) output = fluid.layers.mean(x)
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
......
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
import unittest import unittest
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer import paddle.fluid.optimizer as optimizer
import paddle.v2.fluid.regularizer as regularizer import paddle.fluid.regularizer as regularizer
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
class TestL2DecayRegularizer(unittest.TestCase): class TestL2DecayRegularizer(unittest.TestCase):
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy import numpy
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
import unittest import unittest
from paddle.v2.fluid.framework import Program from paddle.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
class RNNMemoryHelperOpTest(unittest.TestCase): class RNNMemoryHelperOpTest(unittest.TestCase):
......
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core import paddle.fluid.core
import unittest import unittest
class TestScope(unittest.TestCase): class TestScope(unittest.TestCase):
def test_create_destroy(self): def test_create_destroy(self):
paddle_c = paddle.v2.fluid.core paddle_c = paddle.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
self.assertIsNotNone(scope) self.assertIsNotNone(scope)
scope_with_parent = scope.new_scope() scope_with_parent = scope.new_scope()
self.assertIsNotNone(scope_with_parent) self.assertIsNotNone(scope_with_parent)
def test_none_variable(self): def test_none_variable(self):
paddle_c = paddle.v2.fluid.core paddle_c = paddle.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
self.assertIsNone(scope.find_var("test")) self.assertIsNone(scope.find_var("test"))
def test_create_var_get_var(self): def test_create_var_get_var(self):
paddle_c = paddle.v2.fluid.core paddle_c = paddle.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var_a = scope.var("var_a") var_a = scope.var("var_a")
self.assertIsNotNone(var_a) self.assertIsNotNone(var_a)
...@@ -39,7 +39,7 @@ class TestScope(unittest.TestCase): ...@@ -39,7 +39,7 @@ class TestScope(unittest.TestCase):
self.assertIsNotNone(scope2.find_var('var_a')) self.assertIsNotNone(scope2.find_var('var_a'))
def test_var_get_int(self): def test_var_get_int(self):
paddle_c = paddle.v2.fluid.core paddle_c = paddle.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var = scope.var("test_int") var = scope.var("test_int")
var.set_int(10) var.set_int(10)
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
from op_test import OpTest from op_test import OpTest
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program, switch_main_program from paddle.fluid.framework import default_main_program, switch_main_program
from paddle.v2.fluid.framework import Program from paddle.fluid.framework import Program
import numpy as np import numpy as np
...@@ -39,7 +39,7 @@ class TestShrinkRNNMemoryBase(unittest.TestCase): ...@@ -39,7 +39,7 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
i = layers.increment(x=i) i = layers.increment(x=i)
i.stop_gradient = True i.stop_gradient = True
self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table) self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
mem3_mean = layers.mean(x=self.mem3) mem3_mean = layers.mean(self.mem3)
append_backward(loss=mem3_mean) append_backward(loss=mem3_mean)
self.x_grad = self.main_program.global_block().var('x@GRAD') self.x_grad = self.main_program.global_block().var('x@GRAD')
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard from paddle.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
...@@ -145,7 +145,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): ...@@ -145,7 +145,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
input=x, mask=y, level=level) input=x, mask=y, level=level)
out = layers.merge_lod_tensor( out = layers.merge_lod_tensor(
in_true=out_true, in_false=out_false, mask=y, x=x, level=level) in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
mean = layers.mean(x=out) mean = layers.mean(out)
append_backward(mean) append_backward(mean)
......
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
from paddle.v2.fluid.op import Operator from paddle.fluid.op import Operator
class TestSpliteSelectedRows(unittest.TestCase): class TestSpliteSelectedRows(unittest.TestCase):
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
import math import math
import unittest import unittest
from paddle.v2.fluid.distribute_transpiler import split_dense_variable from paddle.fluid.distribute_transpiler import split_dense_variable
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import random import random
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
import unittest import unittest
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.v2.fluid.framework as framework import paddle.fluid.framework as framework
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.v2.fluid.framework import default_startup_program from paddle.fluid.framework import default_startup_program
class TestSwitch(unittest.TestCase): class TestSwitch(unittest.TestCase):
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
import numpy import numpy
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
class TestUniqueName(unittest.TestCase):
    """Exercises fluid.unique_name.guard and fluid.unique_name.generate."""

    def test_guard(self):
        # NOTE(review): the nesting of the `with` blocks was reconstructed
        # from a flattened listing; they are assumed to be sequential.

        # Two guards without an argument: the first generated name matches.
        with fluid.unique_name.guard():
            first = fluid.unique_name.generate('')

        with fluid.unique_name.guard():
            second = fluid.unique_name.generate('')

        self.assertEqual(first, second)

        # Guards built from different prefix strings: the names differ.
        with fluid.unique_name.guard("A"):
            first = fluid.unique_name.generate('')

        with fluid.unique_name.guard('B'):
            second = fluid.unique_name.generate('')

        self.assertNotEqual(first, second)

    def test_generate(self):
        with fluid.unique_name.guard():
            fc_first = fluid.unique_name.generate('fc')
            fc_second = fluid.unique_name.generate('fc')
            tmp_first = fluid.unique_name.generate('tmp')
            # Same key twice -> different names.
            self.assertNotEqual(fc_first, fc_second)
            # First use of each key -> same two-character suffix.
            self.assertEqual(fc_first[-2:], tmp_first[-2:])
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
from paddle.v2.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_ from paddle.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_
import paddle.v2.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
import unittest import unittest
import numpy import numpy
import collections import collections
import paddle.v2.fluid as fluid import paddle.fluid as fluid
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.initializer import ConstantInitializer from paddle.fluid.initializer import ConstantInitializer
from paddle.v2.fluid.param_attr import WeightNormParamAttr from paddle.fluid.param_attr import WeightNormParamAttr
class TestWeightNormalization(unittest.TestCase): class TestWeightNormalization(unittest.TestCase):
......
...@@ -13,10 +13,10 @@ ...@@ -13,10 +13,10 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.fluid.executor import Executor
import paddle.v2.fluid.core as core import paddle.fluid.core as core
from paddle.v2.fluid.backward import append_backward from paddle.fluid.backward import append_backward
import numpy import numpy
...@@ -58,7 +58,7 @@ class TestWhileOp(unittest.TestCase): ...@@ -58,7 +58,7 @@ class TestWhileOp(unittest.TestCase):
layers.less_than(x=i, y=array_len, cond=cond) layers.less_than(x=i, y=array_len, cond=cond)
sum_result = layers.array_read(array=mem_array, i=i) sum_result = layers.array_read(array=mem_array, i=i)
loss = layers.mean(x=sum_result) loss = layers.mean(sum_result)
append_backward(loss) append_backward(loss)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import contextlib
import sys
__all__ = ['generate', 'switch', 'guard', 'UniqueNameGenerator']
class UniqueNameGenerator(object):
    """
    Produce names of the form ``<prefix><key>_<n>``, where ``n`` is the number
    of times ``key`` has previously been requested from this instance.

    Args:
        prefix(str): Text placed in front of every generated name. ``None``
            is stored as the empty string.
    """

    def __init__(self, prefix=None):
        # Per-key counters; a key that has not been seen yet reads as 0.
        self.ids = collections.defaultdict(int)
        self.prefix = "" if prefix is None else prefix

    def __call__(self, key):
        """
        Return the next name for ``key`` and advance that key's counter.

        Args:
            key(str): The stem of the generated name.

        Returns(str): ``prefix`` followed by ``key``, an underscore and the
            counter value before the increment.
        """
        serial = self.ids[key]
        self.ids[key] = serial + 1
        return self.prefix + key + "_" + str(serial)
generator = UniqueNameGenerator()
def generate(key):
    """Return a name for ``key`` from the current module-level ``generator``."""
    active = generator
    return active(key)
def switch(new_generator=None):
    """
    Install ``new_generator`` as the module-level ``generator``.

    Args:
        new_generator: The generator to install. When ``None``, a fresh
            ``UniqueNameGenerator()`` is created and installed instead.

    Returns: The generator that was installed before this call.
    """
    global generator
    previous = generator
    generator = (UniqueNameGenerator()
                 if new_generator is None else new_generator)
    return previous
@contextlib.contextmanager
def guard(new_generator=None):
    """
    Context manager that swaps the module-level generator for the duration
    of a ``with`` block and puts the previous one back afterwards.

    Args:
        new_generator: ``None`` (a fresh ``UniqueNameGenerator()`` is used,
            see ``switch``), a string (used as the prefix of a new
            ``UniqueNameGenerator``), or a generator object to install as is.
    """
    if isinstance(new_generator, basestring):
        new_generator = UniqueNameGenerator(new_generator)
    old = switch(new_generator)
    try:
        yield
    finally:
        # Restore the previous generator even when the `with` body raises;
        # otherwise the temporary generator would stay installed.
        switch(old)
...@@ -41,6 +41,26 @@ EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' ...@@ -41,6 +41,26 @@ EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'
UNK_IDX = 0 UNK_IDX = 0
def load_label_dict(filename):
    """
    Build a label -> index mapping from a file with one label per line.

    Lines are stripped; every line starting with "B-" or "I-" contributes the
    text after that two-character marker as a tag. Each distinct tag gets two
    consecutive indices ("B-<tag>" then "I-<tag>"), and "O" gets the index
    after the last tag.

    Args:
        filename: Path of the label file.

    Returns:
        dict mapping "B-<tag>", "I-<tag>" and "O" to integer indices.
    """
    tags = set()
    with open(filename, 'r') as label_file:
        for raw in label_file:
            entry = raw.strip()
            if entry.startswith(("B-", "I-")):
                tags.add(entry[2:])

    # NOTE(review): indices follow the iteration order of a set, so which tag
    # receives which pair of indices is not fixed by this code.
    label_ids = dict()
    for pos, tag in enumerate(tags):
        label_ids["B-" + tag] = 2 * pos
        label_ids["I-" + tag] = 2 * pos + 1
    label_ids["O"] = 2 * len(tags)
    return label_ids
def load_dict(filename): def load_dict(filename):
d = dict() d = dict()
with open(filename, 'r') as f: with open(filename, 'r') as f:
...@@ -188,7 +208,7 @@ def get_dict(): ...@@ -188,7 +208,7 @@ def get_dict():
verb_dict = load_dict( verb_dict = load_dict(
paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st',
VERBDICT_MD5)) VERBDICT_MD5))
label_dict = load_dict( label_dict = load_label_dict(
paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st',
TRGDICT_MD5)) TRGDICT_MD5))
return word_dict, verb_dict, label_dict return word_dict, verb_dict, label_dict
......
# Collect every test_*.py file in this directory, as names relative to it.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
# Remove the ".py" text from every entry so the remainder can serve as the
# test name.
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# Register one test per collected script through py_test (defined outside
# this snippet).
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
# Network: one fully connected layer from 13 inputs to 1 output, trained with
# a squared-error cost.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 20

# Shuffled, batched reader over the UCI housing training set.
shuffled_reader = paddle.reader.shuffle(
    paddle.dataset.uci_housing.train(), buf_size=500)
train_reader = paddle.batch(shuffled_reader, batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# Parameter server endpoints used for splitting parameters.
pserver_endpoints = os.getenv("PSERVERS")
# Endpoint of the server running on the current node.
current_endpoint = os.getenv("SERVER_ENDPOINT")
# Role of this process; "TRAINER" is used when TRAINING_ROLE is not set.
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
else:
    trainer_prog = t.get_trainer_program()
    exe.run(fluid.default_startup_program())

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        # Save and immediately reload the persistable variables every pass.
        fluid.io.save_persistables(exe, "./fit_a_line.model/")
        fluid.io.load_persistables(exe, "./fit_a_line.model/")
        for data in train_reader():
            avg_loss_value = exe.run(
                trainer_prog, feed=feeder.feed(data), fetch_list=[avg_cost])
            print("loss:" + str(avg_loss_value))
            # Exit successfully as soon as the fetched loss drops below 10.
            if avg_loss_value[0] < 10.0:
                exit(0)
# Reaching this point means the success exit above was never taken.
exit(1)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
import sys
TRAINERS = 5
BATCH_SIZE = 128
PASS_NUM = 100
def resnet_cifar10(input, depth=32):
    """
    Build a residual network on top of ``input`` and return its pooled output.

    Args:
        input: The layer the network is built on.
        depth(int): Network depth; ``depth - 2`` must be divisible by 6. Each
            of the three residual stages contains ``(depth - 2) // 6`` blocks.

    Returns:
        The output of the final average pooling layer.
    """

    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
        # Convolution without bias or activation, followed by batch norm that
        # applies `act`.
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=False)
        return fluid.layers.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        # A 1x1 conv/batch-norm projection is used only when the channel
        # counts differ; otherwise the input is passed through unchanged.
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        # Two 3x3 conv/batch-norm layers added to the shortcut, then relu.
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
        short = shortcut(input, ch_in, ch_out, stride)
        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        # The first block may change channels/stride; the remaining
        # `count - 1` blocks keep `ch_out` channels with stride 1.
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    # Floor division keeps `n` an int under both Python 2 and Python 3; with
    # true division `range(1, count)` in layer_warp would receive a float.
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = fluid.layers.pool2d(
        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
    return pool
def vgg16_bn_drop(input):
    """
    Build five batch-normalized convolution groups with dropout on top of
    ``input``, followed by dropout / fc / batch-norm / dropout / fc.

    Returns:
        The output of the last fully connected layer (size 512).
    """

    def conv_block(input, num_filter, groups, dropouts):
        # `groups` 3x3 relu convolutions with `num_filter` filters each,
        # batch norm with the given drop rates, then a 2x2 max pool.
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    # (num_filter, groups, dropouts) for each stage, applied in order.
    stage_specs = [
        (64, 2, [0.3, 0]),
        (128, 2, [0.4, 0]),
        (256, 3, [0.4, 0.4, 0]),
        (512, 3, [0.4, 0.4, 0]),
        (512, 3, [0.4, 0.4, 0]),
    ]
    features = input
    for num_filter, groups, dropouts in stage_specs:
        features = conv_block(features, num_filter, groups, dropouts)

    dropped = fluid.layers.dropout(x=features, dropout_prob=0.5)
    hidden = fluid.layers.fc(input=dropped, size=512, act=None)
    normed = fluid.layers.batch_norm(input=hidden, act='relu')
    dropped_again = fluid.layers.dropout(x=normed, dropout_prob=0.5)
    return fluid.layers.fc(input=dropped_again, size=512, act=None)
classdim = 10
data_shape = [3, 32, 32]

images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')

# The network type is taken from the first command line argument and
# defaults to "vgg".
net_type = "vgg"
if len(sys.argv) >= 2:
    net_type = sys.argv[1]

if net_type == "vgg":
    print("training vgg net")
    net = vgg16_bn_drop(images)
elif net_type == "resnet":
    print("training resnet")
    net = resnet_cifar10(images, 32)
else:
    raise ValueError("%s network is not supported" % net_type)

predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)

optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

# Shuffled, batched reader over the CIFAR-10 training set.
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=128 * 10),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# Parameter server endpoints used for splitting parameters.
pserver_endpoints = os.getenv("PSERVERS")
# Endpoint of the server running on the current node.
current_endpoint = os.getenv("SERVER_ENDPOINT")
# Role of this process; "TRAINER" is used when TRAINING_ROLE is not set.
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            print("pass_id:" + str(pass_id) + "loss:" + str(loss) + " pass_acc:"
                  + str(pass_acc))
            # this model is slow, so if we can train two mini batches,
            # we think it works properly.
    print("trainer run end")
else:
    # The message names the variable that is actually read above
    # (TRAINING_ROLE) so the user knows which one to set.
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid as fluid
import time
import os
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3
IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20
embedding_name = 'emb'
def load_parameter(file_name, h, w):
    """
    Read a float32 matrix from a binary file that starts with a 16-byte header.

    Args:
        file_name: Path of the file to read.
        h: Number of rows of the returned array.
        w: Number of columns of the returned array.

    Returns:
        A numpy float32 array of shape (h, w) holding everything after the
        header.
    """
    header_size = 16
    with open(file_name, 'rb') as param_file:
        param_file.read(header_size)  # skip header.
        values = np.fromfile(param_file, dtype=np.float32)
        return values.reshape(h, w)
def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    """
    Build a stack of `depth` dynamic LSTM layers over eight embedded input
    features and return the per-label feature layer.

    Args:
        word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2: Inputs embedded with the
            shared, non-trainable parameter named by `embedding_name`.
        predicate: Input embedded with the parameter 'vemb'.
        mark: Input embedded into `mark_dim` dimensions.
        **ignored: Extra keyword arguments; accepted and not used.

    Returns:
        The sum of two fc layers of size `label_dict_len`.
    """
    # 8 features
    predicate_embedding = fluid.layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')

    mark_embedding = fluid.layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE)

    # The six word-like inputs all use the same embedding parameter.
    emb_layers = []
    for token_input in [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]:
        emb_layers.append(
            fluid.layers.embedding(
                size=[word_dict_len, word_dim],
                input=token_input,
                param_attr=fluid.ParamAttr(
                    name=embedding_name, trainable=False)))
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    # Project every embedding to `hidden_dim` and sum the projections.
    projected = []
    for emb in emb_layers:
        projected.append(fluid.layers.fc(input=emb, size=hidden_dim))
    hidden_0 = fluid.layers.sums(input=projected)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    prev_hidden, prev_lstm = hidden_0, lstm_0
    for i in range(1, depth):
        hidden_proj = fluid.layers.fc(input=prev_hidden, size=hidden_dim)
        lstm_proj = fluid.layers.fc(input=prev_lstm, size=hidden_dim)
        mix_hidden = fluid.layers.sums(input=[hidden_proj, lstm_proj])
        # Odd-numbered layers run in reverse.
        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=(i % 2 == 1))
        prev_hidden, prev_lstm = mix_hidden, lstm

    hidden_out = fluid.layers.fc(input=prev_hidden, size=label_dict_len)
    lstm_out = fluid.layers.fc(input=prev_lstm, size=label_dict_len)
    return fluid.layers.sums(input=[hidden_out, lstm_out])
def to_lodtensor(data, place):
    """
    Pack a list of sequences into one fluid.LoDTensor.

    The sequences are concatenated along axis 0, cast to int64 and reshaped
    to a single column; the level-of-detail table holds the cumulative
    sequence lengths, starting at 0.

    Args:
        data: The sequences to pack.
        place: The place passed to ``LoDTensor.set``.

    Returns:
        The populated fluid.LoDTensor.
    """
    # offsets[k] is the total length of the first k sequences.
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))

    flat = np.concatenate(data, axis=0).astype("int64")
    flat = flat.reshape([len(flat), 1])

    tensor = fluid.LoDTensor()
    tensor.set(flat, place)
    tensor.set_lod([offsets])
    return tensor
def main():
    """Build the labeling network and run as a trainer or a parameter server.

    Environment variables:
        PSERVERS: parameter server endpoints handed to the transpiler.
        SERVER_ENDPOINT: this node's endpoint; required when running as a
            parameter server.
        TRAINING_ROLE: ``"TRAINER"`` (default) or ``"PSERVER"``.  Any other
            value is reported and the process exits with status 1.
    """
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)

    # Explicit keywords instead of **locals(), so that a local added above
    # this call can never leak into it.
    feature_out = db_lstm(
        word=word,
        predicate=predicate,
        ctx_n2=ctx_n2,
        ctx_n1=ctx_n1,
        ctx_0=ctx_0,
        ctx_p1=ctx_p1,
        ctx_p2=ctx_p2,
        mark=mark)
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(x=crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    # The reader is built from conll05.test().
    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        start_time = time.time()
        batch_id = 0
        exe.run(fluid.default_startup_program())

        # Overwrite the embedding table with the values loaded from the
        # conll05 embedding file.
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        for pass_id in xrange(PASS_NUM):
            chunk_evaluator.reset(exe)
            for data in train_data():
                cost, precision, recall, f1_score = exe.run(
                    trainer_prog,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost] + chunk_evaluator.metrics)
                pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
                    exe)

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost) + " precision:" + str(
                        precision) + " recall:" + str(recall) + " f1_score:" +
                          str(f1_score) + " pass_precision:" + str(
                              pass_precision) + " pass_recall:" + str(
                                  pass_recall) + " pass_f1_score:" + str(
                                      pass_f1_score))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))

                batch_id = batch_id + 1
    else:
        # An unrecognised role used to fall through silently and exit 0.
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
        exit(1)


if __name__ == '__main__':
    main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
# Distributed N-gram language model: four context words predict the next
# word.  The process acts as trainer or parameter server depending on the
# TRAINING_ROLE environment variable.
PASS_NUM = 100
EMBED_SIZE = 32
HIDDEN_SIZE = 256
N = 5
BATCH_SIZE = 32
IS_SPARSE = True
TRAINERS = 2

word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)

first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')

# All four context words use the same parameter name, 'shared_w'.
embed_first = fluid.layers.embedding(
    input=first_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_second = fluid.layers.embedding(
    input=second_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_third = fluid.layers.embedding(
    input=third_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')
embed_forth = fluid.layers.embedding(
    input=forth_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr='shared_w')

concat_embed = fluid.layers.concat(
    input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid')
predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax')
cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

train_reader = paddle.batch(
    paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# all parameter server endpoints list for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver
t.transpile(
    optimize_ops, params_grads, pservers=pserver_endpoints, trainers=TRAINERS)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    feeder = fluid.DataFeeder(
        feed_list=[first_word, second_word, third_word, forth_word, next_word],
        place=place)
    exe.run(fluid.default_startup_program())
    # Loop-invariant: fetch the trainer program once instead of per batch.
    trainer_prog = t.get_trainer_program()
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            avg_cost_np = exe.run(trainer_prog,
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
            print("avg_cost_np", avg_cost_np)
            if avg_cost_np[0] < 5.0:
                # if avg cost less than 5.0, we think our code is good.
                exit(0)
else:
    # The variable read above is TRAINING_ROLE; the message now names it.
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
import os
# Vocabulary size, used for both the source and the target side.
dict_size = 30000
source_dict_dim = dict_size
target_dict_dim = dict_size
# Loaded at import time.  NOTE(review): neither dictionary is referenced by
# the rest of this script as far as visible -- confirm before removing.
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)

# Model dimensions.
word_dim = 16
hidden_dim = 32
decoder_size = hidden_dim
IS_SPARSE = True

# Batch and decoding settings.
batch_size = 10
max_length = 50
topk_size = 50
trg_dic_size = 10000
def encoder_decoder():
    """Assemble the sequence-to-sequence network and return the decoder output.

    Encoder: source word ids -> embedding -> tanh ``fc`` -> dynamic LSTM; the
    last step of the LSTM's first output initialises the decoder memory.
    Decoder: a ``DynamicRNN`` over the target word embeddings that emits a
    softmax of width ``target_dict_dim`` at every step.  Both embedding
    layers use the parameter name ``'vemb'``.

    Returns:
        The result of calling the ``DynamicRNN`` object after its block has
        been defined.
    """
    # ---- encoder ----
    source_ids = layers.data(
        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
    source_emb = layers.embedding(
        input=source_ids,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    lstm_width = hidden_dim * 4
    encoder_proj = fluid.layers.fc(
        input=source_emb, size=lstm_width, act='tanh')
    # Only the first of the two returned values is used.
    encoder_hidden, _unused = layers.dynamic_lstm(
        input=encoder_proj, size=lstm_width)
    encoder_last = layers.sequence_last_step(input=encoder_hidden)

    # ---- decoder ----
    target_ids = layers.data(
        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
    target_emb = layers.embedding(
        input=target_ids,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    decoder = fluid.layers.DynamicRNN()
    with decoder.block():
        step_word = decoder.step_input(target_emb)
        state = decoder.memory(init=encoder_last)
        next_state = fluid.layers.fc(
            input=[step_word, state], size=decoder_size, act='tanh')
        step_probs = fluid.layers.fc(
            input=next_state, size=target_dict_dim, act='softmax')
        decoder.update_memory(state, next_state)
        decoder.output(step_probs)

    return decoder()
def to_lodtensor(data, place):
    """Flatten a batch of sequences into a single ``core.LoDTensor``.

    Args:
        data: sequence of sequences, one inner sequence per sample.
        place: placement object handed to ``LoDTensor.set``.

    Returns:
        A ``core.LoDTensor`` containing all samples concatenated as an
        ``int64`` array of shape ``[total_len, 1]``; its single LoD level
        stores the cumulative sample boundaries.
    """
    # Running total of sample lengths, starting at 0.
    boundaries = [0]
    for sample in data:
        boundaries.append(boundaries[-1] + len(sample))

    merged = np.concatenate(data, axis=0).astype("int64")
    merged = merged.reshape([len(merged), 1])

    result = core.LoDTensor()
    result.set(merged, place)
    result.set_lod([boundaries])
    return result
def main():
    """Train the encoder-decoder model as a trainer or serve as a pserver.

    Environment variables:
        PSERVERS: parameter server endpoints handed to the transpiler.
        SERVER_ENDPOINT: this node's endpoint; required when running as a
            parameter server.
        TRAINING_ROLE: ``"TRAINER"`` (default) or ``"PSERVER"``.  Any other
            value is reported and the process exits with status 1.

    As a trainer the process exits with status 0 once ``batch_id``
    exceeds 3.
    """
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimize_ops, params_grads = optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        exe.run(framework.default_startup_program())

        batch_id = 0
        for pass_id in xrange(2):
            for data in train_data():
                # Each sample is indexed 0, 1, 2 for the three feeds below.
                word_data = to_lodtensor([x[0] for x in data], place)
                trg_word = to_lodtensor([x[1] for x in data], place)
                trg_word_next = to_lodtensor([x[2] for x in data], place)
                outs = exe.run(trainer_prog,
                               feed={
                                   'src_word_id': word_data,
                                   'target_language_word': trg_word,
                                   'target_language_next_word': trg_word_next
                               },
                               fetch_list=[avg_cost])
                avg_cost_val = np.array(outs[0])
                print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                      " avg_cost=" + str(avg_cost_val))
                if batch_id > 3:
                    exit(0)
                batch_id += 1
    else:
        # The variable read above is TRAINING_ROLE; a bad value is a
        # configuration error, so exit non-zero instead of returning quietly.
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
        exit(1)


if __name__ == '__main__':
    main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
# Distributed MNIST training with two conv/pool stages and a softmax
# classifier.  The process acts as trainer or parameter server depending on
# the TRAINING_ROLE environment variable.
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
    input=images,
    filter_size=5,
    num_filters=20,
    pool_size=2,
    pool_stride=2,
    act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
    input=conv_pool_1,
    filter_size=5,
    num_filters=50,
    pool_size=2,
    pool_stride=2,
    act="relu")

predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.01)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

BATCH_SIZE = 50
PASS_NUM = 3
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

pserver_endpoints = os.getenv("PSERVERS")  # all pserver endpoints
# Check TRAINERS explicitly: int(None) would raise an opaque TypeError.
trainers_env = os.getenv("TRAINERS")  # total trainer count
if not trainers_env:
    print("need env TRAINERS")
    exit(1)
trainers = int(trainers_env)
current_endpoint = os.getenv("SERVER_ENDPOINT")  # current pserver endpoint
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver
# Only the pserver branch uses current_endpoint, so only a pserver needs it.
if training_role == "PSERVER" and not current_endpoint:
    print("need env SERVER_ENDPOINT")
    exit(1)

t = fluid.DistributeTranspiler()
# NOTE(review): the third positional argument (0) is presumably a trainer
# id; confirm against the DistributeTranspiler.transpile signature in use.
t.transpile(
    optimize_ops,
    params_grads,
    0,
    pservers=pserver_endpoints,
    trainers=trainers)

if training_role == "PSERVER":
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            if batch_id % 100 == 0:
                # The "acc" field shows the accumulated pass accuracy.
                print("batch_id %d, loss: %f, acc: %f" %
                      (batch_id, loss, pass_acc))
            batch_id += 1

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
    # The variable read above is TRAINING_ROLE; a bad value is a
    # configuration error, so exit non-zero.
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
# Distributed MNIST training with a two-hidden-layer perceptron.  The process
# acts as trainer or parameter server depending on the TRAINING_ROLE
# environment variable.
BATCH_SIZE = 128
PASS_NUM = 100

images = fluid.layers.data(name='x', shape=[784], dtype='float32')

# TODO(aroraabhinav) Add regularization and error clipping after
# Issue 7432(https://github.com/PaddlePaddle/Paddle/issues/7432) is resolved.
hidden1 = fluid.layers.fc(input=images, size=128, act='relu')
hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

label = fluid.layers.data(name='y', shape=[1], dtype='int64')

cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)

optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
optimize_ops, params_grads = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

t = fluid.DistributeTranspiler()
# all parameter server endpoints list for splitting parameters
pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE",
                          "TRAINER")  # get the training role: trainer/pserver
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

if training_role == "PSERVER":
    if not current_endpoint:
        print("need env SERVER_ENDPOINT")
        exit(1)
    pserver_prog = t.get_pserver_program(current_endpoint)
    pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
    exe.run(pserver_startup)
    exe.run(pserver_prog)
elif training_role == "TRAINER":
    trainer_prog = t.get_trainer_program()
    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        batch_id = 0
        for data in train_reader():
            loss, acc = exe.run(trainer_prog,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            if batch_id % 100 == 0:
                # The "acc" field shows the accumulated pass accuracy.
                print("batch_id %d, loss: %f, acc: %f" %
                      (batch_id, loss, pass_acc))
            batch_id += 1

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
    # The variable read above is TRAINING_ROLE; a bad value is a
    # configuration error, so exit non-zero.
    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
    exit(1)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.optimizer import SGDOptimizer
# Training-loop settings.
PASS_NUM = 100
BATCH_SIZE = 256
# Passed as ``is_sparse`` to every embedding layer in this script.
IS_SPARSE = True
def get_usr_combined_features():
    """Build the user tower: four id features merged into one 200-wide vector.

    User id, gender, age and job are each read from an ``int64`` data layer,
    embedded, and projected by an ``fc`` of the same width as the embedding.
    The four projections are concatenated along axis 1 and passed through a
    ``tanh`` ``fc`` of size 200.

    Returns:
        The output of the final ``fc`` layer.
    """

    def _embed_and_project(feature_name, vocab_size, width, **emb_kwargs):
        # data layer -> embedding -> fc, created in exactly that order.
        ids = layers.data(name=feature_name, shape=[1], dtype='int64')
        emb = layers.embedding(
            input=ids, size=[vocab_size, width], **emb_kwargs)
        return layers.fc(input=emb, size=width)

    user_fc = _embed_and_project(
        'user_id',
        paddle.dataset.movielens.max_user_id() + 1,
        32,
        dtype='float32',
        param_attr='user_table',
        is_sparse=IS_SPARSE)

    gender_fc = _embed_and_project(
        'gender_id',
        2,
        16,
        param_attr='gender_table',
        is_sparse=IS_SPARSE)

    age_fc = _embed_and_project(
        'age_id',
        len(paddle.dataset.movielens.age_table),
        16,
        is_sparse=IS_SPARSE,
        param_attr='age_table')

    job_fc = _embed_and_project(
        'job_id',
        paddle.dataset.movielens.max_job_id() + 1,
        16,
        param_attr='job_table',
        is_sparse=IS_SPARSE)

    merged = layers.concat(
        input=[user_fc, gender_fc, age_fc, job_fc], axis=1)
    return layers.fc(input=merged, size=200, act="tanh")
def get_mov_combined_features():
    """Build the movie tower: id, categories and title merged into 200 units.

    The movie id is embedded and projected by an ``fc``; the category ids
    are embedded and sum-pooled; the title ids are embedded and passed
    through a sequence convolution with sum pooling.  The three results are
    concatenated along axis 1 and fed to a ``tanh`` ``fc`` of size 200.

    Returns:
        The output of the final ``fc`` layer.
    """
    # --- movie id ---
    movie_vocab = paddle.dataset.movielens.max_movie_id() + 1
    movie_ids = layers.data(name='movie_id', shape=[1], dtype='int64')
    movie_emb = layers.embedding(
        input=movie_ids,
        dtype='float32',
        size=[movie_vocab, 32],
        param_attr='movie_table',
        is_sparse=IS_SPARSE)
    movie_fc = layers.fc(input=movie_emb, size=32)

    # --- categories ---
    category_vocab = len(paddle.dataset.movielens.movie_categories())
    category_ids = layers.data(name='category_id', shape=[1], dtype='int64')
    category_emb = layers.embedding(
        input=category_ids, size=[category_vocab, 32], is_sparse=IS_SPARSE)
    category_pooled = layers.sequence_pool(
        input=category_emb, pool_type="sum")

    # --- title ---
    title_vocab = len(paddle.dataset.movielens.get_movie_title_dict())
    title_ids = layers.data(name='movie_title', shape=[1], dtype='int64')
    title_emb = layers.embedding(
        input=title_ids, size=[title_vocab, 32], is_sparse=IS_SPARSE)
    title_conv = nets.sequence_conv_pool(
        input=title_emb,
        num_filters=32,
        filter_size=3,
        act="tanh",
        pool_type="sum")

    merged = layers.concat(
        input=[movie_fc, category_pooled, title_conv], axis=1)
    return layers.fc(input=merged, size=200, act="tanh")
def model():
    """Combine the user and movie towers into a rating-regression cost.

    The cosine similarity of the two feature vectors is multiplied by 5.0
    and compared with the ``'score'`` label using squared error.

    Returns:
        The mean of the squared-error cost.
    """
    user_vec = get_usr_combined_features()
    movie_vec = get_mov_combined_features()

    # need cos sim
    similarity = layers.cos_sim(X=user_vec, Y=movie_vec)
    predicted_score = layers.scale(x=similarity, scale=5.0)

    score = layers.data(name='score', shape=[1], dtype='float32')
    squared_error = layers.square_error_cost(
        input=predicted_score, label=score)
    return layers.mean(x=squared_error)
def func_feed(feeding, data, place):
    """Convert one mini-batch into a dict of feed tensors.

    Args:
        feeding: dict mapping a feed name to the index of that field inside
            each sample of ``data``.
        data: the mini-batch; each element is indexable by those indices.
        place: placement object handed to ``LoDTensor.set``.

    Returns:
        Dict mapping every key of ``feeding`` to a ``core.LoDTensor`` of
        shape ``[rows, 1]``.  ``"category_id"`` and ``"movie_title"`` are
        handled as variable-length fields: their per-sample arrays are
        concatenated and the tensor gets one LoD level with the cumulative
        lengths.  ``"score"`` is cast to ``float32``; every other field is
        cast to ``int64``.
    """
    sequence_keys = ("category_id", "movie_title")
    feed_tensors = {}
    for key, idx in feeding.items():
        tensor = core.LoDTensor()
        if key in sequence_keys:
            per_sample = [np.array(row[idx]).astype("int64") for row in data]
            # Cumulative offsets marking each sample's span.
            lod = [0]
            for item in per_sample:
                lod.append(lod[-1] + len(item))
            numpy_data = np.concatenate(per_sample, axis=0)
            tensor.set_lod([lod])
        else:
            target_dtype = "float32" if key == "score" else "int64"
            numpy_data = np.array(
                [row[idx] for row in data]).astype(target_dtype)

        numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
        tensor.set(numpy_data, place)
        feed_tensors[key] = tensor
    return feed_tensors
def main():
    """Train the recommender as a trainer or serve as a parameter server.

    Environment variables:
        PSERVERS: parameter server endpoints handed to the transpiler.
        SERVER_ENDPOINT: this node's endpoint; required when running as a
            parameter server.
        TRAINING_ROLE: ``"TRAINER"`` (default) or ``"PSERVER"``.  Any other
            value is reported and the process exits with status 1.

    As a trainer the process exits with status 0 as soon as the fetched
    cost drops below 6.0.
    """
    cost = model()
    optimizer = SGDOptimizer(learning_rate=0.2)
    optimize_ops, params_grads = optimizer.minimize(cost)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv("TRAINING_ROLE", "TRAINER")
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()

        # Feed name -> index of that field within each sample.
        feeding = {
            'user_id': 0,
            'gender_id': 1,
            'age_id': 2,
            'job_id': 3,
            'movie_id': 4,
            'category_id': 5,
            'movie_title': 6,
            'score': 7
        }

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                outs = exe.run(trainer_prog,
                               feed=func_feed(feeding, data, place),
                               fetch_list=[cost])
                out = np.array(outs[0])
                print("cost=" + str(out[0]))
                if out[0] < 6.0:
                    print("Training complete. Average cost is less than 6.0.")
                    # if avg cost less than 6.0, we think our code is good.
                    exit(0)
    else:
        # The variable read above is TRAINING_ROLE; a bad value is a
        # configuration error, so exit non-zero instead of returning quietly.
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
        exit(1)


if __name__ == '__main__':
    main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
                    hid_dim=32):
    """Build a two-branch sequence-convolution text classifier.

    The input ids are embedded, then passed through two
    ``sequence_conv_pool`` branches (filter sizes 3 and 4, ``tanh``,
    ``"sqrt"`` pooling).  Both branches feed a softmax ``fc`` of width
    ``class_dim``.  The function also creates the Adam optimizer
    (learning rate 0.002) and an accuracy evaluator.

    Args:
        data: input id layer.
        label: label layer.
        input_dim: number of rows in the embedding table.
        class_dim: number of output classes.
        emb_dim: embedding width.
        hid_dim: number of filters in each convolution branch.

    Returns:
        Tuple ``(avg_cost, accuracy, accuracy.metrics[0], optimize_ops,
        params_grads)``.
    """
    embedded = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])

    branches = []
    for width in (3, 4):
        branches.append(
            fluid.nets.sequence_conv_pool(
                input=embedded,
                num_filters=hid_dim,
                filter_size=width,
                act="tanh",
                pool_type="sqrt"))

    prediction = fluid.layers.fc(
        input=branches, size=class_dim, act="softmax")
    per_sample_cost = fluid.layers.cross_entropy(
        input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=per_sample_cost)

    adam = fluid.optimizer.Adam(learning_rate=0.002)
    optimize_ops, params_grads = adam.minimize(avg_cost)

    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
    batch_accuracy = accuracy.metrics[0]
    return avg_cost, accuracy, batch_accuracy, optimize_ops, params_grads
def to_lodtensor(data, place):
    """Concatenate variable-length samples into one ``fluid.LoDTensor``.

    Args:
        data: sequence of sequences, one inner sequence per sample.
        place: placement object handed to ``LoDTensor.set``.

    Returns:
        A ``fluid.LoDTensor`` with the samples stacked into an ``int64``
        array of shape ``[total_len, 1]`` and one LoD level listing the
        cumulative sample lengths, starting at 0.
    """
    lod = [0]
    for seq in data:
        # Each entry is the previous boundary plus this sample's length.
        lod.append(lod[-1] + len(seq))

    stacked = np.concatenate(data, axis=0).astype("int64")
    stacked = stacked.reshape([len(stacked), 1])

    lod_tensor = fluid.LoDTensor()
    lod_tensor.set(stacked, place)
    lod_tensor.set_lod([lod])
    return lod_tensor
def main():
    """Train the convolutional sentiment model or serve as a pserver.

    Environment variables:
        PSERVERS: parameter server endpoints handed to the transpiler.
        SERVER_ENDPOINT: this node's endpoint; required when running as a
            parameter server.
        TRAINING_ROLE: ``"TRAINER"`` (default) or ``"PSERVER"``.  Any other
            value is reported and the process exits with status 1.

    As a trainer the process exits with status 0 once the batch cost is
    below 1.0 and the pass accuracy is above 0.8.
    """
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out, optimize_ops, params_grads = convolution_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())

        trainer_prog = t.get_trainer_program()
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

        for pass_id in xrange(PASS_NUM):
            accuracy.reset(exe)
            # Named `batch` so the loop does not rebind the `data` layer.
            for batch in train_data():
                cost_val, acc_val = exe.run(trainer_prog,
                                            feed=feeder.feed(batch),
                                            fetch_list=[cost, acc_out])
                pass_acc = accuracy.eval(exe)
                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                      " pass_acc=" + str(pass_acc))
                if cost_val < 1.0 and pass_acc > 0.8:
                    exit(0)
    else:
        # The variable read above is TRAINING_ROLE; a bad value is a
        # configuration error, so exit non-zero instead of returning quietly.
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
        exit(1)


if __name__ == '__main__':
    main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def stacked_lstm_net(data,
                     label,
                     input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3):
    """Build a stacked LSTM text classifier with alternating directions.

    The input ids are embedded and projected by an ``fc``, then run through
    ``stacked_num`` fc/LSTM layers; even-numbered layers run their LSTM in
    reverse.  The last ``fc`` and the last LSTM output are max-pooled over
    the sequence and fed to a softmax ``fc`` of width ``class_dim``.  The
    function also creates the Adam optimizer (learning rate 0.002) and an
    accuracy evaluator.

    Args:
        data: input id layer.
        label: label layer.
        input_dim: number of rows in the embedding table.
        class_dim: number of output classes.
        emb_dim: embedding width.
        hid_dim: width of every fc and LSTM layer.
        stacked_num: number of fc/LSTM layers; must be odd (asserted).

    Returns:
        Tuple ``(avg_cost, accuracy, accuracy.metrics[0], optimize_ops,
        params_grads)``.
    """
    assert stacked_num % 2 == 1

    embedded = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
    # add bias attr

    # TODO(qijun) linear act
    bottom_fc = fluid.layers.fc(input=embedded, size=hid_dim)
    bottom_lstm, _bottom_cell = fluid.layers.dynamic_lstm(
        input=bottom_fc, size=hid_dim)

    top_fc, top_lstm = bottom_fc, bottom_lstm
    for level in range(2, stacked_num + 1):
        level_fc = fluid.layers.fc(input=[top_fc, top_lstm], size=hid_dim)
        run_reversed = (level % 2) == 0
        level_lstm, _level_cell = fluid.layers.dynamic_lstm(
            input=level_fc, size=hid_dim, is_reverse=run_reversed)
        top_fc, top_lstm = level_fc, level_lstm

    pooled_fc = fluid.layers.sequence_pool(input=top_fc, pool_type='max')
    pooled_lstm = fluid.layers.sequence_pool(input=top_lstm, pool_type='max')

    prediction = fluid.layers.fc(
        input=[pooled_fc, pooled_lstm], size=class_dim, act='softmax')
    per_sample_cost = fluid.layers.cross_entropy(
        input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=per_sample_cost)

    adam = fluid.optimizer.Adam(learning_rate=0.002)
    optimize_ops, params_grads = adam.minimize(avg_cost)

    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
    batch_accuracy = accuracy.metrics[0]
    return avg_cost, accuracy, batch_accuracy, optimize_ops, params_grads
def to_lodtensor(data, place):
    """Merge a batch of sequences into a single ``fluid.LoDTensor``.

    Args:
        data: sequence of sequences, one inner sequence per sample.
        place: placement object handed to ``LoDTensor.set``.

    Returns:
        A ``fluid.LoDTensor`` whose payload is every sample concatenated as
        an ``int64`` array of shape ``[total_len, 1]``, with one LoD level
        of cumulative sample lengths.
    """
    # Offsets start at 0 and grow by each sample's length.
    running = 0
    level = [running]
    for sample in data:
        running += len(sample)
        level.append(running)

    payload = np.concatenate(data, axis=0).astype("int64")
    payload = payload.reshape([len(payload), 1])

    out = fluid.LoDTensor()
    out.set(payload, place)
    out.set_lod([level])
    return out
def main():
    """Run the stacked-LSTM IMDB example as a trainer or a parameter server.

    Configuration is read from environment variables:
        PSERVERS: endpoints of all parameter servers, passed to the
            transpiler as ``pservers``. Required.
        SERVER_ENDPOINT: endpoint of the current node. Required when the
            role is PSERVER.
        TRAINING_ROLE: "TRAINER" (default) or "PSERVER".

    A trainer runs up to PASS_NUM passes and exits with status 0 as soon as
    a batch reaches cost < 1.0 and accuracy > 0.8. A parameter server runs
    its startup program and then its pserver program. The process exits with
    status 1 when a required environment variable is missing.
    """
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    print("loaded word dict successfully")
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out, optimize_ops, params_grads = stacked_lstm_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    t = fluid.DistributeTranspiler()
    # all parameter server endpoints list for splitting parameters
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv("TRAINING_ROLE", "TRAINER")

    # os.getenv returns None when the variable is unset; report that clearly
    # instead of handing None to the transpiler.
    if not pserver_endpoints:
        print("need env PSERVERS")
        exit(1)

    t.transpile(
        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        exe.run(fluid.default_startup_program())
        trainer_prog = t.get_trainer_program()
        for pass_id in range(PASS_NUM):
            accuracy.reset(exe)
            # `batch` (not `data`) so the input layer above is not shadowed.
            for batch in train_data():
                cost_val, acc_val = exe.run(trainer_prog,
                                            feed=feeder.feed(batch),
                                            fetch_list=[cost, acc_out])
                pass_acc = accuracy.eval(exe)
                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                      " pass_acc=" + str(pass_acc))
                # Stop the whole process once the model is good enough.
                if cost_val < 1.0 and acc_val > 0.8:
                    exit(0)
    else:
        print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
...@@ -71,9 +71,9 @@ packages=['paddle', ...@@ -71,9 +71,9 @@ packages=['paddle',
'paddle.v2.reader', 'paddle.v2.reader',
'paddle.v2.master', 'paddle.v2.master',
'paddle.v2.plot', 'paddle.v2.plot',
'paddle.v2.fluid', 'paddle.fluid',
'paddle.v2.fluid.proto', 'paddle.fluid.proto',
'paddle.v2.fluid.layers', 'paddle.fluid.layers',
'py_paddle'] 'py_paddle']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
...@@ -102,14 +102,14 @@ setup(name='${PACKAGE_NAME}', ...@@ -102,14 +102,14 @@ setup(name='${PACKAGE_NAME}',
ext_modules=[Extension('_foo', ['stub.cc'])], ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={ package_data={
'paddle.v2.master': ['libpaddle_master.so'], 'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.fluid': ['core.so'], 'paddle.fluid': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so'] 'py_paddle':['*.py','_swig_paddle.so']
}, },
package_dir={ package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.fluid.proto will be generated while compiling. # The paddle.fluid.proto will be generated while compiling.
# So that package points to other directory. # So that package points to other directory.
'paddle.v2.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework', 'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
}, },
scripts=paddle_bins, scripts=paddle_bins,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册