提交 1b8b253e 编写于 作者: T tensor-tang

Merge remote-tracking branch 'ups/develop' into nlp

...@@ -41,7 +41,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO ...@@ -41,7 +41,6 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
...@@ -59,7 +58,6 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) ...@@ -59,7 +58,6 @@ option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
...@@ -100,6 +98,9 @@ endif() ...@@ -100,6 +98,9 @@ endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.") "A path setting third party libraries download & build directories.")
set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
"A path setting fluid shared and static libraries")
if (WITH_C_API AND WITH_PYTHON) if (WITH_C_API AND WITH_PYTHON)
message(WARNING "It is suggest not embedded a python interpreter in Paddle " message(WARNING "It is suggest not embedded a python interpreter in Paddle "
"when using C-API. It will give an unpredictable behavior when using a " "when using C-API. It will give an unpredictable behavior when using a "
...@@ -153,7 +154,6 @@ include(cupti) ...@@ -153,7 +154,6 @@ include(cupti)
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
include(generic) # simplify cmake module include(generic) # simplify cmake module
include(package) # set paddle packages include(package) # set paddle packages
include(cpplint) # set paddle c++ style
include(ccache) # set ccache for compilation include(ccache) # set ccache for compilation
include(util) # set unittest and link libs include(util) # set unittest and link libs
include(rdma) # set rdma libraries include(rdma) # set rdma libraries
......
...@@ -38,7 +38,7 @@ def str2bool(v): ...@@ -38,7 +38,7 @@ def str2bool(v):
parser = argparse.ArgumentParser(description=__doc__) parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(
'--batch_size', type=int, default=128, help="Batch size for training.") '--batch_size', type=int, default=16, help="Batch size for training.")
parser.add_argument( parser.add_argument(
'--learning_rate', '--learning_rate',
type=float, type=float,
...@@ -61,7 +61,7 @@ parser.add_argument( ...@@ -61,7 +61,7 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
'--data_set', '--data_set',
type=str, type=str,
default='cifar10', default='flowers',
choices=['cifar10', 'flowers'], choices=['cifar10', 'flowers'],
help='Optional dataset for benchmark.') help='Optional dataset for benchmark.')
parser.add_argument( parser.add_argument(
...@@ -200,26 +200,30 @@ def main(): ...@@ -200,26 +200,30 @@ def main():
fetch_list=[avg_cost, batch_acc, batch_size]) fetch_list=[avg_cost, batch_acc, batch_size])
return loss, acc, b_size return loss, acc, b_size
if args.profile and args.task_index == 0: if args.profile:
# warmup. with profiler.profiler('All', 'total',
for batch_id, data in enumerate(train_reader()): '/tmp/profile_vgg_%d' % args.task_index):
if batch_id > 5: break
run_step(batch_id, data)
with profiler.profiler('All', 'total', '/tmp/profile_vgg'):
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
if batch_id > 5: break if batch_id > 5: break
run_step(batch_id, data) run_step(batch_id, data)
total_time = 0.0
count = 0
for batch_id, data in enumerate(train_reader()): for batch_id, data in enumerate(train_reader()):
ts = time.time() ts = time.time()
loss, acc, b_size = run_step(batch_id, data) loss, acc, b_size = run_step(batch_id, data)
iters += 1 iters += 1
num_samples += len(data) num_samples += len(data)
train_pass_acc.add(value=acc, weight=b_size) train_pass_acc.add(value=acc, weight=b_size)
duration = time.time() - ts
total_time += duration
count += len(data)
print( print(
"Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, " "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, "
"Speed = %.2f img/s" % (pass_id, iters, loss, acc, "Speed = %.2f (%.2f) img/s" % (pass_id, iters, loss, acc,
len(data) / (time.time() - ts)) len(data) / duration,
count / total_time)
) # The accuracy is the accumulation of batches, but not the current batch. ) # The accuracy is the accumulation of batches, but not the current batch.
pass_elapsed = time.time() - start_time pass_elapsed = time.time() - start_time
......
# Fluid Benchmark
This directory contains several models configurations and tools that used to run
Fluid benchmarks for local and distributed training.
## Run the Benchmark
To start, run the following command to get the full help message:
```bash
python fluid_benchmark.py --help
```
Currently supported `--model` argument include:
* mnist
* resnet
* you can chose to use different dataset using `--data_set cifar10` or
`--data_set flowers`.
* vgg
* stacked_dynamic_lstm
* machine_translation
* Run the following command to start a benchmark job locally:
```bash
python fluid_benchmark.py --model mnist --parallel 1 --device GPU --with_test
```
You can choose to use GPU/CPU training. With GPU training, you can specify
`--parallel 1` to run multi GPU training.
* Run distributed training with parameter servers:
* start parameter servers:
```bash
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
```
* start trainers:
```bash
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
```
* Run distributed training using NCCL2
```bash
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method nccl2
```
## Run Distributed Benchmark on Kubernetes Cluster
We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submit
distributed benchmark jobs to your cluster. To generate a job yaml, just run:
```bash
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver --with_test" --disttype pserver
```
Then the yaml files are generated under directory `myjob`, you can run:
```bash
kubectl create -f myjob/
```
The job shall start.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import cProfile
import time
import os
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler
BENCHMARK_MODELS = [
"machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
]
def parse_args():
parser = argparse.ArgumentParser('Fluid model benchmarks.')
parser.add_argument(
'--model',
type=str,
choices=BENCHMARK_MODELS,
default='resnet',
help='The model to run benchmark with.')
parser.add_argument(
'--batch_size', type=int, default=32, help='The minibatch size.')
parser.add_argument(
'--learning_rate',
type=float,
default=0.001,
help='The minibatch size.')
# TODO(wuyi): add "--use_fake_data" option back.
parser.add_argument(
'--skip_batch_num',
type=int,
default=5,
help='The first num of minibatch num to skip, for better performance test'
)
parser.add_argument(
'--iterations', type=int, default=80, help='The number of minibatches.')
parser.add_argument(
'--pass_num', type=int, default=100, help='The number of passes.')
parser.add_argument(
'--data_format',
type=str,
default='NCHW',
choices=['NCHW', 'NHWC'],
help='The data data_format, now only support NCHW.')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type.')
parser.add_argument(
'--gpus',
type=int,
default=1,
help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
parser.add_argument(
'--data_set',
type=str,
default='flowers',
choices=['cifar10', 'flowers'],
help='Optional dataset for benchmark.')
parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.')
parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.')
parser.add_argument(
'--use_nvprof',
action='store_true',
help='If set, use nvprof for CUDA.')
parser.add_argument(
'--no_test',
action='store_false',
help='If set, test the testset during training.')
parser.add_argument(
'--memory_optimize',
action='store_true',
help='If set, optimize runtime memory before start.')
parser.add_argument(
'--update_method',
type=str,
default='local',
choices=['local', 'pserver', 'nccl2'],
help='Choose parameter update method, can be local, pserver, nccl2.')
args = parser.parse_args()
return args
def append_nccl2_prepare():
if os.getenv("PADDLE_TRAINER_ID", None) != None:
# append gen_nccl_id at the end of startup program
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
port = os.getenv("PADDLE_PSERVER_PORT")
worker_ips = os.getenv("PADDLE_TRAINER_IPS")
worker_endpoints = []
for ip in worker_ips.split(","):
worker_endpoints.append(':'.join([ip, port]))
num_trainers = len(worker_endpoints)
current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
worker_endpoints.remove(current_endpoint)
nccl_id_var = fluid.default_startup_program().global_block().create_var(
name="NCCLID",
persistable=True,
type=fluid.core.VarDesc.VarType.RAW)
fluid.default_startup_program().global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": trainer_id
})
return nccl_id_var, num_trainers, trainer_id
else:
raise Exception(
"must set PADDLE_TRAINER_ID env variables for dist train.")
def dist_transpile():
if "PADDLE_TRAINING_ROLE" not in os.environ:
return None, None
# the port of all pservers, needed by both trainer and pserver
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
# comma separated ips of all pservers, needed by trainer and
# pserver
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
# total number of workers/trainers in the job, needed by
# trainer and pserver
trainers = int(os.getenv("PADDLE_TRAINERS"))
# the IP of the local machine, needed by pserver only
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
# the unique trainer id, starting from 0, needed by trainer
# only
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# the role, should be either PSERVER or TRAINER
training_role = os.getenv("PADDLE_TRAINING_ROLE")
t = distribute_transpiler.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
pserver_program = t.get_pserver_program(current_endpoint)
pserver_startup_program = t.get_startup_program(current_endpoint,
pserver_program)
return pserver_program, pserver_startup_program
elif training_role == "TRAINER":
train_program = t.get_trainer_program()
return train_program, fluid.default_startup_program()
else:
raise ValueError(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
def test(exe, inference_program, test_reader, feeder, batch_acc):
accuracy_evaluator = fluid.metrics.Accuracy()
for batch_id, data in enumerate(test_reader()):
acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[batch_acc])
accuracy_evaluator.update(value=np.array(acc), weight=len(data))
return accuracy_evaluator.eval()
# TODO(wuyi): replace train, train_parallel, test functions with new trainer
# API once it is ready.
def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
args, train_prog, startup_prog):
if os.getenv("PADDLE_TRAINING_ROLE") == "PSERVER":
place = core.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
exe.run(train_prog)
return
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(startup_prog)
feed_var_list = [
var for var in train_prog.global_block().vars.itervalues()
if var.is_data
]
feeder = fluid.DataFeeder(feed_var_list, place)
iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num):
train_losses = []
for batch_id, data in enumerate(train_reader()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
loss = exe.run(train_prog,
feed=feeder.feed(data),
fetch_list=[avg_loss])
iters += 1
num_samples += len(data)
train_losses.append(loss)
print("Pass: %d, Iter: %d, Loss: %f\n" %
(pass_id, iters, np.mean(train_losses)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
(num_samples, train_elapsed, examples_per_sec))
print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses)))
# evaluation
if not args.no_test and batch_acc != None:
pass_test_acc = test(exe, infer_prog, test_reader, feeder,
batch_acc)
print(", Test Accuracy: %f" % pass_test_acc)
print("\n")
# TODO(wuyi): add warmup passes to get better perf data.
exit(0)
# TODO(wuyi): replace train, train_parallel, test functions with new trainer
# API once it is ready.
def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
batch_acc, args, train_prog, startup_prog, nccl_id_var,
num_trainers, trainer_id):
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
startup_exe = fluid.Executor(place)
startup_exe.run(startup_prog)
strategy = fluid.ExecutionStrategy()
strategy.num_threads = 1
strategy.allow_op_delay = False
exe = fluid.ParallelExecutor(
True,
avg_loss.name,
exec_strategy=strategy,
num_trainers=num_trainers,
trainer_id=trainer_id)
feed_var_list = [
var for var in train_prog.global_block().vars.itervalues()
if var.is_data
]
feeder = fluid.DataFeeder(feed_var_list, place)
for pass_id in range(args.pass_num):
num_samples = 0
iters = 0
start_time = time.time()
for batch_id, data in enumerate(train_reader()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
if args.update_method == "pserver":
exe.bcast_params()
num_samples += len(data)
iters += 1
if batch_id % 1 == 0:
print("Pass %d, batch %d, loss %s" %
(pass_id, batch_id, np.array(loss)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
if not args.no_test and batch_acc != None:
test_acc = test(startup_exe, infer_prog, test_reader, feeder,
batch_acc)
print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))
exit(0)
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- resnet Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def main():
args = parse_args()
print_arguments(args)
nccl_id_var, num_trainers, trainer_id = None, 1, 0
if args.use_cprof:
pr = cProfile.Profile()
pr.enable()
model_def = __import__("models.%s" % args.model, fromlist=["models"])
train_args = list(model_def.get_model(args))
train_args.append(args)
# Run optimizer.minimize(avg_loss)
train_args[2].minimize(train_args[0])
if args.memory_optimize:
fluid.memory_optimize(fluid.default_main_program())
if args.update_method == "pserver":
train_prog, startup_prog = dist_transpile()
if not train_prog:
raise Exception(
"Must configure correct environments to run dist train.")
train_args.extend([train_prog, startup_prog])
if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
train_args.extend([nccl_id_var, num_trainers, trainer_id])
train_parallel(*train_args)
train(*train_args)
exit(0)
# for other update methods, use default programs
train_args.append(fluid.default_main_program())
train_args.append(fluid.default_startup_program())
if args.update_method == "nccl2":
nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare()
if args.gpus == 1:
# NOTE: parallel executor use profiler interanlly
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
train(*train_args)
else:
train(*train_args)
else:
if args.device == "CPU":
raise Exception("Only support GPU perf with parallel exe")
train_args.extend([nccl_id_var, num_trainers, trainer_id])
train_parallel(*train_args)
if __name__ == "__main__":
main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
import copy
import argparse
import random
import os
from kube_templates import pserver, trainer, envs
def parse_args():
parser = argparse.ArgumentParser(description='Generate dist job yamls.')
parser.add_argument(
'--jobname', default="paddlejob", help='unique job name')
parser.add_argument(
'--cpu', default=1, type=int, help='CPU cores per trainer node')
parser.add_argument(
'--pscpu', default=1, type=int, help='CPU cores per pserver node')
parser.add_argument(
'--gpu', default=0, type=int, help='num of GPUs per node')
parser.add_argument(
'--image',
default="bootstrapper:5000/fluid_benchmark:gpu",
help='num of GPUs per node')
parser.add_argument(
'--pservers', default=1, type=int, help='num of pservers')
parser.add_argument(
'--trainers', default=1, type=int, help='num of trainers')
parser.add_argument('--memory', default=1, type=int, help='trainer memory')
parser.add_argument(
'--psmemory', default=1, type=int, help='pserver memory')
parser.add_argument(
'--port', default=30236, type=int, help='num of trainers')
parser.add_argument(
'--entry', default="python train.py", help='command to run')
parser.add_argument(
'--fluid', default=1, type=int, help='whether is fluid job')
parser.add_argument(
'--rdma', action='store_ture', help='whether mount rdma libs')
parser.add_argument(
'--disttype',
default="pserver",
type=str,
choices=['pserver', 'nccl2', 'local'],
help='pserver or nccl2 or local')
args = parser.parse_args()
return args
def gen_job():
ps = pserver
tn = trainer
args = parse_args()
ps_container = ps["spec"]["template"]["spec"]["containers"][0]
tn_container = tn["spec"]["template"]["spec"]["containers"][0]
if args.fluid == 1:
ps_container["command"] = \
["paddle_k8s", "start_fluid"]
tn_container["command"] = \
["paddle_k8s", "start_fluid"]
ps["metadata"]["name"] = args.jobname + "-pserver"
ps["spec"]["template"]["metadata"]["labels"][
"paddle-job-pserver"] = args.jobname
tn["metadata"]["name"] = args.jobname + "-trainer"
tn["spec"]["template"]["metadata"]["labels"]["paddle-job"] = args.jobname
ps_container["image"] = args.image
tn_container["image"] = args.image
ps_container["resources"]["requests"]["cpu"] = str(args.pscpu)
ps_container["resources"]["requests"]["memory"] = str(args.psmemory) + "Gi"
ps_container["resources"]["limits"]["cpu"] = str(args.pscpu)
ps_container["resources"]["limits"]["memory"] = str(args.psmemory) + "Gi"
tn_container["resources"]["requests"]["cpu"] = str(args.cpu)
tn_container["resources"]["requests"]["memory"] = str(args.memory) + "Gi"
tn_container["resources"]["limits"]["cpu"] = str(args.cpu)
tn_container["resources"]["limits"]["memory"] = str(args.memory) + "Gi"
if args.gpu > 0:
tn_container["resources"]["requests"][
"alpha.kubernetes.io/nvidia-gpu"] = str(args.gpu)
tn_container["resources"]["limits"][
"alpha.kubernetes.io/nvidia-gpu"] = str(args.gpu)
ps["spec"]["replicas"] = int(args.pservers)
tn["spec"]["parallelism"] = int(args.trainers)
tn["spec"]["completions"] = int(args.trainers)
ps_container["ports"][0]["name"] = "jobport-" + str(args.port)
ps_container["ports"][0]["containerPort"] = args.port
spreadport = random.randint(40000, 60000)
tn_container["ports"][0]["name"] = "spr-" + str(spreadport)
tn_container["ports"][0]["containerPort"] = spreadport
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
envs.append({"name": "TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster.
envs.append({
"name": "LD_LIBRARY_PATH",
"value":
"/usr/local/lib:/usr/local/nvidia/lib64:/usr/local/rdma/lib64:/usr/lib64/mlnx_ofed/valgrind"
})
volumes = [{
"name": "nvidia-driver",
"hostPath": {
"path": "/usr/local/nvidia/lib64"
}
}]
volumeMounts = [{
"mountPath": "/usr/local/nvidia/lib64",
"name": "nvidia-driver"
}]
if args.rdma:
volumes.extend([{
"name": "ibetc",
"hostPath": {
"path": "/etc/libibverbs.d"
}
}, {
"name": "iblibs",
"hostPath": {
"path": "/usr/local/rdma"
}
}, {
"name": "valgrind",
"hostPath": {
"path": "/usr/lib64/mlnx_ofed/valgrind"
}
}])
volumeMounts.extend([{
"mountPath": "/etc/libibverbs.d",
"name": "ibetc"
}, {
"mountPath": "/usr/local/rdma",
"name": "iblibs"
}, {
"mountPath": "/usr/lib64/mlnx_ofed/valgrind",
"name": "valgrind"
}])
# append shm for NCCL2
volumes.append({"name": "dshm", "emptyDir": {"medium": "Memory"}})
volumeMounts.append({"mountPath": "/dev/shm", "name": "dshm"})
tn["spec"]["template"]["spec"]["volumes"] = volumes
tn_container["volumeMounts"] = volumeMounts
ps_container["env"] = envs
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
tn_container["env"] = envs
if args.disttype == "pserver":
tn_container["env"].append({
"name": "TRAINING_ROLE",
"value": "TRAINER"
})
elif args.disttype == "nccl2" or args.disttype == "local":
# NCCL2 have no training role, set to plain WORKER
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
os.mkdir(args.jobname)
if args.disttype == "pserver":
with open("%s/pserver.yaml" % args.jobname, "w") as fn:
yaml.dump(ps, fn)
with open("%s/trainer.yaml" % args.jobname, "w") as fn:
yaml.dump(tn, fn)
if __name__ == "__main__":
gen_job()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pserver import pserver
from trainer import trainer
__all__ = ["pserver", "trainer", "envs"]
envs = [
# envs that don't need to change
{
"name": "GLOG_v",
"value": "0"
},
{
"name": "GLOG_logtostderr",
"value": "1"
},
{
"name": "TOPOLOGY",
"value": ""
},
{
"name": "TRAINER_PACKAGE",
"value": "/workspace"
},
{
"name": "PADDLE_INIT_NICS",
"value": "eth2"
},
{
"name": "NAMESPACE",
"valueFrom": {
"fieldRef": {
"fieldPath": "metadata.namespace"
}
}
},
{
"name": "POD_IP",
"valueFrom": {
"fieldRef": {
"fieldPath": "status.podIP"
}
}
}
]
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pserver = {
"apiVersion": "extensions/v1beta1",
"kind": "ReplicaSet",
"metadata": {
"name": "jobname-pserver"
},
"spec": {
"replicas": 1,
"template": {
"metadata": {
"labels": {
"paddle-job-pserver": "jobname"
}
},
"spec": {
"hostNetwork": True,
"imagePullSecrets": [{
"name": "job-registry-secret"
}],
"containers": [{
"name": "pserver",
"image": "",
"imagePullPolicy": "Always",
"ports": [{
"name": "jobport-1",
"containerPort": 1
}],
"env": [],
"command": ["paddle_k8s", "start_pserver"],
"resources": {
"requests": {
"memory": "10Gi",
"cpu": "4"
},
"limits": {
"memory": "10Gi",
"cpu": "4"
}
}
}]
}
}
}
}
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
trainer = {
"apiVersion": "batch/v1",
"kind": "Job",
"metadata": {
"name": "jobname-pserver"
},
"spec": {
"parallelism": 4,
"completions": 4,
"template": {
"metadata": {
"labels": {
"paddle-job": "jobname"
}
},
"spec": {
"hostNetwork": True,
"imagePullSecrets": [{
"name": "job-registry-secret"
}],
"restartPolicy": "Never",
"containers": [{
"name": "trainer",
"image": "",
"imagePullPolicy": "Always",
# to let container set rlimit
"securityContext": {
"privileged": True
# TODO(wuyi): use below specific cap instead of privileged,
# using privileged will cause all GPU device are visible
# in the container.
# "capabilities": {
# "add": ["SYS_RESOURCE"]
# }
},
"ports": [{
"name": "jobport-1",
"containerPort": 1
}],
"env": [],
"command": ["paddle_k8s", "start_trainer", "v2"],
"resources": {
"requests": {
"memory": "10Gi",
"cpu": "4",
},
"limits": {
"memory": "10Gi",
"cpu": "4",
}
}
}]
}
}
}
}
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = [
"machine_translation", "resnet", "vgg", "mnist", "stacked_dynamic_lstm"
]
...@@ -27,74 +27,6 @@ import paddle.fluid.core as core ...@@ -27,74 +27,6 @@ import paddle.fluid.core as core
import paddle.fluid.framework as framework import paddle.fluid.framework as framework
from paddle.fluid.executor import Executor from paddle.fluid.executor import Executor
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--embedding_dim",
type=int,
default=512,
help="The dimension of embedding table. (default: %(default)d)")
parser.add_argument(
"--encoder_size",
type=int,
default=512,
help="The size of encoder bi-rnn unit. (default: %(default)d)")
parser.add_argument(
"--decoder_size",
type=int,
default=512,
help="The size of decoder rnn unit. (default: %(default)d)")
parser.add_argument(
"--batch_size",
type=int,
default=16,
help="The sequence number of a mini-batch data. (default: %(default)d)")
parser.add_argument(
'--skip_batch_num',
type=int,
default=5,
help='The first num of minibatch num to skip, for better performance test')
parser.add_argument(
'--iterations', type=int, default=80, help='The number of minibatches.')
parser.add_argument(
"--dict_size",
type=int,
default=30000,
help="The dictionary capacity. Dictionaries of source sequence and "
"target dictionary have same capacity. (default: %(default)d)")
parser.add_argument(
"--pass_num",
type=int,
default=2,
help="The pass number to train. (default: %(default)d)")
parser.add_argument(
"--learning_rate",
type=float,
default=0.0002,
help="Learning rate used to train the model. (default: %(default)f)")
parser.add_argument(
"--infer_only", action='store_true', help="If set, run forward only.")
parser.add_argument(
"--beam_size",
type=int,
default=3,
help="The width for beam searching. (default: %(default)d)")
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help="The device type.")
parser.add_argument(
"--max_length",
type=int,
default=250,
help="The maximum length of sequence when doing generation. "
"(default: %(default)d)")
parser.add_argument(
'--with_test',
action='store_true',
help='If set, test the testset during training.')
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size): def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
def linear(inputs): def linear(inputs):
...@@ -264,116 +196,37 @@ def lodtensor_to_ndarray(lod_tensor): ...@@ -264,116 +196,37 @@ def lodtensor_to_ndarray(lod_tensor):
return ndarray return ndarray
def train(): def get_model(args):
embedding_dim = 512
encoder_size = 512
decoder_size = 512
dict_size = 30000
beam_size = 3
max_length = 250
avg_cost, feeding_list = seq_to_seq_net( avg_cost, feeding_list = seq_to_seq_net(
args.embedding_dim, embedding_dim,
args.encoder_size, encoder_size,
args.decoder_size, decoder_size,
args.dict_size, dict_size,
args.dict_size, dict_size,
False, False,
beam_size=args.beam_size, beam_size=beam_size,
max_length=args.max_length) max_length=max_length)
# clone from default main program # clone from default main program
inference_program = fluid.default_main_program().clone() inference_program = fluid.default_main_program().clone()
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer.minimize(avg_cost)
fluid.memory_optimize(fluid.default_main_program())
train_batch_generator = paddle.batch( train_batch_generator = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.wmt14.train(args.dict_size), buf_size=1000), paddle.dataset.wmt14.train(dict_size), buf_size=1000),
batch_size=args.batch_size) batch_size=args.batch_size)
test_batch_generator = paddle.batch( test_batch_generator = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.wmt14.test(args.dict_size), buf_size=1000), paddle.dataset.wmt14.test(dict_size), buf_size=1000),
batch_size=args.batch_size) batch_size=args.batch_size)
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0) return avg_cost, inference_program, optimizer, train_batch_generator, \
exe = Executor(place) test_batch_generator, None
exe.run(framework.default_startup_program())
def do_validation():
total_loss = 0.0
count = 0
for batch_id, data in enumerate(test_batch_generator()):
src_seq = to_lodtensor(map(lambda x: x[0], data), place)[0]
trg_seq = to_lodtensor(map(lambda x: x[1], data), place)[0]
lbl_seq = to_lodtensor(map(lambda x: x[2], data), place)[0]
fetch_outs = exe.run(inference_program,
feed={
feeding_list[0]: src_seq,
feeding_list[1]: trg_seq,
feeding_list[2]: lbl_seq
},
fetch_list=[avg_cost],
return_numpy=False)
total_loss += lodtensor_to_ndarray(fetch_outs[0])[0]
count += 1
return total_loss / count
iters, num_samples, start_time = 0, 0, time.time()
for pass_id in xrange(args.pass_num):
train_accs = []
train_losses = []
for batch_id, data in enumerate(train_batch_generator()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
src_seq, word_num = to_lodtensor(map(lambda x: x[0], data), place)
num_samples += word_num
trg_seq, word_num = to_lodtensor(map(lambda x: x[1], data), place)
num_samples += word_num
lbl_seq, _ = to_lodtensor(map(lambda x: x[2], data), place)
fetch_outs = exe.run(framework.default_main_program(),
feed={
feeding_list[0]: src_seq,
feeding_list[1]: trg_seq,
feeding_list[2]: lbl_seq
},
fetch_list=[avg_cost])
iters += 1
loss = np.array(fetch_outs[0])
print(
"Pass = %d, Iter = %d, Loss = %f" % (pass_id, iters, loss)
) # The accuracy is the accumulation of batches, but not the current batch.
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
# evaluation
if args.with_test:
test_loss = do_validation()
exit(0)
def infer():
pass
def print_arguments(args):
print('----------- seq2seq Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
if args.infer_only:
infer()
else:
train()
...@@ -19,6 +19,7 @@ from __future__ import print_function ...@@ -19,6 +19,7 @@ from __future__ import print_function
import numpy as np import numpy as np
import argparse import argparse
import time import time
import cProfile
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -31,42 +32,6 @@ DTYPE = "float32" ...@@ -31,42 +32,6 @@ DTYPE = "float32"
# fluid.default_startup_program().random_seed = SEED # fluid.default_startup_program().random_seed = SEED
def parse_args():
parser = argparse.ArgumentParser("mnist model benchmark.")
parser.add_argument(
'--batch_size', type=int, default=128, help='The minibatch size.')
parser.add_argument(
'--skip_batch_num',
type=int,
default=5,
help='The first num of minibatch num to skip, for better performance test'
)
parser.add_argument(
'--iterations', type=int, default=35, help='The number of minibatches.')
parser.add_argument(
'--pass_num', type=int, default=5, help='The number of passes.')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type.')
parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.')
parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.')
parser.add_argument(
'--use_nvprof',
action='store_true',
help='If set, use nvprof for CUDA.')
parser.add_argument(
'--with_test',
action='store_true',
help='If set, test the testset during training.')
args = parser.parse_args()
return args
def cnn_model(data): def cnn_model(data):
conv_pool_1 = fluid.nets.simple_img_conv_pool( conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=data, input=data,
...@@ -99,36 +64,13 @@ def cnn_model(data): ...@@ -99,36 +64,13 @@ def cnn_model(data):
return predict return predict
def eval_test(exe, batch_acc, batch_size_tensor, inference_program): def get_model(args):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=args.batch_size)
test_pass_acc = fluid.average.WeightedAverage()
for batch_id, data in enumerate(test_reader()):
img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
data)).astype(DTYPE)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1])
acc, weight = exe.run(inference_program,
feed={"pixel": img_data,
"label": y_data},
fetch_list=[batch_acc, batch_size_tensor])
test_pass_acc.add(value=acc, weight=weight)
pass_acc = test_pass_acc.eval()
return pass_acc
def run_benchmark(model, args):
if args.use_cprof:
pr = cProfile.Profile()
pr.enable()
start_time = time.time()
# Input data # Input data
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Train program # Train program
predict = model(images) predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
...@@ -143,86 +85,10 @@ def run_benchmark(model, args): ...@@ -143,86 +85,10 @@ def run_benchmark(model, args):
# Optimization # Optimization
opt = fluid.optimizer.AdamOptimizer( opt = fluid.optimizer.AdamOptimizer(
learning_rate=0.001, beta1=0.9, beta2=0.999) learning_rate=0.001, beta1=0.9, beta2=0.999)
opt.minimize(avg_cost)
fluid.memory_optimize(fluid.default_main_program())
# Initialize executor
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Parameter initialization
exe.run(fluid.default_startup_program())
# Reader # Reader
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=args.batch_size) paddle.dataset.mnist.train(), batch_size=args.batch_size)
test_reader = paddle.batch(
accuracy = fluid.metrics.Accuracy() paddle.dataset.mnist.test(), batch_size=args.batch_size)
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name) return avg_cost, inference_program, opt, train_reader, test_reader, batch_acc
iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num):
accuracy.reset()
train_accs = []
train_losses = []
for batch_id, data in enumerate(train_reader()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
img_data = np.array(
map(lambda x: x[0].reshape([1, 28, 28]), data)).astype(DTYPE)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1])
outs = train_exe.run(
feed={"pixel": img_data,
"label": y_data},
fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
]
) # The accuracy is the accumulation of batches, but not the current batch.
accuracy.update(
value=np.array(np.mean(outs[1])),
weight=np.mean(np.array(outs[2])))
iters += 1
num_samples += len(y_data)
loss = np.mean(np.array(outs[0]))
acc = np.mean(np.array(outs[1]))
train_losses.append(loss)
train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
(pass_id, iters, loss, acc))
print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
(pass_id, np.mean(train_losses), np.mean(train_accs)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
# evaluation
if args.with_test:
test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
inference_program)
exit(0)
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- mnist Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
run_benchmark(cnn_model, args)
else:
run_benchmark(cnn_model, args)
...@@ -16,7 +16,6 @@ from __future__ import absolute_import ...@@ -16,7 +16,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import functools import functools
import numpy as np import numpy as np
import time import time
...@@ -29,64 +28,6 @@ import paddle.fluid.core as core ...@@ -29,64 +28,6 @@ import paddle.fluid.core as core
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
def parse_args():
parser = argparse.ArgumentParser('Convolution model benchmark.')
parser.add_argument(
'--model',
type=str,
choices=['resnet_imagenet', 'resnet_cifar10'],
default='resnet_imagenet',
help='The model architecture.')
parser.add_argument(
'--batch_size', type=int, default=32, help='The minibatch size.')
parser.add_argument(
'--use_fake_data',
action='store_true',
help='use real data or fake data')
parser.add_argument(
'--skip_batch_num',
type=int,
default=5,
help='The first num of minibatch num to skip, for better performance test'
)
parser.add_argument(
'--iterations', type=int, default=80, help='The number of minibatches.')
parser.add_argument(
'--pass_num', type=int, default=100, help='The number of passes.')
parser.add_argument(
'--data_format',
type=str,
default='NCHW',
choices=['NCHW', 'NHWC'],
help='The data data_format, now only support NCHW.')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type.')
parser.add_argument(
'--data_set',
type=str,
default='flowers',
choices=['cifar10', 'flowers'],
help='Optional dataset for benchmark.')
parser.add_argument(
'--infer_only', action='store_true', help='If set, run forward only.')
parser.add_argument(
'--use_cprof', action='store_true', help='If set, use cProfile.')
parser.add_argument(
'--use_nvprof',
action='store_true',
help='If set, use nvprof for CUDA.')
parser.add_argument(
'--with_test',
action='store_true',
help='If set, test the testset during training.')
args = parser.parse_args()
return args
def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
conv1 = fluid.layers.conv2d( conv1 = fluid.layers.conv2d(
input=input, input=input,
...@@ -100,7 +41,7 @@ def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): ...@@ -100,7 +41,7 @@ def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
def shortcut(input, ch_out, stride): def shortcut(input, ch_out, stride):
ch_in = input.shape[1] if args.data_format == 'NCHW' else input.shape[-1] ch_in = input.shape[1] # if args.data_format == 'NCHW' else input.shape[-1]
if ch_in != ch_out: if ch_in != ch_out:
return conv_bn_layer(input, ch_out, 1, stride, 0, None) return conv_bn_layer(input, ch_out, 1, stride, 0, None)
else: else:
...@@ -172,23 +113,22 @@ def resnet_cifar10(input, class_dim, depth=32, data_format='NCHW'): ...@@ -172,23 +113,22 @@ def resnet_cifar10(input, class_dim, depth=32, data_format='NCHW'):
return out return out
def run_benchmark(model, args): def get_model(args):
if args.use_cprof: model = resnet_cifar10
pr = cProfile.Profile()
pr.enable()
if args.data_set == "cifar10": if args.data_set == "cifar10":
class_dim = 10 class_dim = 10
if args.data_format == 'NCHW': if args.data_format == 'NCHW':
dshape = [3, 32, 32] dshape = [3, 32, 32]
else: else:
dshape = [32, 32, 3] dshape = [32, 32, 3]
model = resnet_cifar10
else: else:
class_dim = 102 class_dim = 102
if args.data_format == 'NCHW': if args.data_format == 'NCHW':
dshape = [3, 224, 224] dshape = [3, 224, 224]
else: else:
dshape = [224, 224, 3] dshape = [224, 224, 3]
model = resnet_imagenet
input = fluid.layers.data(name='data', shape=dshape, dtype='float32') input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
...@@ -206,9 +146,6 @@ def run_benchmark(model, args): ...@@ -206,9 +146,6 @@ def run_benchmark(model, args):
target_vars=[batch_acc, batch_size_tensor]) target_vars=[batch_acc, batch_size_tensor])
optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
opts = optimizer.minimize(avg_cost)
fluid.memory_optimize(fluid.default_main_program())
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -221,97 +158,4 @@ def run_benchmark(model, args): ...@@ -221,97 +158,4 @@ def run_benchmark(model, args):
if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
batch_size=args.batch_size) batch_size=args.batch_size)
def test(exe): return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc
test_accuracy = fluid.average.WeightedAverage()
for batch_id, data in enumerate(test_reader()):
img_data = np.array(map(lambda x: x[0].reshape(dshape),
data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
acc, weight = exe.run(inference_program,
feed={"data": img_data,
"label": y_data},
fetch_list=[batch_acc, batch_size_tensor])
test_accuracy.add(value=acc, weight=weight)
return test_accuracy.eval()
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
if args.use_fake_data:
data = train_reader().next()
image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
'float32')
label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1])
iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num):
accuracy.reset()
train_accs = []
train_losses = []
for batch_id, data in enumerate(train_reader()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
if not args.use_fake_data:
image = np.array(map(lambda x: x[0].reshape(dshape),
data)).astype('float32')
label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1])
loss, acc, weight = train_exe.run(
feed={'data': image,
'label': label},
fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
])
iters += 1
num_samples += len(label)
accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
train_losses.append(loss)
train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
(pass_id, iters, loss, acc))
print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
(pass_id, np.mean(train_losses), np.mean(train_accs)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
# evaluation
if args.with_test:
pass_test_acc = test(exe)
exit(0)
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- resnet Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == '__main__':
model_map = {
'resnet_imagenet': resnet_imagenet,
'resnet_cifar10': resnet_cifar10
}
args = parse_args()
print_arguments(args)
if args.data_format == 'NHWC':
raise ValueError('Only support NCHW data_format now.')
if args.use_nvprof and args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
run_benchmark(model_map[args.model], args)
else:
run_benchmark(model_map[args.model], args)
...@@ -29,57 +29,6 @@ import paddle.fluid as fluid ...@@ -29,57 +29,6 @@ import paddle.fluid as fluid
import paddle.batch as batch import paddle.batch as batch
import paddle.fluid.profiler as profiler import paddle.fluid.profiler as profiler
def parse_args():
parser = argparse.ArgumentParser("Understand Sentiment by Dynamic RNN.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
help='The sequence number of a batch data. (default: %(default)d)')
parser.add_argument(
'--skip_batch_num',
type=int,
default=5,
help='The first num of minibatch num to skip, for better performance test'
)
parser.add_argument(
'--iterations', type=int, default=80, help='The number of minibatches.')
parser.add_argument(
'--emb_dim',
type=int,
default=512,
help='Dimension of embedding table. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=512,
help='Hidden size of lstm unit. (default: %(default)d)')
parser.add_argument(
'--pass_num',
type=int,
default=100,
help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--device',
type=str,
default='CPU',
choices=['CPU', 'GPU'],
help='The device type.')
parser.add_argument(
'--crop_size',
type=int,
default=int(os.environ.get('CROP_SIZE', '1500')),
help='The max sentence length of input. Since this model use plain RNN,'
' Gradient could be explored if sentence is too long')
parser.add_argument(
'--with_test',
action='store_true',
help='If set, test the testset during training.')
args = parser.parse_args()
return args
word_dict = imdb.word_dict() word_dict = imdb.word_dict()
...@@ -94,14 +43,15 @@ def crop_sentence(reader, crop_size): ...@@ -94,14 +43,15 @@ def crop_sentence(reader, crop_size):
return __impl__ return __impl__
def main(): def get_model(args):
args = parse_args() lstm_size = 512
lstm_size = args.hidden_dim emb_dim = 512
crop_size = 1500
data = fluid.layers.data( data = fluid.layers.data(
name="words", shape=[1], lod_level=1, dtype='int64') name="words", shape=[1], lod_level=1, dtype='int64')
sentence = fluid.layers.embedding( sentence = fluid.layers.embedding(
input=data, size=[len(word_dict), args.emb_dim]) input=data, size=[len(word_dict), emb_dim])
sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh') sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')
...@@ -161,51 +111,17 @@ def main(): ...@@ -161,51 +111,17 @@ def main():
target_vars=[batch_acc, batch_size_tensor]) target_vars=[batch_acc, batch_size_tensor])
adam = fluid.optimizer.Adam() adam = fluid.optimizer.Adam()
adam.minimize(loss)
fluid.memory_optimize(fluid.default_main_program())
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
train_reader = batch( train_reader = batch(
paddle.reader.shuffle( paddle.reader.shuffle(
crop_sentence(imdb.train(word_dict), args.crop_size), crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000),
buf_size=25000), batch_size=args.batch_size)
test_reader = batch(
paddle.reader.shuffle(
crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000),
batch_size=args.batch_size) batch_size=args.batch_size)
iters, num_samples, start_time = 0, 0, time.time() return loss, inference_program, adam, train_reader, test_reader, batch_acc
for pass_id in range(args.pass_num):
train_accs = []
train_losses = []
for batch_id, data in enumerate(train_reader()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
tensor_words = to_lodtensor([x[0] for x in data], place)
label = numpy.array([x[1] for x in data]).astype("int64")
label = label.reshape((-1, 1))
loss_np, acc, weight = exe.run(
fluid.default_main_program(),
feed={"words": tensor_words,
"label": label},
fetch_list=[loss, batch_acc, batch_size_tensor])
iters += 1
for x in data:
num_samples += len(x[0])
print(
"Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, iters, loss_np, acc)
) # The accuracy is the accumulation of batches, but not the current batch.
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
exit(0)
def to_lodtensor(data, place): def to_lodtensor(data, place):
...@@ -221,16 +137,3 @@ def to_lodtensor(data, place): ...@@ -221,16 +137,3 @@ def to_lodtensor(data, place):
res.set(flattened_data, place) res.set(flattened_data, place)
res.set_lod([lod]) res.set_lod([lod])
return res return res
def print_arguments(args):
print('----------- lstm Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
main()
...@@ -23,46 +23,6 @@ import paddle.fluid.core as core ...@@ -23,46 +23,6 @@ import paddle.fluid.core as core
import argparse import argparse
import functools import functools
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--batch_size', type=int, default=128, help="Batch size for training.")
parser.add_argument(
'--skip_batch_num',
type=int,
default=5,
help='The first num of minibatch num to skip, for better performance test')
parser.add_argument(
'--iterations', type=int, default=80, help='The number of minibatches.')
parser.add_argument(
'--learning_rate',
type=float,
default=1e-3,
help="Learning rate for training.")
parser.add_argument('--pass_num', type=int, default=50, help="No. of passes.")
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help="The device type.")
parser.add_argument(
'--data_format',
type=str,
default='NCHW',
choices=['NCHW', 'NHWC'],
help='The data order, now only support NCHW.')
parser.add_argument(
'--data_set',
type=str,
default='cifar10',
choices=['cifar10', 'flowers'],
help='Optional dataset for benchmark.')
parser.add_argument(
'--with_test',
action='store_true',
help='If set, test the testset during training.')
args = parser.parse_args()
def vgg16_bn_drop(input): def vgg16_bn_drop(input):
def conv_block(input, num_filter, groups, dropouts): def conv_block(input, num_filter, groups, dropouts):
...@@ -91,7 +51,7 @@ def vgg16_bn_drop(input): ...@@ -91,7 +51,7 @@ def vgg16_bn_drop(input):
return fc2 return fc2
def main(): def get_model(args):
if args.data_set == "cifar10": if args.data_set == "cifar10":
classdim = 10 classdim = 10
if args.data_format == 'NCHW': if args.data_format == 'NCHW':
...@@ -128,16 +88,6 @@ def main(): ...@@ -128,16 +88,6 @@ def main():
# Optimization # Optimization
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate) optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
opts = optimizer.minimize(avg_cost)
fluid.memory_optimize(fluid.default_main_program())
# Initialize executor
place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
exe = fluid.Executor(place)
# Parameter initialization
exe.run(fluid.default_startup_program())
# data reader # data reader
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -151,78 +101,4 @@ def main(): ...@@ -151,78 +101,4 @@ def main():
if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
batch_size=args.batch_size) batch_size=args.batch_size)
# test return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc
def test(exe):
test_accuracy = fluid.average.WeightedAverage()
for batch_id, data in enumerate(test_reader()):
img_data = np.array(map(lambda x: x[0].reshape(data_shape),
data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
acc, weight = exe.run(inference_program,
feed={"pixel": img_data,
"label": y_data},
fetch_list=[batch_acc, batch_size_tensor])
test_accuracy.add(value=acc, weight=weight)
return test_accuracy.eval()
iters, num_samples, start_time = 0, 0, time.time()
accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
for pass_id in range(args.pass_num):
accuracy.reset()
train_accs = []
train_losses = []
for batch_id, data in enumerate(train_reader()):
if iters == args.skip_batch_num:
start_time = time.time()
num_samples = 0
if iters == args.iterations:
break
img_data = np.array(map(lambda x: x[0].reshape(data_shape),
data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
loss, acc, weight = train_exe.run(
feed={"pixel": img_data,
"label": y_data},
fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
])
accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
iters += 1
num_samples += len(y_data)
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
print(
"Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, iters, loss, acc)
) # The accuracy is the accumulation of batches, but not the current batch.
# pass_train_acc = accuracy.eval()
train_losses.append(loss)
train_accs.append(acc)
print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
(pass_id, np.mean(train_losses), np.mean(train_accs)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
# evaluation
if args.with_test:
pass_test_acc = test(exe)
exit(0)
def print_arguments():
print('----------- vgg Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == "__main__":
print_arguments()
main()
# util to check C++ file style
# * it basically use google cpplint.py.
# * It provide "add_style_check_target" for cmake.
# Usage see add_style_check_target's document
#
# TODO(yuyang18): Add python style check.
set(STYLE_FILTER)
# diable unwanted filters
# paddle do not indent public/potected/private in class
set(STYLE_FILTER "${STYLE_FILTER}-whitespace/indent,")
# paddle use mutable reference. BUT IT IS NOT RECOMMANDED
set(STYLE_FILTER "${STYLE_FILTER}-runtime/references,")
# paddle use relative path for include.
set(STYLE_FILTER "${STYLE_FILTER}-build/include,")
# paddle use <thread>, <mutex>, etc.
set(STYLE_FILTER "${STYLE_FILTER}-build/c++11,")
# paddle use c style casting. BUT IT IS NOT RECOMMANDED
set(STYLE_FILTER "${STYLE_FILTER}-readability/casting")
# IGNORE SOME FILES
set(IGNORE_PATTERN
.*ImportanceSampler.*
.*cblas\\.h.*
.*\\.pb\\.txt
.*MultiDataProvider.*
.*pb.*
.*pybind.h)
# add_style_check_target
#
# attach check code style step for target.
#
# first argument: target name to attach
# rest arguments: source list to check code style.
#
# NOTE: If WITH_STYLE_CHECK is OFF, then this macro just do nothing.
macro(add_style_check_target TARGET_NAME)
if(WITH_STYLE_CHECK)
set(SOURCES_LIST ${ARGN})
list(REMOVE_DUPLICATES SOURCES_LIST)
foreach(filename ${SOURCES_LIST})
foreach(pattern ${IGNORE_PATTERN})
if(filename MATCHES ${pattern})
list(REMOVE_ITEM SOURCES_LIST ${filename})
endif()
endforeach()
endforeach()
if(SOURCES_LIST)
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}"
${SOURCES_LIST}
COMMENT "cpplint: Checking source code style"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
endif()
endmacro()
...@@ -23,17 +23,20 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) ...@@ -23,17 +23,20 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc)
SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE) SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
include(ProcessorCount)
ProcessorCount(NUM_OF_PROCESSOR)
IF(APPLE) IF(APPLE)
SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh) SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
ELSE() ELSE()
SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin) SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j ${NUM_OF_PROCESSOR} static grpc_cpp_plugin)
ENDIF() ENDIF()
ExternalProject_Add( ExternalProject_Add(
extern_grpc extern_grpc
DEPENDS protobuf zlib DEPENDS protobuf zlib
GIT_REPOSITORY "https://github.com/grpc/grpc.git" URL "http://paddlepaddledeps.bj.bcebos.com/grpc.tar.xz"
GIT_TAG "v1.10.x"
PREFIX ${GRPC_SOURCES_DIR} PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -206,8 +206,6 @@ function(cc_library TARGET_NAME) ...@@ -206,8 +206,6 @@ function(cc_library TARGET_NAME)
list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif() endif()
endforeach() endforeach()
add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS})
else(cc_library_SRCS) else(cc_library_SRCS)
if(cc_library_DEPS) if(cc_library_DEPS)
merge_static_libs(${TARGET_NAME} ${cc_library_DEPS}) merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
...@@ -231,7 +229,7 @@ endfunction(cc_binary) ...@@ -231,7 +229,7 @@ endfunction(cc_binary)
function(cc_test TARGET_NAME) function(cc_test TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING)
set(options "") set(options SERIAL)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS) set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -241,6 +239,9 @@ function(cc_test TARGET_NAME) ...@@ -241,6 +239,9 @@ function(cc_test TARGET_NAME)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS} COMMAND ${TARGET_NAME} ${cc_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if (${cc_test_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1)
endif()
endif() endif()
endfunction(cc_test) endfunction(cc_test)
...@@ -268,7 +269,6 @@ function(nv_library TARGET_NAME) ...@@ -268,7 +269,6 @@ function(nv_library TARGET_NAME)
list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif() endif()
endforeach() endforeach()
add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS})
else(nv_library_SRCS) else(nv_library_SRCS)
if (nv_library_DEPS) if (nv_library_DEPS)
merge_static_libs(${TARGET_NAME} ${nv_library_DEPS}) merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
...@@ -295,7 +295,7 @@ endfunction(nv_binary) ...@@ -295,7 +295,7 @@ endfunction(nv_binary)
function(nv_test TARGET_NAME) function(nv_test TARGET_NAME)
if (WITH_GPU AND WITH_TESTING) if (WITH_GPU AND WITH_TESTING)
set(options "") set(options SERIAL)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -303,6 +303,9 @@ function(nv_test TARGET_NAME) ...@@ -303,6 +303,9 @@ function(nv_test TARGET_NAME)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL)
set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1)
endif()
endif() endif()
endfunction(nv_test) endfunction(nv_test)
...@@ -338,7 +341,6 @@ function(hip_library TARGET_NAME) ...@@ -338,7 +341,6 @@ function(hip_library TARGET_NAME)
list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif() endif()
endforeach() endforeach()
add_style_check_target(${TARGET_NAME} ${hip_library_SRCS} ${hip_library_HEADERS})
else(hip_library_SRCS) else(hip_library_SRCS)
if (hip_library_DEPS) if (hip_library_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_DEPS}) merge_static_libs(${TARGET_NAME} ${hip_library_DEPS})
......
...@@ -52,32 +52,32 @@ function(copy TARGET) ...@@ -52,32 +52,32 @@ function(copy TARGET)
endfunction() endfunction()
# third party # third party
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/eigen3") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/eigen3")
copy(eigen3_lib copy(eigen3_lib
SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen
DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported
) )
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/gflags") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/gflags")
copy(gflags_lib copy(gflags_lib
SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES} SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
) )
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/glog") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/glog")
copy(glog_lib copy(glog_lib
SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES} SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
) )
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/boost/") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/boost/")
copy(boost_lib copy(boost_lib
SRCS ${BOOST_INCLUDE_DIR}/boost SRCS ${BOOST_INCLUDE_DIR}/boost
DSTS ${dst_dir} DSTS ${dst_dir}
) )
if(NOT PROTOBUF_FOUND) if(NOT PROTOBUF_FOUND)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/protobuf") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
copy(protobuf_lib copy(protobuf_lib
SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY} SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY}
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
...@@ -85,13 +85,13 @@ if(NOT PROTOBUF_FOUND) ...@@ -85,13 +85,13 @@ if(NOT PROTOBUF_FOUND)
endif() endif()
if(NOT CBLAS_FOUND) if(NOT CBLAS_FOUND)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/openblas") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
copy(openblas_lib copy(openblas_lib
SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
DSTS ${dst_dir} ${dst_dir} DSTS ${dst_dir} ${dst_dir}
) )
elseif (WITH_MKLML) elseif (WITH_MKLML)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mklml") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml")
copy(mklml_lib copy(mklml_lib
SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR} SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR}
DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir} DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}
...@@ -99,7 +99,7 @@ elseif (WITH_MKLML) ...@@ -99,7 +99,7 @@ elseif (WITH_MKLML)
endif() endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mkldnn") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mkldnn")
copy(mkldnn_lib copy(mkldnn_lib
SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB} SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
...@@ -107,17 +107,17 @@ if(WITH_MKLDNN) ...@@ -107,17 +107,17 @@ if(WITH_MKLDNN)
endif() endif()
if(NOT MOBILE_INFERENCE AND NOT RPI) if(NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
copy(snappy_lib copy(snappy_lib
SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES} SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib) DSTS ${dst_dir} ${dst_dir}/lib)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream")
copy(snappystream_lib copy(snappystream_lib
SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES} SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib) DSTS ${dst_dir} ${dst_dir}/lib)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib")
copy(zlib_lib copy(zlib_lib
SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib) DSTS ${dst_dir} ${dst_dir}/lib)
...@@ -125,7 +125,7 @@ endif() ...@@ -125,7 +125,7 @@ endif()
# paddle fluid module # paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid") set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
set(module "framework") set(module "framework")
copy(framework_lib DEPS framework_py_proto copy(framework_lib DEPS framework_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
...@@ -165,15 +165,16 @@ copy(pybind_lib ...@@ -165,15 +165,16 @@ copy(pybind_lib
# CMakeCache Info # CMakeCache Info
copy(cmake_cache copy(cmake_cache
SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
DSTS ${CMAKE_INSTALL_PREFIX}) DSTS ${FLUID_INSTALL_DIR})
add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})
# paddle fluid version # paddle fluid version
execute_process( execute_process(
COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE PADDLE_GIT_COMMIT) OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
set(version_file ${CMAKE_INSTALL_PREFIX}/version.txt) set(version_file ${FLUID_INSTALL_DIR}/version.txt)
file(WRITE ${version_file} file(WRITE ${version_file}
"GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
"WITH_MKL: ${WITH_MKL}\n" "WITH_MKL: ${WITH_MKL}\n"
......
...@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") ...@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director # HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
set(IMPORT_PADDLE_STRING "")
set(IMPORT_PADDLEV2_STRING "")
configure_file( configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in" "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py" "${BINARY_BUILD_DIR_EN}/conf.py"
...@@ -27,8 +30,6 @@ sphinx_add_target(paddle_fluid_docs ...@@ -27,8 +30,6 @@ sphinx_add_target(paddle_fluid_docs
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN}) ${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_fluid_docs gen_proto_py paddle_python)
# configured documentation tools and intermediate build results # configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
...@@ -50,6 +51,4 @@ sphinx_add_target(paddle_fluid_docs_cn ...@@ -50,6 +51,4 @@ sphinx_add_target(paddle_fluid_docs_cn
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN}) ${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_fluid_docs_cn gen_proto_py paddle_python)
add_subdirectory(api) add_subdirectory(api)
...@@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") ...@@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director # HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
set(IMPORT_PADDLE_STRING "import paddle")
set(IMPORT_PADDLEV2_STRING "import paddle.v2")
configure_file( configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in" "${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py" "${BINARY_BUILD_DIR_EN}/conf.py"
......
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
====
clip
====
ErrorClipByValue
----------------
.. autoclass:: paddle.fluid.clip.ErrorClipByValue
:members:
:noindex:
GradientClipByValue
-------------------
.. autoclass:: paddle.fluid.clip.GradientClipByValue
:members:
:noindex:
GradientClipByNorm
------------------
.. autoclass:: paddle.fluid.clip.GradientClipByNorm
:members:
:noindex:
GradientClipByGlobalNorm
------------------------
.. autoclass:: paddle.fluid.clip.GradientClipByGlobalNorm
:members:
:noindex:
append_gradient_clip_ops
------------------------
.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops
:noindex:
error_clip_callback
-------------------
.. autofunction:: paddle.fluid.clip.error_clip_callback
:noindex:
...@@ -5,24 +5,3 @@ ...@@ -5,24 +5,3 @@
evaluator evaluator
========= =========
ChunkEvaluator
--------------
.. autoclass:: paddle.fluid.evaluator.ChunkEvaluator
:members:
:noindex:
EditDistance
--------------
.. autoclass:: paddle.fluid.evaluator.EditDistance
:members:
:noindex:
DetectionMAP
--------------
.. autoclass:: paddle.fluid.evaluator.DetectionMAP
:members:
:noindex:
...@@ -30,3 +30,9 @@ switch_scope ...@@ -30,3 +30,9 @@ switch_scope
.. autofunction:: paddle.fluid.executor.switch_scope .. autofunction:: paddle.fluid.executor.switch_scope
:noindex: :noindex:
fetch_var
---------
.. autofunction:: paddle.fluid.executor.fetch_var
:noindex:
#!/bin/bash #!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst
for module in io data_feeder evaluator executor initializer io nets optimizer param_attr profiler regularizer for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do do
python gen_doc.py ${module} > ${module}.rst python gen_doc.py ${module} > ${module}.rst
done done
...@@ -9,8 +9,9 @@ Fluid ...@@ -9,8 +9,9 @@ Fluid
data_feeder.rst data_feeder.rst
executor.rst executor.rst
initializer.rst initializer.rst
evaluator.rst metrics.rst
nets.rst nets.rst
clip.rst
optimizer.rst optimizer.rst
param_attr.rst param_attr.rst
profiler.rst profiler.rst
......
...@@ -33,11 +33,16 @@ Xavier ...@@ -33,11 +33,16 @@ Xavier
:members: :members:
:noindex: :noindex:
MSRA force_init_on_cpu
------ -----------------
.. autoclass:: paddle.fluid.initializer.MSRA .. autofunction:: paddle.fluid.initializer.force_init_on_cpu
:members: :noindex:
init_on_cpu
-----------
.. autofunction:: paddle.fluid.initializer.init_on_cpu
:noindex: :noindex:
ConstantInitializer ConstantInitializer
...@@ -68,9 +73,3 @@ XavierInitializer ...@@ -68,9 +73,3 @@ XavierInitializer
:members: :members:
:noindex: :noindex:
MSRAInitializer
-----------------
.. autoclass:: paddle.fluid.initializer.MSRAInitializer
:members:
:noindex:
...@@ -55,6 +55,13 @@ While ...@@ -55,6 +55,13 @@ While
:members: :members:
:noindex: :noindex:
Switch
------
.. autoclass:: paddle.fluid.layers.Switch
:members:
:noindex:
lod_rank_table lod_rank_table
-------------- --------------
...@@ -67,12 +74,6 @@ max_sequence_len ...@@ -67,12 +74,6 @@ max_sequence_len
.. autofunction:: paddle.fluid.layers.max_sequence_len .. autofunction:: paddle.fluid.layers.max_sequence_len
:noindex: :noindex:
topk
----
.. autofunction:: paddle.fluid.layers.topk
:noindex:
lod_tensor_to_array lod_tensor_to_array
------------------- -------------------
...@@ -109,6 +110,12 @@ less_than ...@@ -109,6 +110,12 @@ less_than
.. autofunction:: paddle.fluid.layers.less_than .. autofunction:: paddle.fluid.layers.less_than
:noindex: :noindex:
equal
-----
.. autofunction:: paddle.fluid.layers.equal
:noindex:
array_read array_read
---------- ----------
...@@ -212,6 +219,42 @@ Send ...@@ -212,6 +219,42 @@ Send
.. autofunction:: paddle.fluid.layers.Send .. autofunction:: paddle.fluid.layers.Send
:noindex: :noindex:
open_recordio_file
------------------
.. autofunction:: paddle.fluid.layers.open_recordio_file
:noindex:
open_files
----------
.. autofunction:: paddle.fluid.layers.open_files
:noindex:
read_file
---------
.. autofunction:: paddle.fluid.layers.read_file
:noindex:
shuffle
-------
.. autofunction:: paddle.fluid.layers.shuffle
:noindex:
batch
-----
.. autofunction:: paddle.fluid.layers.batch
:noindex:
double_buffer
-------------
.. autofunction:: paddle.fluid.layers.double_buffer
:noindex:
nn nn
== ==
...@@ -281,12 +324,6 @@ square_error_cost ...@@ -281,12 +324,6 @@ square_error_cost
.. autofunction:: paddle.fluid.layers.square_error_cost .. autofunction:: paddle.fluid.layers.square_error_cost
:noindex: :noindex:
accuracy
--------
.. autofunction:: paddle.fluid.layers.accuracy
:noindex:
chunk_eval chunk_eval
---------- ----------
...@@ -311,6 +348,18 @@ sequence_pool ...@@ -311,6 +348,18 @@ sequence_pool
.. autofunction:: paddle.fluid.layers.sequence_pool .. autofunction:: paddle.fluid.layers.sequence_pool
:noindex: :noindex:
sequence_softmax
----------------
.. autofunction:: paddle.fluid.layers.sequence_softmax
:noindex:
softmax
-------
.. autofunction:: paddle.fluid.layers.softmax
:noindex:
pool2d pool2d
------ ------
...@@ -323,12 +372,6 @@ batch_norm ...@@ -323,12 +372,6 @@ batch_norm
.. autofunction:: paddle.fluid.layers.batch_norm .. autofunction:: paddle.fluid.layers.batch_norm
:noindex: :noindex:
layer_norm
----------
.. autofunction:: paddle.fluid.layers.layer_norm
:noindex:
beam_search_decode beam_search_decode
------------------ ------------------
...@@ -377,6 +420,12 @@ reduce_min ...@@ -377,6 +420,12 @@ reduce_min
.. autofunction:: paddle.fluid.layers.reduce_min .. autofunction:: paddle.fluid.layers.reduce_min
:noindex: :noindex:
reduce_prod
-----------
.. autofunction:: paddle.fluid.layers.reduce_prod
:noindex:
sequence_first_step sequence_first_step
------------------- -------------------
...@@ -425,6 +474,12 @@ matmul ...@@ -425,6 +474,12 @@ matmul
.. autofunction:: paddle.fluid.layers.matmul .. autofunction:: paddle.fluid.layers.matmul
:noindex: :noindex:
topk
----
.. autofunction:: paddle.fluid.layers.topk
:noindex:
warpctc warpctc
------- -------
...@@ -473,6 +528,60 @@ multiplex ...@@ -473,6 +528,60 @@ multiplex
.. autofunction:: paddle.fluid.layers.multiplex .. autofunction:: paddle.fluid.layers.multiplex
:noindex: :noindex:
layer_norm
----------
.. autofunction:: paddle.fluid.layers.layer_norm
:noindex:
softmax_with_cross_entropy
--------------------------
.. autofunction:: paddle.fluid.layers.softmax_with_cross_entropy
:noindex:
smooth_l1
---------
.. autofunction:: paddle.fluid.layers.smooth_l1
:noindex:
one_hot
-------
.. autofunction:: paddle.fluid.layers.one_hot
:noindex:
autoincreased_step_counter
--------------------------
.. autofunction:: paddle.fluid.layers.autoincreased_step_counter
:noindex:
reshape
-------
.. autofunction:: paddle.fluid.layers.reshape
:noindex:
lod_reset
---------
.. autofunction:: paddle.fluid.layers.lod_reset
:noindex:
lrn
---
.. autofunction:: paddle.fluid.layers.lrn
:noindex:
pad
---
.. autofunction:: paddle.fluid.layers.pad
:noindex:
label_smooth label_smooth
------------ ------------
...@@ -480,12 +589,12 @@ label_smooth ...@@ -480,12 +589,12 @@ label_smooth
:noindex: :noindex:
roi_pool roi_pool
--------- --------
.. autofunction:: paddle.fluid.layers.roi_pool .. autofunction:: paddle.fluid.layers.roi_pool
:noindex: :noindex:
ops ops
=== ===
...@@ -501,18 +610,6 @@ mul ...@@ -501,18 +610,6 @@ mul
.. autofunction:: paddle.fluid.layers.mul .. autofunction:: paddle.fluid.layers.mul
:noindex: :noindex:
reshape
-------
.. autofunction:: paddle.fluid.layers.reshape
:noindex:
pad
---
.. autofunction:: paddle.fluid.layers.pad
:noindex:
scale scale
----- -----
...@@ -579,10 +676,70 @@ clip_by_norm ...@@ -579,10 +676,70 @@ clip_by_norm
.. autofunction:: paddle.fluid.layers.clip_by_norm .. autofunction:: paddle.fluid.layers.clip_by_norm
:noindex: :noindex:
sequence_softmax logical_and
---------------- -----------
.. autofunction:: paddle.fluid.layers.sequence_softmax .. autofunction:: paddle.fluid.layers.logical_and
:noindex:
logical_or
----------
.. autofunction:: paddle.fluid.layers.logical_or
:noindex:
logical_xor
-----------
.. autofunction:: paddle.fluid.layers.logical_xor
:noindex:
logical_not
-----------
.. autofunction:: paddle.fluid.layers.logical_not
:noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
uniform_random_batch_size_like
------------------------------
.. autofunction:: paddle.fluid.layers.uniform_random_batch_size_like
:noindex:
gaussian_random
---------------
.. autofunction:: paddle.fluid.layers.gaussian_random
:noindex:
gaussian_random_batch_size_like
-------------------------------
.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
scatter
-------
.. autofunction:: paddle.fluid.layers.scatter
:noindex:
sum
---
.. autofunction:: paddle.fluid.layers.sum
:noindex: :noindex:
sigmoid sigmoid
...@@ -651,6 +808,18 @@ floor ...@@ -651,6 +808,18 @@ floor
.. autofunction:: paddle.fluid.layers.floor .. autofunction:: paddle.fluid.layers.floor
:noindex: :noindex:
cos
---
.. autofunction:: paddle.fluid.layers.cos
:noindex:
sin
---
.. autofunction:: paddle.fluid.layers.sin
:noindex:
round round
----- -----
...@@ -828,4 +997,15 @@ topk ...@@ -828,4 +997,15 @@ topk
.. autofunction:: paddle.fluid.layers.topk .. autofunction:: paddle.fluid.layers.topk
:noindex: :noindex:
dice_loss
----
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
bilinear_interp
____
.. autofunction:: paddle.fluid.layers.bilinear_interp
:noindex:
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
=======
metrics
=======
MetricBase
----------
.. autoclass:: paddle.fluid.metrics.MetricBase
:members:
:noindex:
CompositeMetric
---------------
.. autoclass:: paddle.fluid.metrics.CompositeMetric
:members:
:noindex:
Accuracy
--------
.. autoclass:: paddle.fluid.metrics.Accuracy
:members:
:noindex:
ChunkEvaluator
--------------
.. autoclass:: paddle.fluid.metrics.ChunkEvaluator
:members:
:noindex:
EditDistance
------------
.. autoclass:: paddle.fluid.metrics.EditDistance
:members:
:noindex:
DetectionMAP
------------
.. autoclass:: paddle.fluid.metrics.DetectionMAP
:members:
:noindex:
Auc
---
.. autoclass:: paddle.fluid.metrics.Auc
:members:
:noindex:
...@@ -47,6 +47,28 @@ DecayedAdagrad ...@@ -47,6 +47,28 @@ DecayedAdagrad
:members: :members:
:noindex: :noindex:
Adadelta
-----------------
.. autoclass:: paddle.fluid.optimizer.Adadelta
:members:
:noindex:
RMSProp
-----------------
.. autoclass:: paddle.fluid.optimizer.RMSProp
:members:
:noindex:
ModelAverage
-----------------
.. autoclass:: paddle.fluid.optimizer.ModelAverage
:members:
:noindex:
SGDOptimizer SGDOptimizer
------------ ------------
...@@ -89,9 +111,25 @@ DecayedAdagradOptimizer ...@@ -89,9 +111,25 @@ DecayedAdagradOptimizer
:members: :members:
:noindex: :noindex:
Adadelta
-------------- AdadeltaOptimizer
-----------------
.. autoclass:: paddle.fluid.optimizer.AdadeltaOptimizer .. autoclass:: paddle.fluid.optimizer.AdadeltaOptimizer
:members: :members:
:noindex: :noindex:
RMSPropOptimizer
-----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
Optimizer
---------
.. autoclass:: paddle.fluid.optimizer.Optimizer
:members:
:noindex:
...@@ -11,6 +11,13 @@ append_regularization_ops ...@@ -11,6 +11,13 @@ append_regularization_ops
.. autofunction:: paddle.fluid.regularizer.append_regularization_ops .. autofunction:: paddle.fluid.regularizer.append_regularization_ops
:noindex: :noindex:
WeightDecayRegularizer
----------------------
.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer
:members:
:noindex:
L1Decay L1Decay
------- -------
...@@ -26,15 +33,16 @@ L2Decay ...@@ -26,15 +33,16 @@ L2Decay
:noindex: :noindex:
L1DecayRegularizer L1DecayRegularizer
--------------------- ------------------
.. autoclass:: paddle.fluid.regularizer.L1DecayRegularizer .. autoclass:: paddle.fluid.regularizer.L1DecayRegularizer
:members: :members:
:noindex: :noindex:
L2DecayRegularizer L2DecayRegularizer
--------------------- ------------------
.. autoclass:: paddle.fluid.regularizer.L2DecayRegularizer .. autoclass:: paddle.fluid.regularizer.L2DecayRegularizer
:members: :members:
:noindex: :noindex:
...@@ -3,5 +3,6 @@ ...@@ -3,5 +3,6 @@
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
optimization/index_cn.rst optimization/index_cn.rst
inference/inference_support_in_fluid.md
...@@ -5,3 +5,4 @@ HOW TO ...@@ -5,3 +5,4 @@ HOW TO
:maxdepth: 1 :maxdepth: 1
optimization/index_en.rst optimization/index_en.rst
inference/inference_support_in_fluid.md
# Fluid Inference使用指南
## 目录:
- Python Inference API
- 编译Fluid Inference库
- Inference C++ API
- Inference实例
- Inference计算优化
## Python Inference API **[改进中]**
- 保存Inference模型 ([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/io.py#L295))
```python
def save_inference_model(dirname,
feeded_var_names,
target_vars,
executor,
main_program=None,
model_filename=None,
params_filename=None):
```
Inference模型和参数将会保存到`dirname`目录下:
- 序列化的模型
- `model_filename``None`,保存到`dirname/__model__`
- `model_filename``None`,保存到`dirname/model_filename`
- 参数
- `params_filename``None`,单独保存到各个独立的文件,各文件以参数变量的名字命名
- `params_filename``None`,保存到`dirname/params_filename`
- 两种存储格式
- 参数保存到各个独立的文件
- 如,设置`model_filename``None``params_filename``None`
```bash
$ cd recognize_digits_conv.inference.model
$ ls
$ __model__ batch_norm_1.w_0 batch_norm_1.w_2 conv2d_2.w_0 conv2d_3.w_0 fc_1.w_0 batch_norm_1.b_0 batch_norm_1.w_1 conv2d_2.b_0 conv2d_3.b_0 fc_1.b_0
```
- 参数保存到同一个文件
- 如,设置`model_filename``None``params_filename``__params__`
```bash
$ cd recognize_digits_conv.inference.model
$ ls
$ __model__ __params__
```
- 加载Inference模型([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/io.py#L380))
```python
def load_inference_model(dirname,
executor,
model_filename=None,
params_filename=None):
...
return [program, feed_target_names, fetch_targets]
```
## 编译Fluid Inference库
- **不需要额外的CMake选项**
- 1、 配置CMake命令,更多配置请参考[源码编译PaddlePaddle](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/build_from_source_cn.html)
```bash
$ git clone https://github.com/PaddlePaddle/Paddle.git
$ cd Paddle
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX=your/path/to/paddle_inference_lib \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_PYTHON=ON \
-DWITH_MKL=OFF \
-DWITH_GPU=OFF \
..
```
- 2、 编译PaddlePaddle
```bash
$ make
```
- 3、 部署。执行如下命令将PaddlePaddle Fluid Inference库部署到`your/path/to/paddle_inference_lib`目录。
```bash
$ make inference_lib_dist
```
- 目录结构
```bash
$ cd your/path/to/paddle_inference_lib
$ tree
.
|-- paddle
| `-- fluid
| |-- framework
| |-- inference
| | |-- io.h
| | `-- libpaddle_fluid.so
| |-- memory
| |-- platform
| `-- string
|-- third_party
| |-- eigen3
| `-- install
| |-- gflags
| |-- glog
| `-- protobuf
`-- ...
```
假设`PADDLE_ROOT=your/path/to/paddle_inference_lib`
## 链接Fluid Inference库
- 示例项目([链接](https://github.com/luotao1/fluid_inference_example.git))
- GCC配置
```bash
$ g++ -o a.out -std=c++11 main.cc \
-I${PADDLE_ROOT}/ \
-I${PADDLE_ROOT}/third_party/install/gflags/include \
-I${PADDLE_ROOT}/third_party/install/glog/include \
-I${PADDLE_ROOT}/third_party/install/protobuf/include \
-I${PADDLE_ROOT}/third_party/eigen3 \
-L${PADDLE_ROOT}/paddle/fluid/inference -lpaddle_fluid \
-lrt -ldl -lpthread
```
- CMake配置
```cmake
include_directories(${PADDLE_ROOT}/)
include_directories(${PADDLE_ROOT}/third_party/install/gflags/include)
include_directories(${PADDLE_ROOT}/third_party/install/glog/include)
include_directories(${PADDLE_ROOT}/third_party/install/protobuf/include)
include_directories(${PADDLE_ROOT}/third_party/eigen3)
target_link_libraries(${TARGET_NAME}
${PADDLE_ROOT}/paddle/fluid/inference/libpaddle_fluid.so
-lrt -ldl -lpthread)
```
- 设置环境变量:
`export LD_LIBRARY_PATH=${PADDLE_ROOT}/paddle/fluid/inference:$LD_LIBRARY_PATH`
## C++ Inference API
- 推断流程([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/test_helper.h#L91))
- 1、 初始化设备
```cpp
#include "paddle/fluid/framework/init.h"
paddle::framework::InitDevices(false);
```
- 2、 定义place,executor,scope
```cpp
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto* scope = new paddle::framework::Scope();
```
- 3、 加载模型
```cpp
#include "paddle/fluid/inference/io.h"
auto inference_program = paddle::inference::Load(executor, *scope, dirname);
// or
auto inference_program = paddle::inference::Load(executor,
*scope,
dirname + "/" + model_filename,
dirname + "/" + params_filename);
```
- 4、 获取`feed_target_names``fetch_target_names`
```cpp
const std::vector<std::string>& feed_target_names = inference_program->GetFeedTargetNames();
const std::vector<std::string>& fetch_target_names = inference_program->GetFetchTargetNames();
```
- 5、 准备`feed`数据
```cpp
#include "paddle/fluid/framework/lod_tensor.h"
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
...
std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
for (size_t i = 0; i < feed_target_names.size(); ++i) {
// Please make sure that cpu_feeds[i] is right for feed_target_names[i]
feed_targets[feed_target_names[i]] = cpu_feeds[i];
}
```
- 6、 定义`Tensor``fetch`结果
```cpp
std::vector<paddle::framework::LoDTensor*> cpu_fetchs;
std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
for (size_t i = 0; i < fetch_target_names.size(); ++i) {
fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
}
```
- 7、 执行`inference_program`
```cpp
executor.Run(*inference_program, scope, feed_targets, fetch_targets);
```
- 8、 使用`fetch`数据
```cpp
for (size_t i = 0; i < cpu_fetchs.size(); ++i) {
std::cout << "lod_i: " << cpu_fetchs[i]->lod();
std::cout << "dims_i: " << cpu_fetchs[i]->dims();
std::cout << "result:";
float* output_ptr = cpu_fetchs[i]->data<float>();
for (int j = 0; j < cpu_fetchs[i]->numel(); ++j) {
std::cout << " " << output_ptr[j];
}
std::cout << std::endl;
}
```
针对不同的数据,4. - 8.可执行多次。
- 9、 释放内存
```cpp
delete scope;
```
- 接口说明
```cpp
void Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>& feed_targets,
std::map<std::string, LoDTensor*>& fetch_targets,
bool create_vars = true,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");
```
- 使用Python API `save_inference_model`保存的`program`里面包含了`feed_op``fetch_op`,用户提供的`feed_targets``fetch_targets`必须和`inference_program`中的`feed_op``fetch_op`保持一致。
- 用户提供的`feed_holder_name``fetch_holder_name`也必须和`inference_program``feed_op``fetch_op`保持一致,可使用`SetFeedHolderName``SetFetchHolderName`接口重新设置`inferece_program`
- 默认情况下,除了`persistable`属性设置为`True``Variable`之外,每次执行`executor.Run`会创建一个局部`Scope`,并且在这个局部`Scope`中创建和销毁所有的`Variable`,以最小化空闲时的内存占用。
- `persistable`属性为`True``Variable`有:
- Operators的参数`w``b`
- `feed_op`的输入变量
- `fetch_op`的输出变量
- **不在每次执行时创建和销毁变量
([PR](https://github.com/PaddlePaddle/Paddle/pull/9301))**
- 执行`inference_program`
```cpp
// Call once
executor.CreateVariables(*inference_program, scope, 0);
// Call as many times as you like
executor.Run(
*inference_program, scope, feed_targets, fetch_targets, false);
```
- **优点**
- 节省了频繁创建、销毁变量的时间(约占每次`Run`总时间的1% ~ 12%)
- 执行结束后可获取所有Operators的计算结果
- **缺点**
- 空闲时也会占用大量的内存
- 在同一个`Scope`中,相同的变量名是公用同一块内存的,容易引起意想不到的错误
- **不在每次执行时创建Op([PR](https://github.com/PaddlePaddle/Paddle/pull/9630))**
- 执行`inference_program`
```cpp
// Call once
auto ctx = executor.Prepare(*inference_program, 0);
// Call as many times as you like if you have no need to change the inference_program
executor.RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets);
```
- **优点**
- 节省了频繁创建、销毁Op的时间
- **缺点**
- 一旦修改了`inference_program`,则需要重新创建`ctx`
- **多线程共享Parameters([链接](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/test_multi_thread_helper.h))**
- 主线程
- 1、 初始化设备
- 2、 定义`place``executor``scope`
- 3、 加载模型,得到`inference_program`
- 从线程
- **复制`inference_program`得到`copy_program`,修改`copy_program`的`feed_holder_name`和`fetch_holder_name`**
```cpp
auto copy_program = std::unique_ptr<paddle::framework::ProgramDesc>(
new paddle::framework::ProgramDesc(*inference_program));
std::string feed_holder_name = "feed_" + paddle::string::to_string(thread_id);
std::string fetch_holder_name = "fetch_" + paddle::string::to_string(thread_id);
copy_program->SetFeedHolderName(feed_holder_name);
copy_program->SetFetchHolderName(fetch_holder_name);
```
- 4、 获取`copy_program``feed_target_names``fetch_target_names`
- 5、 准备feed数据,定义Tensor来fetch结果
- 6、 执行`copy_program`
```cpp
executor->Run(*copy_program, scope, feed_targets, fetch_targets, true, feed_holder_name, fetch_holder_name);
```
- 7、 使用fetch数据
- 主线程
- 8、 释放资源
- 基本概念
- 数据相关:
- [Tensor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/tensor.md),一个N维数组,数据可以是任意类型(int,float,double等)
- [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md),带LoD(Level-of-Detail)即序列信息的Tensor
- [Scope](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md),记录了变量Variable
- 执行相关:
- [Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/executor.md),无状态执行器,只跟设备相关
- Place
- CPUPlace,CPU设备
- CUDAPlace,CUDA GPU设备
- 神经网络表示:
- [Program](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/program.md).
详细介绍请参考[**Paddle Fluid开发者指南**](https://github.com/lcy-seso/learning_notes/blob/master/Fluid/developer's_guid_for_Fluid/Developer's_Guide_to_Paddle_Fluid.md)
## Inference实例
1. fit a line: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_fit_a_line.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc)
1. image classification: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_image_classification.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_image_classification.cc)
1. label semantic roles: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_label_semantic_roles.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_label_semantic_roles.cc)
1. recognize digits: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recognize_digits.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_recognize_digits.cc)
1. recommender system: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_recommender_system.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_recommender_system.cc)
1. understand sentiment: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_understand_sentiment.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_understand_sentiment.cc)
1. word2vec: [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/tests/book/test_word2vec.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/tests/book/test_inference_word2vec.cc)
## Inference计算优化
- 使用Python推理优化工具([inference_transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/inference_transpiler.py))
```python
class InferenceTranspiler:
def transpile(self, program, place, scope=None):
...
if scope is None:
scope = global_scope()
...
```
- 使用`InferenceTranspiler`将会直接修改`program`
- 使用`InferenceTranspiler`会修改参数的值,请确保`program`的参数在`scope`内。
- 支持的优化
- 融合batch_norm op的计算
- 使用示例([链接](https://github.com/Xreki/Xreki.github.io/blob/master/fluid/inference/inference_transpiler.py))
```python
import paddle.fluid as fluid
# NOTE: Applying the inference transpiler will change the inference_program.
t = fluid.InferenceTranspiler()
t.transpile(inference_program, place, inference_scope)
```
## 内存使用优化
- 使用Python内存优化工具([memory_optimization_transipiler](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/memory_optimization_transpiler.py))
```python
fluid.memory_optimize(inference_program)
```
...@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") ...@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director # HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
set(IMPORT_PADDLE_STRING "")
set(IMPORT_PADDLEV2_STRING "")
configure_file( configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in" "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py" "${BINARY_BUILD_DIR_EN}/conf.py"
...@@ -27,8 +30,6 @@ sphinx_add_target(paddle_mobile_docs ...@@ -27,8 +30,6 @@ sphinx_add_target(paddle_mobile_docs
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN}) ${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_mobile_docs gen_proto_py paddle_python)
# configured documentation tools and intermediate build results # configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
...@@ -49,5 +50,3 @@ sphinx_add_target(paddle_mobile_docs_cn ...@@ -49,5 +50,3 @@ sphinx_add_target(paddle_mobile_docs_cn
${SPHINX_CACHE_DIR_CN} ${SPHINX_CACHE_DIR_CN}
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN}) ${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_mobile_docs_cn gen_proto_py paddle_python)
移动端 移动端
===== ======
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
cross_compiling_for_android_cn.md cross_compiling_for_android_cn.md
cross_compiling_for_ios_cn.md cross_compiling_for_ios_cn.md
cross_compiling_for_raspberry_cn.md cross_compiling_for_raspberry_cn.md
\ No newline at end of file
...@@ -16,8 +16,8 @@ import os, subprocess ...@@ -16,8 +16,8 @@ import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python')) sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python'))
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
import paddle @IMPORT_PADDLE_STRING@
import paddle.v2 @IMPORT_PADDLEV2_STRING@
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
......
...@@ -16,8 +16,8 @@ import os, subprocess ...@@ -16,8 +16,8 @@ import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python')) sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python'))
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
import paddle @IMPORT_PADDLE_STRING@
import paddle.v2 @IMPORT_PADDLEV2_STRING@
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
......
...@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") ...@@ -15,6 +15,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director # HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
set(IMPORT_PADDLE_STRING "")
set(IMPORT_PADDLEV2_STRING "")
configure_file( configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in" "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py" "${BINARY_BUILD_DIR_EN}/conf.py"
...@@ -27,8 +30,6 @@ sphinx_add_target(paddle_v2_docs ...@@ -27,8 +30,6 @@ sphinx_add_target(paddle_v2_docs
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN}) ${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_v2_docs gen_proto_py paddle_python)
# configured documentation tools and intermediate build results # configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
...@@ -50,6 +51,4 @@ sphinx_add_target(paddle_v2_docs_cn ...@@ -50,6 +51,4 @@ sphinx_add_target(paddle_v2_docs_cn
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN}) ${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_v2_docs_cn gen_proto_py paddle_python)
add_subdirectory(api) add_subdirectory(api)
...@@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") ...@@ -7,6 +7,9 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output director # HTML output director
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
set(IMPORT_PADDLE_STRING "import paddle")
set(IMPORT_PADDLEV2_STRING "import paddle.v2")
configure_file( configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in" "${CMAKE_CURRENT_SOURCE_DIR}/../../templates/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py" "${BINARY_BUILD_DIR_EN}/conf.py"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
---------------- ----------------
PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像 PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境Docker镜像
可以在 `这里 <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ 找到,您也可以 可以在 `这里 <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`__ 找到,您也可以
在 `这里 <https://github.com/PaddlePaddle/Paddle/tree/develop/tools/manylinux1/>`_ 找到 paddle_manylinux_devel 在 `这里 <https://github.com/PaddlePaddle/Paddle/tree/develop/tools/manylinux1/>`__ 找到 paddle_manylinux_devel
镜像的编译以及使用方法。或者参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像。 镜像的编译以及使用方法。或者参考下述可选步骤,从源码中构建用于编译PaddlePaddle的Docker镜像。
如果您选择不使用Docker镜像,则需要在本机安装下面章节列出的 `编译依赖`_ 之后才能开始编译的步骤。 如果您选择不使用Docker镜像,则需要在本机安装下面章节列出的 `编译依赖`_ 之后才能开始编译的步骤。
...@@ -35,7 +35,7 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 ...@@ -35,7 +35,7 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
# 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像 # 2. 可选步骤:源码中构建用于编译PaddlePaddle的Docker镜像
docker build -t paddle:dev . docker build -t paddle:dev .
# 3. 执行下面的命令编译CPU-Only的二进制 # 3. 执行下面的命令编译CPU-Only的二进制
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
# 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步) # 4. 或者也可以使用为上述可选步骤构建的镜像(必须先执行第2步)
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
...@@ -116,11 +116,10 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安 ...@@ -116,11 +116,10 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行 很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行
```emacs .. code-block:: emacs
(global-set-key "\C-cc" 'compile)
(setq compile-command (global-set-key "\C-cc" 'compile)
"docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") (setq compile-command "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
```
就可以按 `Ctrl-C` 和 `c` 键来启动编译了。 就可以按 `Ctrl-C` 和 `c` 键来启动编译了。
......
...@@ -23,7 +23,7 @@ You need to use Docker to build PaddlePaddle ...@@ -23,7 +23,7 @@ You need to use Docker to build PaddlePaddle
to avoid installing dependencies by yourself. We have several pre-built to avoid installing dependencies by yourself. We have several pre-built
Docker images `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ , Docker images `here <https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/>`_ ,
you can also find how to build and use paddle_manylinux_devel Docker image from you can also find how to build and use paddle_manylinux_devel Docker image from
`here <https://github.com/PaddlePaddle/Paddle/tree/develop/tools/manylinux1/>`_ `here <https://github.com/PaddlePaddle/Paddle/tree/develop/tools/manylinux1/>`__
Or you can build your own image from source as the optional step below: Or you can build your own image from source as the optional step below:
.. code-block:: bash .. code-block:: bash
...@@ -34,7 +34,7 @@ Or you can build your own image from source as the optional step below: ...@@ -34,7 +34,7 @@ Or you can build your own image from source as the optional step below:
# 2. Optional: build development docker image from source # 2. Optional: build development docker image from source
docker build -t paddle:dev . docker build -t paddle:dev .
# 3. Run the following command to build a CPU-Only binaries # 3. Run the following command to build a CPU-Only binaries
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
# 4. Or, use your built Docker image to build PaddlePaddle (must run step 2) # 4. Or, use your built Docker image to build PaddlePaddle (must run step 2)
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
...@@ -88,7 +88,7 @@ If you wish to run only one unit test, like :code:`test_sum_op`: ...@@ -88,7 +88,7 @@ If you wish to run only one unit test, like :code:`test_sum_op`:
.. _faq_docker: .. _faq_docker:
Frequently Asked Questions Frequently Asked Questions
---------------- ---------------------------
- What is Docker? - What is Docker?
...@@ -118,11 +118,10 @@ Frequently Asked Questions ...@@ -118,11 +118,10 @@ Frequently Asked Questions
Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file: Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file:
```emacs .. code-block:: emacs
(global-set-key "\C-cc" 'compile)
(setq compile-command (global-set-key "\C-cc" 'compile)
"docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") (setq compile-command "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev")
```
so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. so they could type `Ctrl-C` and `c` to build PaddlePaddle from source.
...@@ -145,7 +144,7 @@ Frequently Asked Questions ...@@ -145,7 +144,7 @@ Frequently Asked Questions
.. _compile_deps: .. _compile_deps:
Appendix: Compile Dependencies Appendix: Compile Dependencies
---------------- -------------------------------
PaddlePaddle need the following dependencies when compiling, other dependencies PaddlePaddle need the following dependencies when compiling, other dependencies
will be downloaded automatically. will be downloaded automatically.
...@@ -166,11 +165,11 @@ will be downloaded automatically. ...@@ -166,11 +165,11 @@ will be downloaded automatically.
.. _build_options: .. _build_options:
Appendix: Build Options Appendix: Build Options
---------------- -------------------------
Build options include whether build binaries for CPU or GPU, which BLAS Build options include whether build binaries for CPU or GPU, which BLAS
library to use etc. You may pass these settings when running cmake. library to use etc. You may pass these settings when running cmake.
For detailed cmake tutorial please refer to `here <https://cmake.org/cmake-tutorial>`_ 。 For detailed cmake tutorial please refer to `here <https://cmake.org/cmake-tutorial>`__
You can add :code:`-D` argument to pass such options, like: You can add :code:`-D` argument to pass such options, like:
...@@ -219,7 +218,7 @@ keep on with latest cuDNN versions. Be sure to run with the same version of cuDN ...@@ -219,7 +218,7 @@ keep on with latest cuDNN versions. Be sure to run with the same version of cuDN
you built. you built.
Pass Compile Options Pass Compile Options
++++++++++++++ ++++++++++++++++++++++
You can pass compile options to use intended BLAS/CUDA/Cudnn libraries. You can pass compile options to use intended BLAS/CUDA/Cudnn libraries.
When running cmake command, it will search system paths like When running cmake command, it will search system paths like
......
...@@ -73,6 +73,7 @@ ...@@ -73,6 +73,7 @@
当然,您也可以进入到Docker容器中,以交互式的方式执行或调试您的代码: 当然,您也可以进入到Docker容器中,以交互式的方式执行或调试您的代码:
.. code-block:: bash .. code-block:: bash
docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash
cd /work cd /work
python train.py python train.py
......
...@@ -80,6 +80,7 @@ Also, you can go into the container shell, run or debug your code ...@@ -80,6 +80,7 @@ Also, you can go into the container shell, run or debug your code
interactively: interactively:
.. code-block:: bash .. code-block:: bash
docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash
cd /work cd /work
python train.py python train.py
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
PaddlePaddle针对不同的用户群体提供了多种安装方式。 PaddlePaddle针对不同的用户群体提供了多种安装方式。
专注深度学习模型开发 专注深度学习模型开发
----------------- --------------------
PaddlePaddle提供了多种python wheel包,可通过pip一键安装: PaddlePaddle提供了多种python wheel包,可通过pip一键安装:
...@@ -18,7 +18,7 @@ PaddlePaddle提供了多种python wheel包,可通过pip一键安装: ...@@ -18,7 +18,7 @@ PaddlePaddle提供了多种python wheel包,可通过pip一键安装:
这是最便捷的安装方式,请根据机器配置和系统选择对应的安装包。 这是最便捷的安装方式,请根据机器配置和系统选择对应的安装包。
关注底层框架 关注底层框架
---------- -------------
PaddlePaddle提供了基于Docker的安装方式,请参照以下教程: PaddlePaddle提供了基于Docker的安装方式,请参照以下教程:
...@@ -45,7 +45,7 @@ PaddlePaddle提供了基于Docker的安装方式,请参照以下教程: ...@@ -45,7 +45,7 @@ PaddlePaddle提供了基于Docker的安装方式,请参照以下教程:
常见问题汇总 常见问题汇总
----------- --------------
如果在安装过程中遇到了问题,请先尝试在下面的页面寻找答案: 如果在安装过程中遇到了问题,请先尝试在下面的页面寻找答案:
......
install and Compile install and Compile
========== ======================
.. _install_steps: .. _install_steps:
PaddlePaddle provides various methods of installation for many different users PaddlePaddle provides various methods of installation for many different users
Focus on Deep Learning Model Development Focus on Deep Learning Model Development
----------------- ----------------------------------------
PaddlePaddle provides lots of packages of python wheel , that pip can install: PaddlePaddle provides lots of packages of python wheel , that pip can install:
...@@ -18,7 +18,7 @@ PaddlePaddle provides lots of packages of python wheel , that pip can install: ...@@ -18,7 +18,7 @@ PaddlePaddle provides lots of packages of python wheel , that pip can install:
This is the most convenient way of installation. Please choose the right installation package with machine configure and system. This is the most convenient way of installation. Please choose the right installation package with machine configure and system.
Follow the Bottom Frame Follow the Bottom Frame
---------- ------------------------
PaddlePaddle also supports installation using Docker. Please refer to the tutorial below: PaddlePaddle also supports installation using Docker. Please refer to the tutorial below:
......
...@@ -55,11 +55,11 @@ paddlepaddle-gpu==0.11.0 使用CUDA 7.5和cuDNN 5编译的0.11.0版 ...@@ -55,11 +55,11 @@ paddlepaddle-gpu==0.11.0 使用CUDA 7.5和cuDNN 5编译的0.11.0版
:header: "版本说明", "cp27-cp27mu", "cp27-cp27m" :header: "版本说明", "cp27-cp27mu", "cp27-cp27m"
:widths: 1, 3, 3 :widths: 1, 3, 3
"cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`_" "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`_" "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
.. _pip_dependency: .. _pip_dependency:
......
...@@ -58,11 +58,11 @@ If the links below shows up the login form, just click "Log in as guest" to star ...@@ -58,11 +58,11 @@ If the links below shows up the login form, just click "Log in as guest" to star
:header: "version", "cp27-cp27mu", "cp27-cp27m" :header: "version", "cp27-cp27mu", "cp27-cp27m"
:widths: 1, 3, 3 :widths: 1, 3, 3
"cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`_" "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuNoavxOpenblas/.lastSuccessful/paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`_" "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`_" "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl>`__", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl>`__"
.. _pip_dependency: .. _pip_dependency:
......
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
代码示例如下: 代码示例如下:
```python ```python
from paddle.utils.merge_model import merge_v2_modelss from paddle.utils.merge_model import merge_v2_model
from mnist_v2 import network from mnist_v2 import network
net = network(is_infer=True) net = network(is_infer=True)
......
...@@ -13,4 +13,3 @@ ...@@ -13,4 +13,3 @@
# limitations under the License. # limitations under the License.
# #
cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer) cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer)
add_style_check_target(test_cclient test_cclient.c)
...@@ -33,9 +33,6 @@ add_library(paddle_capi STATIC ${CAPI_HEADERS} ${CAPI_PRIVATE_HEADER} ...@@ -33,9 +33,6 @@ add_library(paddle_capi STATIC ${CAPI_HEADERS} ${CAPI_PRIVATE_HEADER}
target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER}
${CAPI_PRIVATE_HEADER})
add_dependencies(paddle_capi paddle_proto paddle_gserver) add_dependencies(paddle_capi paddle_proto paddle_gserver)
# TODO: paddle_capi_whole will be removed. # TODO: paddle_capi_whole will be removed.
......
...@@ -87,8 +87,3 @@ else() ...@@ -87,8 +87,3 @@ else()
endif() endif()
add_dependencies(paddle_cuda paddle_proto ${external_project_dependencies}) add_dependencies(paddle_cuda paddle_proto ${external_project_dependencies})
add_style_check_target(paddle_cuda
${CUDA_SOURCES}
${CUDA_HEADERS}
${CUDA_CXX_SOURCES})
...@@ -36,5 +36,5 @@ cc_test(broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_ha ...@@ -36,5 +36,5 @@ cc_test(broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_ha
device_context broadcast_op_handle) device_context broadcast_op_handle)
cc_test(gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory cc_test(gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
device_context gather_op_handle) device_context gather_op_handle)
cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory #cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
device_context reduce_op_handle ) # device_context reduce_op_handle )
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
#include "paddle/fluid/framework/details/send_op_handle.h" #include "paddle/fluid/framework/details/send_op_handle.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -159,25 +160,39 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build( ...@@ -159,25 +160,39 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
if (!is_forwarding && places_.size() > 1) { if (!is_forwarding && places_.size() > 1) {
// Currently, we assume that once gradient is generated, it can be // Currently, we assume that once gradient is generated, it can be
// broadcast, and each gradient is only broadcast once. // broadcast, and each gradient is only broadcast once.
for (auto &og : op->OutputArgumentNames()) { if (static_cast<bool>(boost::get<int>(op->GetAttr(
if (IsParameterGradientOnce(og, &og_has_been_broadcast)) { OpProtoAndCheckerMaker::OpRoleAttrName())) &
switch (strategy_.reduce_) { static_cast<int>(OpRole::kBackward))) {
case BuildStrategy::ReduceStrategy::kReduce: try {
CreateReduceOp(&result, og, cur_device_id); auto backward_vars =
var_name_on_devices[cur_device_id].emplace(og); boost::get<std::vector<std::string>>(op->GetNullableAttr(
bcast_var_name_set[cur_device_id].emplace( OpProtoAndCheckerMaker::OpRoleVarAttrName()));
og.substr(0, og.size() - strlen(kGradVarSuffix)));
cur_device_id = (cur_device_id + 1) % places_.size(); PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0);
break;
case BuildStrategy::ReduceStrategy::kAllReduce: for (size_t i = 0; i < backward_vars.size(); i += 2) {
if (IsSparseGradient(var_types, og)) { auto &p_name = backward_vars[i];
CreateReduceOp(&result, og, 0); auto &g_name = backward_vars[i + 1];
CreateBroadcastOp(&result, og, 0); VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
} else {
InsertNCCLAllReduceOp(&result, og); switch (strategy_.reduce_) {
} case BuildStrategy::ReduceStrategy::kReduce:
break; CreateReduceOp(&result, g_name, cur_device_id);
var_name_on_devices[cur_device_id].emplace(g_name);
bcast_var_name_set[cur_device_id].emplace(p_name);
cur_device_id = (cur_device_id + 1) % places_.size();
break;
case BuildStrategy::ReduceStrategy::kAllReduce:
if (IsSparseGradient(var_types, g_name)) {
CreateReduceOp(&result, g_name, 0);
CreateBroadcastOp(&result, g_name, 0);
} else {
InsertNCCLAllReduceOp(&result, g_name);
}
break;
}
} }
} catch (boost::bad_get e) {
} }
} }
} }
...@@ -398,11 +413,12 @@ void MultiDevSSAGraphBuilder::CreateSendOp(SSAGraph *result, ...@@ -398,11 +413,12 @@ void MultiDevSSAGraphBuilder::CreateSendOp(SSAGraph *result,
} }
bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const { bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const {
// FIXME(yy): Do not hard code like this return boost::get<int>(
return op.OutputArgumentNames().size() == 1 && op.GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
op.OutputArgumentNames()[0] == GradVarName(loss_var_name_); (static_cast<int>(OpRole::kBackward) |
static_cast<int>(OpRole::kLoss)) &&
!loss_var_name_.empty(); // If loss_var is empty. This is test mode
} }
} // namespace details } // namespace details
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -96,10 +96,7 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> { ...@@ -96,10 +96,7 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
info->proto_ = new proto::OpProto; info->proto_ = new proto::OpProto;
info->checker_ = new OpAttrChecker(); info->checker_ = new OpAttrChecker();
T maker; T maker;
maker.SetProto(info->proto_); maker(info->proto_, info->checker_);
maker.SetChecker(info->checker_);
maker.Make();
maker.Validate();
info->proto_->set_type(op_type); info->proto_->set_type(op_type);
PADDLE_ENFORCE( PADDLE_ENFORCE(
info->proto_->IsInitialized(), info->proto_->IsInitialized(),
......
...@@ -20,6 +20,7 @@ limitations under the License. */ ...@@ -20,6 +20,7 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/shape_inference.h"
...@@ -222,6 +223,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const { ...@@ -222,6 +223,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
return it->second; return it->second;
} }
Attribute OpDesc::GetNullableAttr(const std::string &name) const {
auto it = attrs_.find(name);
if (it != attrs_.end()) {
return it->second;
} else {
return Attribute();
}
}
int OpDesc::GetBlockAttr(const std::string &name) const { int OpDesc::GetBlockAttr(const std::string &name) const {
auto it = attrs_.find(name); auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
...@@ -249,6 +259,13 @@ void OpDesc::RenameOutput(const std::string &old_name, ...@@ -249,6 +259,13 @@ void OpDesc::RenameOutput(const std::string &old_name,
std::replace(output.second.begin(), output.second.end(), old_name, std::replace(output.second.begin(), output.second.end(), old_name,
new_name); new_name);
} }
auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName());
if (it != attrs_.end()) {
auto &op_vars = boost::get<std::vector<std::string>>(it->second);
std::replace(op_vars.begin(), op_vars.end(), old_name, new_name);
}
need_update_ = true; need_update_ = true;
} }
......
...@@ -78,6 +78,8 @@ class OpDesc { ...@@ -78,6 +78,8 @@ class OpDesc {
Attribute GetAttr(const std::string &name) const; Attribute GetAttr(const std::string &name) const;
Attribute GetNullableAttr(const std::string &name) const;
int GetBlockAttr(const std::string &name) const; int GetBlockAttr(const std::string &name) const;
void Rename(const std::string &old_name, const std::string &new_name); void Rename(const std::string &old_name, const std::string &new_name);
......
...@@ -13,6 +13,7 @@ limitations under the License. */ ...@@ -13,6 +13,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_proto_maker.h"
#include <string> #include <string>
#include <vector>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -55,5 +56,28 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() { ...@@ -55,5 +56,28 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
} }
} }
void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
OpAttrChecker* attr_checker) {
proto_ = proto;
op_checker_ = attr_checker;
Make();
AddAttr<int>(OpRoleAttrName(), "The role of this operator")
.InEnum(
{static_cast<int>(OpRole::kForward),
static_cast<int>(OpRole::kBackward),
static_cast<int>(OpRole::kOptimize),
static_cast<int>(OpRole::kLoss) | static_cast<int>(OpRole::kForward),
static_cast<int>(OpRole::kLoss) |
static_cast<int>(OpRole::kBackward),
static_cast<int>(OpRole::kNotSpecified)})
.SetDefault(static_cast<int>(OpRole::kNotSpecified));
AddAttr<std::vector<std::string>>(OpRoleVarAttrName(),
"Optimized for variable")
.SetDefault({});
Validate();
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -20,21 +20,31 @@ limitations under the License. */ ...@@ -20,21 +20,31 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
enum class OpRole {
kForward = 0x0000,
kBackward = 0x0001,
kOptimize = 0x0002,
kLoss = 0x0100,
// The default value of op's role. This should be only used for unittests and
// CreateOp inside a operator.
kNotSpecified = 0x1000,
};
// this class not only make proto but also init attribute checkers. // this class not only make proto but also init attribute checkers.
class OpProtoAndCheckerMaker { class OpProtoAndCheckerMaker {
public: public:
static const char *OpRoleAttrName() { return "op_role"; }
static const char *OpRoleVarAttrName() { return "op_role_var"; }
void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
virtual void Make() = 0; virtual void Make() = 0;
virtual ~OpProtoAndCheckerMaker() { virtual ~OpProtoAndCheckerMaker() {
CHECK(validated_) << "should call Validate after build"; CHECK(validated_) << "should call Validate after build";
} }
void SetProto(proto::OpProto *proto) { proto_ = proto; }
void SetChecker(OpAttrChecker *attr_checker) { op_checker_ = attr_checker; }
void Validate();
protected: protected:
struct VariableBuilder { struct VariableBuilder {
proto::OpProto::Var *var_; proto::OpProto::Var *var_;
...@@ -76,6 +86,7 @@ class OpProtoAndCheckerMaker { ...@@ -76,6 +86,7 @@ class OpProtoAndCheckerMaker {
private: private:
void CheckNoDuplicatedInOutAttrs(); void CheckNoDuplicatedInOutAttrs();
void Validate();
proto::OpProto *proto_; proto::OpProto *proto_;
OpAttrChecker *op_checker_; OpAttrChecker *op_checker_;
......
...@@ -28,10 +28,8 @@ TEST(ProtoMaker, DuplicatedAttr) { ...@@ -28,10 +28,8 @@ TEST(ProtoMaker, DuplicatedAttr) {
paddle::framework::proto::OpProto op_proto; paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker; paddle::framework::OpAttrChecker op_checker;
TestAttrProtoMaker proto_maker; TestAttrProtoMaker proto_maker;
proto_maker.SetProto(&op_proto); ASSERT_THROW(proto_maker(&op_proto, &op_checker),
proto_maker.SetChecker(&op_checker); paddle::platform::EnforceNotMet);
proto_maker.Make();
ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
} }
class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
...@@ -46,8 +44,6 @@ TEST(ProtoMaker, DuplicatedInOut) { ...@@ -46,8 +44,6 @@ TEST(ProtoMaker, DuplicatedInOut) {
paddle::framework::proto::OpProto op_proto; paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker; paddle::framework::OpAttrChecker op_checker;
TestAttrProtoMaker proto_maker; TestAttrProtoMaker proto_maker;
proto_maker.SetProto(&op_proto); ASSERT_THROW(proto_maker(&op_proto, &op_checker),
proto_maker.SetChecker(&op_checker); paddle::platform::EnforceNotMet);
proto_maker.Make();
ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
} }
cc_library(analysis SRCS dot.cc node.cc node.h) set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
cc_library(analysis SRCS dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc
DEPS paddle_fluid)
cc_test(test_node SRCS node_tester.cc DEPS analysis) cc_test(test_node SRCS node_tester.cc DEPS analysis)
cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
cc_test(test_data_flow_graph SRCS data_flow_graph_tester.cc DEPS analysis ${FLUID_CORE_MODULES} paddle_fluid
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_data_flow_graph PROPERTIES DEPENDS test_word2vec)
cc_test(test_subgraph_splitter
SRCS subgraph_splitter_tester.cc
DEPS analysis paddle_fluid tensor
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_subgraph_splitter PROPERTIES DEPENDS test_word2vec)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/dot.h"
namespace paddle {
namespace inference {
namespace analysis {
// It is a better idea that the inputs and outputs of this graph is set manully
// before, but there must be a Pass that helps to prune the unnecessary ops that
// do not contribute to the given targets, so in this pass, analysis and get the
// inputs and outputs is OK.
void DataFlowGraph::Build() {
inputs.clear();
outputs.clear();
std::unordered_set<Node *> ins;
std::unordered_set<Node *> outs;
for (auto &node : nodes.nodes()) {
for (auto *in : node->inlinks) {
ins.insert(in);
}
for (auto *out : node->outlinks) {
outs.insert(out);
}
}
// The nodes that in ins but not in outs is the graph's inputs
// similarly, the nodes that in outs but not in ins is the graphs' outputs
for (auto *in : ins) {
if (!outs.count(in)) {
inputs.push_back(in);
}
}
for (auto *out : outs) {
if (!outs.count(out)) {
outputs.push_back(out);
}
}
}
std::string DataFlowGraph::DotString() const {
Dot dot;
// Add nodes
for (size_t i = 0; i < nodes.size(); i++) {
const Node &node = nodes.Get(i);
switch (node.type()) {
case Node::Type::kValue:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunction:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunctionBlock:
dot.AddNode(node.repr(), node.dot_attrs());
break;
default:
PADDLE_THROW("unsupported Node type %d", static_cast<int>(node.type()));
}
}
// Add edges
for (size_t i = 0; i < nodes.size(); i++) {
const Node &node = nodes.Get(i);
for (auto &in : node.inlinks) {
dot.AddEdge(in->repr(), node.repr(), {});
}
}
return dot.Build();
}
//
// NodesBFSIterator
//
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
const std::vector<Node *> &source)
: queue_(source.begin(), source.end()) {}
// GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
// GraphTraits<DataFlowGraph>::NodesBFSIterator &&other) noexcept
// : queue_(std::move(other.queue_)),
// visited_(std::move(other.visited_)) {}
GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
const GraphTraits<DataFlowGraph>::NodesBFSIterator &other)
: queue_(other.queue_), visited_(other.visited_) {}
Node &GraphTraits<DataFlowGraph>::NodesBFSIterator::operator*() {
PADDLE_ENFORCE(!queue_.empty());
return *queue_.front();
}
Node *GraphTraits<DataFlowGraph>::NodesBFSIterator::operator->() {
PADDLE_ENFORCE(!queue_.empty());
return queue_.front();
}
GraphTraits<DataFlowGraph>::NodesBFSIterator &
GraphTraits<DataFlowGraph>::NodesBFSIterator::operator=(
const GraphTraits<DataFlowGraph>::NodesBFSIterator &other) {
queue_ = other.queue_;
visited_ = other.visited_;
return *this;
}
GraphTraits<DataFlowGraph>::NodesBFSIterator
&GraphTraits<DataFlowGraph>::NodesBFSIterator::operator++() {
PADDLE_ENFORCE(!queue_.empty());
auto *cur = queue_.front();
visited_.insert(cur);
queue_.pop_front();
for (auto *output : cur->outlinks) {
if (!visited_.count(output)) {
queue_.push_back(output);
visited_.insert(output);
}
}
return *this;
}
bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
const GraphTraits<DataFlowGraph>::NodesBFSIterator &other) {
if (queue_.empty()) return other.queue_.empty();
if ((!queue_.empty()) && (!other.queue_.empty())) {
return queue_.front() == other.queue_.front() &&
visited_.size() == other.visited_.size(); // here need to check the
// equality of queue and
// visited. Just a light but week implementation.
}
return false;
}
//
// NodesDFSIterator
//
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
const std::vector<Node *> &source) {
for (auto *x : source) stack_.push(x);
}
// GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
// GraphTraits<DataFlowGraph>::NodesDFSIterator &&other) noexcept
// : stack_(std::move(other.stack_)),
// visited_(std::move(other.visited_)) {}
GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
const GraphTraits<DataFlowGraph>::NodesDFSIterator &other)
: stack_(other.stack_), visited_(other.visited_) {}
Node &GraphTraits<DataFlowGraph>::NodesDFSIterator::operator*() {
PADDLE_ENFORCE(!stack_.empty());
return *stack_.top();
}
GraphTraits<DataFlowGraph>::NodesDFSIterator
&GraphTraits<DataFlowGraph>::NodesDFSIterator::operator++() {
if (stack_.empty()) return *this;
visited_.insert(stack_.top());
auto *cur = stack_.top();
stack_.pop();
for (auto *x : cur->outlinks) {
if (!visited_.count(x)) {
stack_.push(x);
visited_.insert(x);
}
}
return *this;
}
bool GraphTraits<DataFlowGraph>::NodesDFSIterator::operator==(
const GraphTraits<DataFlowGraph>::NodesDFSIterator &other) {
if (stack_.empty()) return other.stack_.empty();
if ((!stack_.empty()) && (!other.stack_.empty())) {
return stack_.top() == other.stack_.top();
}
return false;
}
GraphTraits<DataFlowGraph>::NodesDFSIterator &
GraphTraits<DataFlowGraph>::NodesDFSIterator::operator=(
const GraphTraits<DataFlowGraph>::NodesDFSIterator &other) {
stack_ = other.stack_;
visited_ = other.visited_;
return *this;
}
Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return stack_.top();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* Data flow graph is an pass that build the basic graph. It contains a graph
* and the iterators that enable the iteration over the graph.
*/
#pragma once
#include <deque>
#include <stack>
#include <unordered_set>
#include "paddle/fluid/inference/analysis/graph_traits.h"
#include "paddle/fluid/inference/analysis/node.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* DataFlowGraph - A container of Value and Function Nodes.
*/
struct DataFlowGraph {
NodeMap nodes;
std::vector<Node *> inputs;
std::vector<Node *> outputs;
// Extract inputs and outputs of the graph.
void Build();
// Output a DOT graph file for debug.
std::string DotString() const;
};
/*
* An graph trait help to traverse the graph using BFS.
* The BFS start from a graph's inputs, the graph should be fully-connected, so
* that the iterator can reach the end.
*/
template <>
struct GraphTraits<DataFlowGraph> {
// BFS iterator on nodes.
struct NodesBFSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesBFSIterator() = default;
explicit NodesBFSIterator(const std::vector<Node *> &source);
// NodesBFSIterator(NodesBFSIterator &&other) noexcept;
// NOTE Heavy to use.
NodesBFSIterator(const NodesBFSIterator &other);
Node &operator*();
NodesBFSIterator &operator++();
Node *operator->();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesBFSIterator &operator=(const NodesBFSIterator &other);
bool operator==(const NodesBFSIterator &other);
bool operator!=(const NodesBFSIterator &other) { return !(*this == other); }
private:
std::deque<Node *> queue_;
std::unordered_set<Node *> visited_;
};
// DFS iterator on nodes.
struct NodesDFSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesDFSIterator() = default;
explicit NodesDFSIterator(const std::vector<Node *> &source);
// NodesDFSIterator(NodesDFSIterator &&other) noexcept;
NodesDFSIterator(const NodesDFSIterator &other);
Node &operator*();
NodesDFSIterator &operator++();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesDFSIterator &operator=(const NodesDFSIterator &other);
bool operator==(const NodesDFSIterator &other);
bool operator!=(const NodesDFSIterator &other) { return !(*this == other); }
Node *operator->();
private:
std::stack<Node *> stack_;
std::unordered_set<Node *> visited_;
};
explicit GraphTraits(DataFlowGraph *graph) : graph_(graph) {}
// default use BFS to visit the nodes.
iterator_range<NodesBFSIterator> nodes() {
return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
}
iterator_range<NodesBFSIterator> nodes_in_BFS() {
return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
}
iterator_range<NodesDFSIterator> nodes_in_DFS() {
return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end());
}
private:
NodesBFSIterator nodes_bfs_begin() {
return NodesBFSIterator(graph_->inputs);
}
NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); }
NodesDFSIterator nodes_dfs_begin() {
return NodesDFSIterator(graph_->inputs);
}
NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); }
private:
DataFlowGraph *graph_;
};
// Extract the inputs and outputs of a graph. The inputs and outputs of a
// sub-graph is the inputs nodes and output nodes that doesn't inside the
// sub-graph.
std::pair<
std::vector<Node *>,
std::vector<
Node *>> static ExtractInputAndOutputOfSubGraph(std::vector<Node *>
&graph) {
std::unordered_set<Node *> nodes(graph.begin(), graph.end());
std::unordered_set<Node *> inputs;
std::unordered_set<Node *> outputs;
for (auto &node : graph) {
for (auto *in : node->inlinks) {
if (!nodes.count(in) && in->type() == Node::Type::kValue) {
inputs.insert(in);
}
}
for (auto *out : node->outlinks) {
if (!nodes.count(out) && out->type() == Node::Type::kValue) {
outputs.insert(out);
}
}
}
return std::make_pair(std::vector<Node *>(inputs.begin(), inputs.end()),
std::vector<Node *>(outputs.begin(), outputs.end()));
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST(DataFlowGraph, BFS) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
dfg.Build();
for (auto* in : dfg.inputs) {
LOG(INFO) << "inputs: " << in->name() << " "
<< static_cast<int>(in->type());
}
for (auto* out : dfg.outputs) {
LOG(INFO) << "outputs: " << out->name() << " "
<< static_cast<int>(out->type());
}
GraphTraits<DataFlowGraph> trait(&dfg);
auto nodes = trait.nodes();
int count = 0;
for (auto it = nodes.begin(); it != nodes.end(); ++it) {
LOG(INFO) << "visiting " << it->name();
++count;
}
ASSERT_EQ(count, dfg.nodes.size());
}
TEST(DataFlowGraph, DFS) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
dfg.Build();
GraphTraits<DataFlowGraph> trait(&dfg);
auto nodes = trait.nodes_in_DFS();
int count = 0;
for (auto it = nodes.begin(); it != nodes.end(); ++it) {
LOG(INFO) << "visiting " << it->name();
++count;
}
ASSERT_EQ(count, dfg.nodes.size());
}
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/io.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Test) {
framework::proto::ProgramDesc new_desc;
DataFlowGraph graph;
FluidToDataFlowGraphPass pass0;
DataFlowGraphToFluidPass pass1;
pass0.Initialize(desc);
pass1.Initialize(&new_desc);
pass0.Run(&graph);
pass1.Run(&graph);
pass0.Finalize();
pass1.Finalize();
LOG(INFO) << graph.nodes.size();
}
} // analysis
} // inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include <vector>
namespace paddle {
namespace inference {
namespace analysis {
FluidToDataFlowGraphPass::FluidToDataFlowGraphPass() {}
bool FluidToDataFlowGraphPass::Initialize() { return Pass::Initialize(); }
bool FluidToDataFlowGraphPass::Initialize(
const framework::proto::ProgramDesc &desc) {
desc_ = &desc;
return true;
}
bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); }
void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
// insert vars
std::unordered_map<std::string, size_t> var2id;
auto &main_block = desc_->blocks(framework::kRootBlockIndex);
for (int i = 0; i < main_block.vars_size(); i++) {
const auto &var = main_block.vars(i);
auto *v = graph->nodes.Create(Node::Type::kValue);
v->SetName(var.name());
v->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&var)));
var2id[var.name()] = v->id();
}
for (int i = 0; i < main_block.ops_size(); i++) {
const auto &op = main_block.ops(i);
auto *o = graph->nodes.Create(Node::Type::kFunction);
o->SetName(op.type());
static_cast<Function *>(o)->SetFuncType(op.type());
// Link to the original protobuf message's memory, make it easier to
// generate from a data flow graph to fluid ProgramDesc.
o->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&op)));
// set inputs and outputs
// TODO(Superjomn) make sure the InputNames is the real variable name.
for (int j = 0; j < op.inputs_size(); j++) {
auto &in_var = op.inputs(j);
for (int k = 0; k < in_var.arguments_size(); k++) {
auto *in = graph->nodes.GetMutable(var2id.at(in_var.arguments(k)));
in->outlinks.push_back(o);
o->inlinks.push_back(in);
}
}
for (int j = 0; j < op.outputs_size(); j++) {
auto &out_var = op.outputs(j);
for (int k = 0; k < out_var.arguments_size(); k++) {
auto *out = graph->nodes.GetMutable(var2id[out_var.arguments(k)]);
out->inlinks.push_back(o);
o->outlinks.push_back(out);
}
}
}
// Analysis and extract the inputs and outputs of this graph.
graph->Build();
}
Pass *FluidToDataFlowGraphPass::CreatePrinterPass(
std::ostream &os, const std::string &banner) const {
return nullptr;
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file implements the transformation from data flow graph to fluid
* ProgramDesc.
*/
#pragma once
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Transform a FluidDesc to a data flow graph.
*/
class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
public:
FluidToDataFlowGraphPass();
bool Initialize() override;
bool Initialize(const framework::proto::ProgramDesc &desc) override;
bool Finalize() override;
void Run(DataFlowGraph *graph) override;
Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const override;
private:
framework::proto::ProgramDesc const *desc_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Init) {
FluidToDataFlowGraphPass pass;
pass.Initialize();
pass.Initialize(desc);
DataFlowGraph graph;
pass.Run(&graph);
ASSERT_GT(graph.nodes.size(), 0);
pass.Finalize();
LOG(INFO) << '\n' << graph.DotString();
}
} // analysis
} // inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/graph_traits.h"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the GraphTraits<X> template class that should be specified
* by classes that want to be iteratable by generic graph iterators.
*
* This file also defines the marker class Inverse that is used to iterate over
* graphs in a graph defined, inverse ordering...
*/
#pragma once
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* This class should be specialized by different graph types...
* That's why the base class is empty.
*/
template <typename GraphType>
struct GraphTraits {
// using NodesBFSIterator = xxx
// NodesBFSIterator nodes_begin();
// NodesBFSIterator nodes_end();
};
/*
* Inverse - This class is used as a marker class to tell the graph iterator to
* iterate in a graph defined Inverse order.
*/
template <typename GraphType>
struct Inverse {
const GraphType &graph;
explicit Inverse(const GraphType &graph) : graph(graph) {}
};
/*
* Provide a partial specialization of GraphTraits so that the inverse of an
* inverse turns into the original graph.
*/
template <typename GraphType>
struct GraphTraits<Inverse<Inverse<GraphType>>> : GraphTraits<GraphType> {};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
template <typename IteratorT> #define SET_TYPE(type__) dic_[typeid(type__).hash_code()] = #type__;
class iterator_range { /*
IteratorT begin_, end_; * Map typeid to representation.
*/
public: struct DataTypeNamer {
template <typename Container> static const DataTypeNamer &Global() {
explicit iterator_range(Container &&c) : begin_(c.begin()), end_(c.end()) {} static auto *x = new DataTypeNamer();
return *x;
iterator_range(const IteratorT &begin, const IteratorT &end) }
: begin_(begin), end_(end) {}
template <typename T>
const IteratorT &begin() const { return begin_; } const std::string &repr() const {
const IteratorT &end() const { return end_; } auto x = typeid(T).hash_code();
}; PADDLE_ENFORCE(dic_.count(x), "unknown type for representation");
return dic_.at(x);
/* }
* An registry helper class, with its records keeps the order they registers.
*/ const std::string &repr(size_t &hash) const {
template <typename T> PADDLE_ENFORCE(dic_.count(hash), "unknown type for representation");
class OrderedRegistry { return dic_.at(hash);
public: }
T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name)); private:
dic_[name] = data_.size(); DataTypeNamer() {
data_.emplace_back(std::unique_ptr<T>(x)); SET_TYPE(int);
return data_.back().get(); SET_TYPE(bool);
} SET_TYPE(float);
}
T *Lookup(const std::string &name) {
auto it = dic_.find(name); std::unordered_map<decltype(typeid(int).hash_code()), std::string> dic_;
if (it == dic_.end()) return nullptr; };
return data_[it->second].get(); #undef SET_TYPE
}
template <typename IteratorT>
protected: class iterator_range {
std::unordered_map<std::string, int> dic_; IteratorT begin_, end_;
std::vector<std::unique_ptr<T>> data_;
}; public:
template <typename Container>
} // namespace analysis explicit iterator_range(Container &&c) : begin_(c.begin()), end_(c.end()) {}
} // namespace inference
} // namespace paddle iterator_range(const IteratorT &begin, const IteratorT &end)
: begin_(begin), end_(end) {}
#define PADDLE_DISALLOW_COPY_AND_ASSIGN(type__) \
\ const IteratorT &begin() const { return begin_; }
type__(const type__ &) = delete; \ const IteratorT &end() const { return end_; }
\ };
void operator=(const type__ &) = delete;
/*
* An registry helper class, with its records keeps the order they registers.
*/
template <typename T>
class OrderedRegistry {
public:
T *Register(const std::string &name, T *x) {
PADDLE_ENFORCE(!dic_.count(name));
dic_[name] = data_.size();
data_.emplace_back(std::unique_ptr<T>(x));
return data_.back().get();
}
T *Lookup(const std::string &name) {
auto it = dic_.find(name);
if (it == dic_.end()) return nullptr;
return data_[it->second].get();
}
protected:
std::unordered_map<std::string, int> dic_;
std::vector<std::unique_ptr<T>> data_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
#define PADDLE_DISALLOW_COPY_AND_ASSIGN(type__) \
\
type__(const type__ &) = delete; \
\
void operator=(const type__ &) = delete;
...@@ -117,7 +117,10 @@ class Node { ...@@ -117,7 +117,10 @@ class Node {
type_hash_ = typeid(T).hash_code(); type_hash_ = typeid(T).hash_code();
data_.resize(sizeof(T)); data_.resize(sizeof(T));
} }
PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), "type not matched"); PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(),
"type not matched, origin is %s, want %s",
DataTypeNamer::Global().repr(type_hash_),
DataTypeNamer::Global().repr<T>());
PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error");
return *reinterpret_cast<T *>(&data_[0]); return *reinterpret_cast<T *>(&data_[0]);
} }
...@@ -127,6 +130,10 @@ class Node { ...@@ -127,6 +130,10 @@ class Node {
size_t type_hash_{std::numeric_limits<size_t>::max()}; size_t type_hash_{std::numeric_limits<size_t>::max()};
}; };
bool IsFunction() const { return type_ == Node::Type::kFunction; }
bool IsValue() const { return type_ == Node::Type::kValue; }
bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; }
virtual ~Node() {} virtual ~Node() {}
friend class NodeMap; friend class NodeMap;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/inference/analysis/node.h" #include "paddle/fluid/inference/analysis/node.h"
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/pass.h"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <iosfwd>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle {
namespace inference {
namespace analysis {
class Pass {
public:
Pass() = default;
virtual ~Pass() {}
// Virtual method overridden by subclasses to do only necessary initialization
// before any pass is run.
virtual bool Initialize() { return false; }
// There is some passes such as FlowToDataFlowGraphPass that needs a
// ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it
// only couple with the proto file.
virtual bool Initialize(const framework::proto::ProgramDesc &desc) {
return false;
}
// There are some Passes such as DataFlowGraphToFluidPass that will output a
// ProgramDesc.
virtual bool Initialize(framework::proto::ProgramDesc *desc) { return false; }
// Virtual method overriden by subclasses to do any necessary clean up after
// all passes have run.
virtual bool Finalize() { return false; }
// Get a Pass appropriate to print the Node this pass operates on.
virtual Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const = 0;
// Run on a single Node.
virtual void Run(Node *x) { LOG(FATAL) << "not valid"; }
// Run on a single Function.
virtual void Run(Function *x) { LOG(FATAL) << "not valid"; }
// Run on a single FunctionBlock.
virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; }
// Run on a single DataFlowGraph.
virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; }
};
// NodePass process on any Node types.
class NodePass : public Pass {
public:
virtual void Run(Node *node) = 0;
};
// NodePass process on any Function node types.
class FunctionPass : public Pass {
public:
virtual void Run(Function *node) = 0;
};
// NodePass process on any FunctionBlock node types.
class FunctionBlockPass : public Pass {
public:
virtual void Run(FunctionBlock *node) = 0;
};
// GraphPass processes on any GraphType.
class DataFlowGraphPass : public Pass {
public:
virtual void Run(DataFlowGraph *graph) = 0;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace paddle {
namespace inference {
namespace analysis {
const char *SubGraphSplitter::kMarkerAttrName =
"_sub_graph_splitter_inside_sub_graph";
std::vector<std::vector<Node *>> SubGraphSplitter::operator()() {
MarkNodesInsideSubGraph();
return ExtractSubGraphs();
}
// Mark the output variables inside a subgraph with the func.
inline void MarkOutLinksInSubGraph(const Function *func) {
for (auto *var : func->outlinks) {
var->attr(SubGraphSplitter::kMarkerAttrName).Bool() = true;
}
}
void SubGraphSplitter::MarkNodesInsideSubGraph() {
for (auto &node : GraphTraits<DataFlowGraph>(graph_).nodes()) {
if (node_inside_subgraph_teller_(&node)) {
node.attr(kMarkerAttrName).Bool() = true;
if (node.type() == Node::Type::kFunction) {
// If a function is inside the sub-graph, mark all the output variables
// to be inside too, so that two marked functions will be inside a same
// sub-graph, lets take a example: A_function->var->B_function, if
// A_function is marked, var should also be marked, so that B_function
// will be in the same sub-graph with A_function if B_function is
// marked.
MarkOutLinksInSubGraph(static_cast<const Function *>(&node));
}
}
}
}
const char *kUnionFindParent = "_sub_graph_splitter_union_find_parent_";
// Use the Union Find(UF) algorithm to find fully connected sub-graphs, if node
// a's output is node b, that is a and b is in the same sub-graph. The UF
// algorithm will group them to the same cluster.
using node_map_t = std::unordered_map<int, Node *>;
// Find the ancestor id of a node.
int UnionFindGetAncestor(const node_map_t &node_map, size_t id) {
int tmp = id;
do {
tmp = node_map.at(tmp)->attr(kUnionFindParent).Int32();
} while (node_map.at(tmp)->attr(kUnionFindParent).Int32() != tmp);
return tmp;
}
// Make this two node share the same ancestor.
// TODO(Superjom) bad performance, make a balanced tree latter.
void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) {
int a_ancestor = UnionFindGetAncestor(node_map, a);
int b_ancestor = UnionFindGetAncestor(node_map, b);
node_map.at(b_ancestor)->attr(kUnionFindParent).Int32() = a_ancestor;
node_map.at(a)->attr(kUnionFindParent).Int32() = a_ancestor;
node_map.at(b)->attr(kUnionFindParent).Int32() = a_ancestor;
}
std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
std::vector<Node *> marked_nodes;
for (auto &node : GraphTraits<DataFlowGraph>(graph_).nodes()) {
if (node.attr(kMarkerAttrName).Bool()) {
marked_nodes.push_back(&node);
}
}
// extract sub-graphs in the marked node set, use Union Find algorithm.
node_map_t node_map; // id to ptr
for (auto *n : marked_nodes) {
// n's parent == n.id means it is the ancestor
n->attr(kUnionFindParent).Int32() = n->id();
node_map[n->id()] = n;
}
std::unordered_set<Node *> visited;
for (auto *n : marked_nodes) {
for (auto *out : n->outlinks) {
if (node_map.count(out->id())) {
UnionFindCombine(node_map, n->id(), out->id());
}
}
}
std::unordered_map<int /*ancestor*/, std::vector<Node *>> clusters;
for (auto *n : marked_nodes) {
if (n->type() == Node::Type::kFunction) {
clusters[UnionFindGetAncestor(node_map,
n->attr(kUnionFindParent).Int32())]
.push_back(n);
}
}
std::vector<std::vector<Node *>> result;
std::for_each(clusters.begin(), clusters.end(),
[&](const decltype(clusters)::value_type &it) {
result.push_back(it.second);
});
return result;
}
void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); }
void SubGraphFuse::ReplaceNodesWithSubGraphs() {
auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)();
for (auto &subgraph : subgraphs) {
// replace this sub-graph with the first node. Two steps: 1. Create a Block
// Node that contains this subgraph 2. Mark the nodes inside the sub-graph
// as deleted. 3. Replace the deleted node with the new Block Node.
auto *block_node = graph_->nodes.Create(Node::Type::kFunctionBlock);
auto io = ExtractInputAndOutputOfSubGraph(subgraph);
block_node->inlinks = std::move(io.first);
block_node->outlinks = std::move(io.second);
for (auto *node : subgraph) {
// TODO(Superjomn) need a unified mechanism to treat deleted node in each
// pass.
node->SetDeleted();
}
std::unordered_map<Node *, Node *>
delelte_node_map; // deleted node to BlockNode
for (auto *n : block_node->inlinks) {
n->inlinks.clear();
}
for (auto *n : block_node->outlinks) {
n->outlinks.clear();
}
for (auto *n : block_node->inlinks) {
n->outlinks.push_back(block_node);
}
for (auto *n : block_node->outlinks) {
n->inlinks.push_back(n);
}
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the the class to partition a graph.
*/
#pragma once
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Detect the nodes in a sub-graph that meet some conditions. This class doesn't
* modify the graph.
*/
class SubGraphSplitter {
public:
static const char *kMarkerAttrName;
// Tell whether a node is inside a sub-graph.
using NodeInsideSubgraphTeller = std::function<bool(const Node *)>;
SubGraphSplitter(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller)
: graph_(graph), node_inside_subgraph_teller_(teller) {}
std::vector<std::vector<Node *>> operator()();
protected:
// Mark the nodes inside the accepted sub-graph using
// node_inside_subgraph_teller.
void MarkNodesInsideSubGraph();
// Merge the marked nodes into sub-graphs and return the sub-graphs.
std::vector<std::vector<Node *>> ExtractSubGraphs();
private:
DataFlowGraph *graph_;
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
/*
* SubGraphFuse - Replace some nodes with the sub-graph node they are inside. To
* some extent, the TensorRT engine is just a fusion op for a model.
*/
class SubGraphFuse {
public:
using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller;
SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller)
: graph_(graph), node_inside_subgraph_teller_(teller) {}
// The main method which run all the logic.
void operator()();
protected:
// Remove the nodes inside sub-graphs and replace with the SubGraphNode.
void ReplaceNodesWithSubGraphs();
private:
DataFlowGraph *graph_;
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Split) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
LOG(INFO) << "spliter\n" << dfg.DotString();
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
ASSERT_GT(dfg.nodes.size(), 5UL);
auto subgraphs = SubGraphSplitter(&dfg, teller)();
// Check the number of the marked nodes.
int marked_nodes = 0;
for (auto& node : dfg.nodes.nodes()) {
if (node->IsFunction() &&
node->attr(SubGraphSplitter::kMarkerAttrName).Bool()) {
++marked_nodes;
}
}
EXPECT_EQ(marked_nodes, 6);
// For human debug.
for (auto& subgraph : subgraphs) {
LOG(INFO) << "subgraph size " << subgraph.size();
for (auto* node : subgraph) {
LOG(INFO) << "node " << node->repr();
}
}
ASSERT_EQ(subgraphs.size(), 1UL);
// The last sub-graph has 5 Functions.
ASSERT_EQ(subgraphs.back().size(), 6UL);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
此差异已折叠。
nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES}) nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine) DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
SERIAL)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor) nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
...@@ -201,11 +201,13 @@ if(WITH_DISTRIBUTE) ...@@ -201,11 +201,13 @@ if(WITH_DISTRIBUTE)
set_source_files_properties(send_vars_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(send_vars_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS}) op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS})
set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) #set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op listen_and_serv_op sum_op executor) #cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op
# listen_and_serv_op sum_op executor SERIAL)
if(WITH_GPU) if(WITH_GPU)
set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor) cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op
listen_and_serv_op executor SERIAL)
op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc) op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc)
set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else() else()
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册