Commit afbc4ce4 authored by: T tensor-tang

Merge remote-tracking branch 'ups/develop' into mklml_funcs

......@@ -57,7 +57,10 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
......@@ -202,7 +205,7 @@ endif(USE_NNPACK)
add_subdirectory(proto)
if(NOT MOBILE_INFERENCE)
if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY)
# "add_subdirectory(go)" should be placed after the following line,
# because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
......@@ -230,3 +233,7 @@ if(WITH_DOC)
find_python_module(recommonmark REQUIRED)
add_subdirectory(doc)
endif()
if (WITH_CONTRIB)
add_subdirectory(paddle/contrib)
endif()
......@@ -101,6 +101,3 @@ RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
# the development image runs the build work by default
CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
......@@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
unzip -q android-ndk-r14b-linux-x86_64.zip && \
mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
rm -rf /opt/android-ndk-tmp
CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"]
......@@ -24,22 +24,22 @@ Currently supported `--model` argument include:
* Run the following command to start a benchmark job locally:
```bash
python fluid_benchmark.py --model mnist --parallel 1 --device GPU --with_test
python fluid_benchmark.py --model mnist --device GPU
```
You can choose to use GPU/CPU training. With GPU training, you can specify
`--parallel 1` to run multi GPU training.
`--gpus <gpu_num>` to run multi GPU training.
* Run distributed training with parameter servers:
* start parameter servers:
```bash
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
```
* start trainers:
```bash
PADDLE_TRAINING_ROLE=PSERVER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method pserver
PADDLE_TRAINING_ROLE=TRAINER PADDLE_PSERVER_PORT=7164 PADDLE_PSERVER_IPS=127.0.0.1 PADDLE_TRAINERS=1 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method pserver
```
* Run distributed training using NCCL2
```bash
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --parallel 0 --device GPU --update_method nccl2
PADDLE_PSERVER_PORT=7164 PADDLE_TRAINER_IPS=192.168.0.2,192.168.0.3 PADDLE_CURRENT_IP=127.0.0.1 PADDLE_TRAINER_ID=0 python fluid_benchmark.py --model mnist --device GPU --update_method nccl2
```
## Run Distributed Benchmark on Kubernetes Cluster
......@@ -48,7 +48,7 @@ We provide a script `kube_gen_job.py` to generate Kubernetes yaml files to submi
distributed benchmark jobs to your cluster. To generate a job yaml, just run:
```bash
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver --with_test" --disttype pserver
python kube_gen_job.py --jobname myjob --pscpu 4 --cpu 8 --gpu 8 --psmemory 20 --memory 40 --pservers 4 --trainers 4 --entry "python fluid_benchmark.py --model mnist --parallel 1 --device GPU --update_method pserver " --disttype pserver
```
Then the yaml files are generated under directory `myjob`, you can run:
......
......@@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS)
add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
endif(USE_EIGEN_FOR_BLAS)
if(EIGEN_USE_THREADS)
add_definitions(-DEIGEN_USE_THREADS)
endif(EIGEN_USE_THREADS)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
......
......@@ -212,6 +212,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake
${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_SKIP_RPATH=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
......
......@@ -1003,9 +1003,9 @@ dice_loss
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
bilinear_interp
upsampling_bilinear2d
____
.. autofunction:: paddle.fluid.layers.bilinear_interp
.. autofunction:: paddle.fluid.layers.upsampling_bilinear2d
:noindex:
......@@ -35,13 +35,11 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
# 2. Optional: build the Docker image used to compile PaddlePaddle from the source tree
docker build -t paddle:dev .
# 3. Run the following command to build CPU-only binaries
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. Or use the image built in the optional step above (step 2 must be run first)
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
Note: the above commands map the current directory (the root of the source tree) to the :code:`/paddle`
directory inside the container. If you use the image you built yourself (step 4 above), it runs the default
entry point :code:`build.sh` described in the :code:`Dockerfile`, so you can omit the final script command in step 3.
Note: the above commands map the current directory (the root of the source tree) to the :code:`/paddle` directory inside the container.
After the build finishes, the output whl package is generated under build/python/dist; you can install it on the current machine or copy it to the target machine to install:
......@@ -72,15 +70,15 @@ PaddlePaddle需要使用Docker环境完成编译,这样可以免去单独安
.. code-block:: bash
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/docker/build.sh
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test
If you want to run a single unit test (for example :code:`test_sum_op` ):
.. code-block:: bash
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
bash /paddle/paddle/scripts/docker/build.sh
cd /paddle/build
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
./paddle/scripts/paddle_build.sh build
cd build
ctest -R test_sum_op -V
.. _faq_docker:
......
......@@ -34,14 +34,12 @@ Or you can build your own image from source as the optional step below:
# 2. Optional: build development docker image from source
docker build -t paddle:dev .
# 3. Run the following command to build a CPU-Only binaries
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x /paddle/paddle/scripts/paddle_build.sh build
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. Or, use your built Docker image to build PaddlePaddle (must run step 2)
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
NOTE: The above command tries to mount the current working directory (the root directory of the source code)
into the :code:`/paddle` directory inside the docker container. If you are using your own image
(Step 4), it will run the default entry point :code:`build.sh`, so you can omit the last
command in step 3.
into the :code:`/paddle` directory inside the docker container.
When the compilation finishes, you can get the output whl package under
build/python/dist, then you can choose to install the whl on the local
......@@ -74,15 +72,15 @@ Set :code:`WITH_GPU=ON` Can also run tests on GPU.
.. code-block:: bash
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/paddle/scripts/docker/build.sh
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=ON" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh test
If you wish to run only one unit test, like :code:`test_sum_op`:
.. code-block:: bash
docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
bash /paddle/paddle/scripts/docker/build.sh
cd /paddle/build
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=ON" -e "RUN_TEST=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 /bin/bash
./paddle/scripts/paddle_build.sh build
cd build
ctest -R test_sum_op -V
.. _faq_docker:
......
......@@ -98,7 +98,7 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note
Users in China can use the following mirror to speed up access:
.. code-block: bash
.. code-block:: bash
docker run -p 8888:8888 docker.paddlepaddlehub.com/book
......
......@@ -105,7 +105,7 @@ We provide a packaged book image, simply issue the command:
For users in China, we provide a faster mirror:
.. code-block: bash
.. code-block:: bash
docker run -p 8888:8888 docker.paddlepaddlehub.com/book
......
......@@ -11,7 +11,6 @@ GTAGS
*.pb.cc
*.pb.h
*_pb2.py
paddle_*
output/
google/
Makefile
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
add_subdirectory(inference)
......@@ -89,7 +89,7 @@ cd Paddle
# to `FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04` and similarly for other configurations
nvidia-docker build -t paddle:float16 .
# After running this, different results will be written to different log files in Paddle/contrib/float16/
nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/contrib/float16/run_float16_demo.sh
nvidia-docker run -it -v $PWD:/paddle paddle:float16 /paddle/paddle/contrib/float16/run_float16_demo.sh
```
#### Accuracy
......
......@@ -3,7 +3,7 @@
BUILD_PATH=/paddle/fp16_build
WHEEL_PATH=$BUILD_PATH/python/dist
INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book
DEMO_PATH=/paddle/contrib/float16
DEMO_PATH=/paddle/paddle/contrib/float16
# Use the single most powerful CUDA GPU on your machine
export CUDA_VISIBLE_DEVICES=0
......@@ -50,7 +50,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
--data_set=imagenet \
--dirname=$DEMO_PATH/image_classification_imagenet_vgg.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_vgg.inference.model \
--repeat=$REPEAT \
......@@ -68,7 +67,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_resnet \
--data_set=imagenet \
--dirname=$DEMO_PATH/image_classification_imagenet_resnet.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_imagenet_resnet.inference.model \
--repeat=$REPEAT \
......@@ -86,7 +84,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
--data_set=cifar10 \
--dirname=$DEMO_PATH/image_classification_cifar10_vgg.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_vgg.inference.model \
--repeat=$REPEAT \
......@@ -104,7 +101,6 @@ do
--repeat=1 \
$INFER_PATH/test_inference_image_classification_vgg \
--data_set=cifar10 \
--dirname=$DEMO_PATH/image_classification_cifar10_resnet.inference.model \
--fp16_dirname=$DEMO_PATH/float16_image_classification_cifar10_resnet.inference.model \
--repeat=$REPEAT \
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
function(inference_api_test TARGET_NAME TEST_SRC DEP_TEST)
set(options "")
set(oneValueArgs "")
set(multiValueArgs ARGS)
cmake_parse_arguments(inference_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
set(arg_list "")
if(inference_test_ARGS)
foreach(arg ${inference_test_ARGS})
list(APPEND arg_list "_${arg}")
endforeach()
else()
list(APPEND arg_list "_")
endif()
foreach(arg ${arg_list})
string(REGEX REPLACE "^_$" "" arg "${arg}")
cc_test(${TARGET_NAME}
SRCS ${TEST_SRC}
DEPS paddle_fluid_api paddle_inference_api paddle_inference_api_impl
ARGS --dirname=${PYTHON_TESTS_DIR}/book/)
# set_tests_properties(${TARGET_NAME}
# PROPERTIES DEPENDS ${DEP_TEST})
endforeach()
endfunction(inference_api_test)
cc_library(paddle_inference_api
SRCS paddle_inference_api.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
cc_library(paddle_inference_api_impl
SRCS paddle_inference_api_impl.cc
DEPS paddle_inference_api paddle_fluid_api)
cc_test(test_paddle_inference_api
SRCS test_paddle_inference_api.cc
DEPS paddle_inference_api)
inference_api_test(test_paddle_inference_api_impl
test_paddle_inference_api_impl.cc
test_word2vec)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/contrib/inference/paddle_inference_api.h"
......@@ -12,49 +12,74 @@
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains the definition of a simple Inference API for Paddle.
*
* ATTENTION: It requires some C++ features; for lower C++ versions or for C, we
* might release another API.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
namespace paddle {
class Predictor {
public:
struct Attr;
Predictor() = default;
enum PaddleDType {
FLOAT32,
INT64,
};
// Build the network before inference.
bool Init(const Attr& attr);
struct PaddleBuf {
void* data; // pointer to the data memory.
size_t length; // number of memory bytes.
};
struct PaddleTensor {
std::string name; // variable name.
std::vector<int> shape;
PaddleBuf data; // blob of data.
PaddleDType dtype;
};
/*
* A simple Inference API for Paddle. Currently this API is only intended for
* non-sequence scenarios.
* TODO(Superjomn) Prepare another API for NLP-related usages.
*/
class PaddlePredictor {
public:
struct Config;
PaddlePredictor() = default;
PaddlePredictor(const PaddlePredictor&) = delete;
// Predict a record.
// Arguments:
// inputs: the name of the input variables.
// outputs: the names of the output variables.
// input_shapes: the shape of the input variables.
// output_shapes: the shape of the output variables.
// input_data: the data of the input variables.
// output_data: the data of the output variables.
bool Run(const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
const std::vector<std::vector<int>>& input_shapes,
const std::vector<std::vector<int>>& output_shapes,
const std::vector<std::vector<float>>& input_data,
std::vector<std::vector<float>>* output_data);
// Clone a predictor that share the model weights.
Predictor* Clone();
// The caller is responsible for allocating and releasing the memory of
// `inputs`. `inputs` should be alive until Run returns. The caller is
// responsible for releasing the memory of `output_data`.
virtual bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data) = 0;
// Clone a predictor that shares the model weights; the cloned predictor should
// be thread-safe.
virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
// Destroy the Predictor.
~Predictor();
virtual ~PaddlePredictor() {}
struct Attr {
friend std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
const PaddlePredictor::Config& config);
// The common configs for all the predictors.
struct Config {
enum class EngineKind;
std::string model_dir; // path to the model directory.
bool enable_engine{false}; // Enable to execute (part of) the model on
// third-party engines.
EngineKind engine_kind{Attr::EngineKind::kNone};
// third-party engines.
EngineKind engine_kind{Config::EngineKind::kNone};
enum class EngineKind {
kNone = -1, // Use the native Fluid facility.
......@@ -66,4 +91,8 @@ public:
};
};
// A factory to help create different predictors.
template <typename ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
} // namespace paddle
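A minimal usage sketch of the API above, assuming a hypothetical `MyConfig` subclass, a placeholder model path and feed name, and that a `CreatePaddlePredictor` specialization for that config type is linked in (as the implementation below provides for its own config):

```cpp
#include <cstdlib>
#include <memory>
#include <vector>

#include "paddle/contrib/inference/paddle_inference_api.h"

namespace {

// Hypothetical concrete config; a real caller uses a Config subclass for
// which CreatePaddlePredictor<ConfigT> has been specialized.
struct MyConfig : public paddle::PaddlePredictor::Config {};

void RunOnce() {
  MyConfig config;
  config.model_dir = "/path/to/some.inference.model";  // hypothetical path

  std::unique_ptr<paddle::PaddlePredictor> predictor =
      paddle::CreatePaddlePredictor(config);

  // The caller owns the input buffer and keeps it alive until Run returns.
  std::vector<float> buf = {1.f, 2.f, 3.f, 4.f};
  paddle::PaddleTensor input;
  input.name = "x";  // hypothetical feed variable name
  input.shape = {1, 4};
  input.data.data = buf.data();
  input.data.length = buf.size() * sizeof(float);
  input.dtype = paddle::PaddleDType::FLOAT32;

  // Run() fills output_data; the caller releases each output buffer
  // (the implementation below allocates them with malloc).
  std::vector<paddle::PaddleTensor> outputs;
  if (predictor->Run({input}, &outputs)) {
    for (auto& out : outputs) {
      std::free(out.data.data);
    }
  }
}

}  // namespace
```

The `DemoPredictor` test further down in this diff follows the same pattern with its own config type.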
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sys/time.h>
#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
namespace paddle {
namespace {
// A simple timer that reports elapsed wall time in milliseconds.
class Timer {
public:
double start;
double startu;
void tic() {
struct timeval tp;
gettimeofday(&tp, NULL);
start = tp.tv_sec;
startu = tp.tv_usec;
}
double toc() {
struct timeval tp;
gettimeofday(&tp, NULL);
double used_time_ms =
(tp.tv_sec - start) * 1000.0 + (tp.tv_usec - startu) / 1000.0;
return used_time_ms;
}
};
template <class T>
std::string num2str(T a) {
std::stringstream istr;
istr << a;
return istr.str();
}
} // namespace
bool PaddlePredictorImpl::Init() {
VLOG(3) << "Predictor::init()";
// TODO(panyx0718): Should CPU vs GPU device be decided by id?
if (config_.device >= 0) {
place_ = paddle::platform::CUDAPlace(config_.device);
} else {
place_ = paddle::platform::CPUPlace();
}
paddle::framework::InitDevices(false);
executor_.reset(new paddle::framework::Executor(place_));
scope_.reset(new paddle::framework::Scope());
// Initialize the inference program
if (!config_.model_dir.empty()) {
// Parameters are saved in separate files located in
// the specified `dirname`.
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.model_dir);
} else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with those used
// in Python API `fluid.io.save_inference_model`.
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
} else {
LOG(ERROR) << "fail to load inference model.";
return false;
}
ctx_ = executor_->Prepare(*inference_program_, 0);
// Create variables
// TODO(panyx0718): Why need to test share_variables here?
if (config_.share_variables) {
executor_->CreateVariables(*inference_program_, scope_.get(), 0);
}
// Get the feed_target_names and fetch_target_names
feed_target_names_ = inference_program_->GetFeedTargetNames();
fetch_target_names_ = inference_program_->GetFetchTargetNames();
return true;
}
bool PaddlePredictorImpl::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data) {
VLOG(3) << "Predictor::predict";
Timer timer;
timer.tic();
// set feed variable
std::map<std::string, const paddle::framework::LoDTensor *> feed_targets;
std::vector<paddle::framework::LoDTensor> feeds;
if (!SetFeed(inputs, &feeds)) {
LOG(ERROR) << "fail to set feed";
return false;
}
for (size_t i = 0; i < feed_target_names_.size(); ++i) {
feed_targets[feed_target_names_[i]] = &feeds[i];
}
// get fetch variable
std::map<std::string, paddle::framework::LoDTensor *> fetch_targets;
std::vector<paddle::framework::LoDTensor> fetchs;
fetchs.resize(fetch_target_names_.size());
for (size_t i = 0; i < fetch_target_names_.size(); ++i) {
fetch_targets[fetch_target_names_[i]] = &fetchs[i];
}
// Run the inference program
// if variables are shared, we do not need to create them again
executor_->RunPreparedContext(ctx_.get(),
scope_.get(),
&feed_targets,
&fetch_targets,
!config_.share_variables);
if (!GetFetch(fetchs, output_data)) {
LOG(ERROR) << "fail to get fetchs";
return false;
}
VLOG(3) << "predict cost: " << timer.toc() << "ms";
return true;
}
std::unique_ptr<PaddlePredictor> PaddlePredictorImpl::Clone() {
VLOG(3) << "Predictor::clone";
std::unique_ptr<PaddlePredictorImpl> cls(new PaddlePredictorImpl(config_));
if (!cls->InitShared(this)) {
LOG(ERROR) << "fail to call InitShared";
return nullptr;
}
return cls;
}
// TODO(panyx0718): Consider merge with Init()?
bool PaddlePredictorImpl::InitShared(PaddlePredictorImpl *cls) {
VLOG(3) << "Predictor::init_shared";
// 1. Define place, executor, scope
if (this->config_.device >= 0) {
place_ = paddle::platform::CUDAPlace();
} else {
place_ = paddle::platform::CPUPlace();
}
this->executor_.reset(new paddle::framework::Executor(this->place_));
this->scope_.reset(new paddle::framework::Scope());
// Initialize the inference program
if (!this->config_.model_dir.empty()) {
// Parameters are saved in separate files located in
// the specified `dirname`.
this->inference_program_ = paddle::inference::Load(
this->executor_.get(), this->scope_.get(), this->config_.model_dir);
} else if (!this->config_.prog_file.empty() &&
!this->config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with those used
// in Python API `fluid.io.save_inference_model`.
this->inference_program_ =
paddle::inference::Load(this->executor_.get(),
this->scope_.get(),
this->config_.prog_file,
this->config_.param_file);
}
this->ctx_ = this->executor_->Prepare(*this->inference_program_, 0);
// 3. create variables
// TODO(panyx0718): why test share_variables.
if (config_.share_variables) {
this->executor_->CreateVariables(
*this->inference_program_, this->scope_.get(), 0);
}
// 4. Get the feed_target_names and fetch_target_names
this->feed_target_names_ = this->inference_program_->GetFeedTargetNames();
this->fetch_target_names_ = this->inference_program_->GetFetchTargetNames();
return true;
}
bool PaddlePredictorImpl::SetFeed(
const std::vector<PaddleTensor> &inputs,
std::vector<paddle::framework::LoDTensor> *feeds) {
VLOG(3) << "Predictor::set_feed";
if (inputs.size() != feed_target_names_.size()) {
LOG(ERROR) << "wrong feed input size.";
return false;
}
for (size_t i = 0; i < feed_target_names_.size(); ++i) {
paddle::framework::LoDTensor input;
paddle::framework::DDim ddim =
paddle::framework::make_ddim(inputs[i].shape);
void *input_ptr;
if (inputs[i].dtype == PaddleDType::INT64) {
input_ptr =
input.mutable_data<int64_t>(ddim, paddle::platform::CPUPlace());
} else if (inputs[i].dtype == PaddleDType::FLOAT32) {
input_ptr = input.mutable_data<float>(ddim, paddle::platform::CPUPlace());
} else {
LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
return false;
}
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr),
inputs[i].data.data,
inputs[i].data.length);
feeds->push_back(input);
LOG(ERROR) << "Actual feed type " << feeds->back().type().name();
}
return true;
}
bool PaddlePredictorImpl::GetFetch(
const std::vector<paddle::framework::LoDTensor> &fetchs,
std::vector<PaddleTensor> *outputs) {
VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetchs.size());
for (size_t i = 0; i < fetchs.size(); ++i) {
// TODO(panyx0718): Support fetch of other types.
if (fetchs[i].type() != typeid(float)) {
LOG(ERROR) << "only support fetching float now.";
return false;
}
std::vector<int> shape;
auto dims_i = fetchs[i].dims();
auto lod = fetchs[i].lod();
const float *output_ptr = fetchs[i].data<float>();
// const int64_t* output_ptr = fetchs[i].data<int64_t>();
auto num = fetchs[i].numel();
std::vector<float> data;
if (0 == lod.size()) {
std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
for (int j = 0; j < dims_i.size(); ++j) {
shape.push_back(dims_i[j]);
}
} else {
// for batch detection
// image[0] -> output[0] shape {145, 6}
// image[1] -> output[1] shape {176, 6}
// then,
// the batch output shape {321, 6}
// the lod {{0, 145, 321}}
// so we should pad output[0] to {176, 6}
size_t max_dim = 0;
for (size_t j = 1; j < lod[0].size(); j++) {
max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
}
size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
if (max_dim > 0) {
data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
}
for (size_t j = 1; j < lod[0].size(); j++) {
size_t start = lod[0][j - 1] * common_dim;
size_t end = lod[0][j] * common_dim;
if (end > start) {
std::copy(output_ptr + start,
output_ptr + end,
data.begin() + (j - 1) * max_dim * common_dim);
}
}
shape.push_back(lod[0].size() - 1);
shape.push_back(max_dim);
for (int j = 1; j < dims_i.size(); ++j) {
shape.push_back(dims_i[j]);
}
}
outputs->at(i).shape = shape;
outputs->at(i).data.length = sizeof(float) * data.size();
outputs->at(i).data.data = malloc(outputs->at(i).data.length);
std::memcpy(
outputs->at(i).data.data, data.data(), outputs->at(i).data.length);
outputs->at(i).dtype = PaddleDType::FLOAT32;
// TODO(panyx0718): support other types? fill tensor name? avoid a copy.
}
return true;
}
std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
const VisConfig &config) {
VLOG(3) << "create PaddlePredictorImpl";
// 1. GPU memory
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f &&
config.fraction_of_gpu_memory <= 0.95f) {
flags.push_back("dummy");
std::string flag = "--fraction_of_gpu_memory_to_use=" +
num2str<float>(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
framework::InitGflags(flags);
}
std::unique_ptr<PaddlePredictorImpl> predictor(
new PaddlePredictorImpl(config));
if (!predictor->Init()) {
return nullptr;
}
return predictor;
}
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <memory>
#include <string>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/init.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
struct VisConfig : public PaddlePredictor::Config {
int device;
float fraction_of_gpu_memory;
std::string prog_file;
std::string param_file;
bool share_variables;
};
/*
* Do not use this, just a demo indicating how to customize a Predictor.
*/
class PaddlePredictorImpl : public PaddlePredictor {
public:
explicit PaddlePredictorImpl(const VisConfig &config) : config_(config) {}
bool Init();
bool Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data) override;
std::unique_ptr<PaddlePredictor> Clone() override;
~PaddlePredictorImpl() override {}
private:
bool InitShared(PaddlePredictorImpl *cls);
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
std::vector<paddle::framework::LoDTensor> *feeds);
bool GetFetch(const std::vector<paddle::framework::LoDTensor> &fetchs,
std::vector<PaddleTensor> *output_data);
VisConfig config_;
paddle::platform::Place place_;
std::unique_ptr<paddle::framework::Executor> executor_;
std::unique_ptr<paddle::framework::Scope> scope_;
std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx_;
std::unique_ptr<paddle::framework::ProgramDesc> inference_program_;
std::vector<std::string> feed_target_names_;
std::vector<std::string> fetch_target_names_;
};
std::unique_ptr<PaddlePredictorImpl> CreatePaddlePredictorImpl(
const VisConfig &config);
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/contrib/inference/paddle_inference_api.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
namespace paddle {
/*
* Do not use this, just a demo indicating how to customize a config for a
* specific predictor.
*/
struct DemoConfig : public PaddlePredictor::Config {
float other_config;
};
/*
* Do not use this, just a demo indicating how to customize a Predictor.
*/
class DemoPredictor : public PaddlePredictor {
public:
explicit DemoPredictor(const DemoConfig &config) {
LOG(INFO) << "I get other_config " << config.other_config;
}
bool Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data) override {
LOG(INFO) << "Run";
return false;
}
std::unique_ptr<PaddlePredictor> Clone() override { return nullptr; }
~DemoPredictor() override {}
};
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<DemoConfig>(
const DemoConfig &config) {
std::unique_ptr<PaddlePredictor> x(new DemoPredictor(config));
return x;
}
TEST(paddle_inference_api, demo) {
DemoConfig config;
config.other_config = 1.7;
auto predictor = CreatePaddlePredictor(config);
std::vector<PaddleTensor> outputs;
predictor->Run({}, &outputs);
}
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "gflags/gflags.h"
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
DEFINE_string(dirname, "", "Directory of the inference model.");
namespace paddle {
PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
PaddleTensor pt;
pt.data.data = t->data<void>();
if (t->type() == typeid(int64_t)) {
pt.data.length = t->numel() * sizeof(int64_t);
pt.dtype = PaddleDType::INT64;
} else if (t->type() == typeid(float)) {
pt.data.length = t->numel() * sizeof(float);
pt.dtype = PaddleDType::FLOAT32;
} else {
LOG(FATAL) << "unsupported type.";
}
pt.shape = framework::vectorize2int(t->dims());
return pt;
}
TEST(paddle_inference_api_impl, word2vec) {
VisConfig config;
config.model_dir = FLAGS_dirname + "word2vec.inference.model";
LOG(INFO) << "dirname " << config.model_dir;
config.fraction_of_gpu_memory = 0.85;
config.device = 0;
config.share_variables = true;
std::unique_ptr<PaddlePredictorImpl> predictor =
CreatePaddlePredictorImpl(config);
framework::LoDTensor first_word, second_word, third_word, fourth_word;
framework::LoD lod{{0, 1}};
int64_t dict_size = 2073; // The size of dictionary
SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);
std::vector<PaddleTensor> cpu_feeds;
cpu_feeds.push_back(LodTensorToPaddleTensor(&first_word));
cpu_feeds.push_back(LodTensorToPaddleTensor(&second_word));
cpu_feeds.push_back(LodTensorToPaddleTensor(&third_word));
cpu_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));
std::vector<PaddleTensor> outputs;
ASSERT_TRUE(predictor->Run(cpu_feeds, &outputs));
ASSERT_EQ(outputs.size(), 1);
for (size_t i = 0; i < outputs.size(); ++i) {
size_t len = outputs[i].data.length;
float* data = static_cast<float*>(outputs[i].data.data);
for (int j = 0; j < len / sizeof(float); ++j) {
ASSERT_LT(data[j], 1.0);
ASSERT_GT(data[j], -1.0);
}
free(outputs[i].data.data);
}
}
} // namespace paddle
......@@ -243,13 +243,8 @@ const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
}
void OpDesc::Rename(const std::string &old_name, const std::string &new_name) {
for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name);
}
for (auto &output : outputs_) {
std::replace(output.second.begin(), output.second.end(), old_name,
new_name);
}
RenameInput(old_name, new_name);
RenameOutput(old_name, new_name);
need_update_ = true;
}
......@@ -274,6 +269,13 @@ void OpDesc::RenameInput(const std::string &old_name,
for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name);
}
auto it = attrs_.find(framework::OpProtoAndCheckerMaker::OpRoleVarAttrName());
if (it != attrs_.end()) {
auto &op_vars = boost::get<std::vector<std::string>>(it->second);
std::replace(op_vars.begin(), op_vars.end(), old_name, new_name);
}
need_update_ = true;
}
......
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
cc_library(paddle_fluid_api
SRCS io.cc
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
......
......@@ -149,12 +149,14 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
}
if (platform::is_gpu_place(ctx.GetPlace())) {
#ifdef PADDLE_WITH_CUDA
// GPU data is copied to CPU buffer when sending,
// free the buffer when possible.
destroy_callback = [](void* backing) {
platform::CUDAPinnedPlace cuda_pinned;
memory::Free(cuda_pinned, backing);
};
#endif
}
std::string header;
......
......@@ -24,6 +24,8 @@ detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc)
detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu)
detection_library(target_assign_op SRCS target_assign_op.cc
target_assign_op.cu)
detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
polygon_box_transform_op.cu)
# Export local libraries to parent
set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename DeviceContext, typename T>
class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto* in = ctx.Input<Tensor>("Input");
auto in_dims = in->dims();
const T* in_data = in->data<T>();
auto* out = ctx.Output<Tensor>("Output");
T* out_data = out->mutable_data<T>(ctx.GetPlace());
int batch_size = in_dims[0];
int geo_channel = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
int id = 0;
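// Even geometry channels hold x (width) offsets and odd channels hold y (height)
// offsets, so the real coordinate is the pixel index minus the stored offset.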
for (int id_n = 0; id_n < batch_size * geo_channel; ++id_n) {
for (int id_h = 0; id_h < height; ++id_h) {
for (int id_w = 0; id_w < width; ++id_w) {
id = id_n * height * width + width * id_h + id_w;
if (id_n % 2 == 0) {
out_data[id] = id_w - in_data[id];
} else {
out_data[id] = id_h - in_data[id];
}
}
}
}
}
};
class PolygonBoxTransformOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(
ctx->HasInput("Input"),
"Input (Input) of polygon_box transform op should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("Output"),
"Output (Output) of polygon_box transform op should not be null.");
auto in_dim = ctx->GetInputDim("Input");
PADDLE_ENFORCE_EQ(in_dim.size(), 4, "input's rank must be 4.");
PADDLE_ENFORCE_EQ(in_dim[1] % 2, 0,
"input's second dimension must be even.");
ctx->SetOutputDim("Output", in_dim);
}
};
class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput(
"Input",
"The input with shape [batch_size, geometry_channels, height, width]");
AddOutput("Output", "The output with the same shape as input");
AddComment(R"DOC(
PolygonBoxTransform Operator.
The input is the final geometry output of the detection network.
We use 2*n numbers to denote the coordinate shift from n corner vertices of
the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi),
the geometry output contains 2*n channels.
PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(polygon_box_transform, ops::PolygonBoxTransformOp,
ops::PolygonBoxTransformOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
polygon_box_transform,
ops::PolygonBoxTransformCPUKernel<paddle::platform::CPUPlace, float>,
ops::PolygonBoxTransformCPUKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/gpu_info.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using platform::PADDLE_CUDA_NUM_THREADS;
#define CUDA_BLOCK_SIZE 16
template <typename T>
__global__ void PolygonBoxTransformKernel(const int n, const int h, const int w,
const T* input, T* output) {
int id_n = threadIdx.x + blockDim.x * blockIdx.x;
int id_h = threadIdx.y + blockDim.y * blockIdx.y;
int id_w = threadIdx.z + blockDim.z * blockIdx.z;
if (id_n < n && id_h < h && id_w < w) {
int id = id_n * h * w + w * id_h + id_w;
if (id_n % 2 == 0) {
output[id] = id_w - input[id];
} else {
output[id] = id_h - input[id];
}
}
}
template <typename T>
class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use CUDAPlace.");
auto* in = ctx.Input<Tensor>("Input");
auto in_dims = in->dims();
const T* in_data = in->data<T>();
auto* out = ctx.Output<Tensor>("Output");
T* out_data = out->mutable_data<T>(ctx.GetPlace());
int batch_size = in_dims[0];
int geo_channels = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
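// Launch a 3-D grid: x indexes batch * channels, y indexes height, z indexes width.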
dim3 threadsPerBlock(
PADDLE_CUDA_NUM_THREADS / (CUDA_BLOCK_SIZE * CUDA_BLOCK_SIZE),
CUDA_BLOCK_SIZE, CUDA_BLOCK_SIZE);
dim3 numBlocks((batch_size * geo_channels) / threadsPerBlock.x,
(height + threadsPerBlock.y - 1) / threadsPerBlock.y,
(width + threadsPerBlock.z - 1) / threadsPerBlock.z);
auto stream = ctx.cuda_device_context().stream();
PolygonBoxTransformKernel<T><<<numBlocks, threadsPerBlock, 0, stream>>>(
batch_size * geo_channels, height, width, in_data, out_data);
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_CUDA_KERNEL(
polygon_box_transform,
paddle::operators::PolygonBoxTransformOpCUDAKernel<float>,
paddle::operators::PolygonBoxTransformOpCUDAKernel<double>);
......@@ -24,6 +24,14 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* out = ctx.Output<framework::Tensor>("Out");
auto* in = ctx.Input<framework::LoDTensor>("Input");
if (in->lod().size() && ctx.Attr<int>("input_dim_idx") == 0) {
// set the correct batch size for the LoDTensor.
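// (lod().back() stores n + 1 offsets for n sequences, hence the `- 1` below)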
auto odims = out->dims();
int output_dim_idx = ctx.Attr<int>("output_dim_idx");
odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
out->mutable_data<T>(odims, ctx.GetPlace());
}
out->mutable_data<T>(ctx.GetPlace());
auto value = ctx.Attr<float>("value");
......
......@@ -46,7 +46,10 @@ class CrossEntropyFunctor<platform::CPUDeviceContext, T> {
const int64_t* label_data = labels->data<int64_t>();
for (int i = 0; i < batch_size; ++i) {
int index = i * class_num + label_data[i];
int lbl = label_data[i];
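// Guard against out-of-range labels before using them to index prob_data.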
PADDLE_ENFORCE_GE(lbl, 0);
PADDLE_ENFORCE_LT(lbl, class_num);
int index = i * class_num + lbl;
loss_data[i] = -math::TolerableValue<T>()(std::log(prob_data[index]));
}
}
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include "unsupported/Eigen/CXX11/Tensor"
#include "paddle/function/EigenThreadDevice.h"
namespace paddle {
......@@ -70,25 +70,26 @@ struct EigenBlasGemm {
dims[0].first = transA ? 0 : 1;
dims[0].second = transB ? 1 : 0;
Eigen::DefaultDevice device;
auto* device = EigenDeviceWarpper::device();
if (N == ldc) {
if (alpha == T(1) && beta == T(0)) {
c.device(device) = a.contract(b, dims);
c.device(*device) = a.contract(b, dims);
} else if (alpha == T(1) && beta == T(1)) {
c.device(device) += a.contract(b, dims);
c.device(*device) += a.contract(b, dims);
} else {
c.device(device) = alpha * a.contract(b, dims) + beta * c;
c.device(*device) = alpha * a.contract(b, dims) + beta * c;
}
} else {
if (alpha == T(1) && beta == T(0)) {
c.slice(offsetC, extentC).device(device) = a.contract(b, dims);
c.slice(offsetC, extentC).device(*device) = a.contract(b, dims);
} else if (alpha == T(1) && beta == T(1)) {
c.slice(offsetC, extentC).device(device) += a.contract(b, dims);
c.slice(offsetC, extentC).device(*device) += a.contract(b, dims);
} else {
c.slice(offsetC, extentC).device(device) =
c.slice(offsetC, extentC).device(*device) =
alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC);
}
}
EigenDeviceWarpper::free_device(device);
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#if defined(__OSX__) || defined(__APPLE__)
#include <sys/sysctl.h>
#include <sys/types.h>
#endif
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
#if defined(__ANDROID__)
int GetCpuCount() {
FILE* fp = fopen("/sys/devices/system/cpu/possible", "r");
if (!fp) {
return 1;
}
int rank0, rank1;
int num = fscanf(fp, "%d-%d", &rank0, &rank1);
fclose(fp);
if (num < 2) return 1;
return rank1 + 1;
}
#elif defined(__OSX__) || defined(__APPLE__)
int GetCpuCount() {
int count = 0;
size_t len = sizeof(int);
sysctlbyname("hw.ncpu", &count, &len, NULL, 0);
return count > 0 ? count : 1;
}
#else
int GetCpuCount() { return 1; }
#endif
class EigenDeviceWarpper {
public: // NOLINT
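// device() hands out an Eigen device for evaluating tensor expressions. With
// EIGEN_USE_THREADS a single static thread-pool device is shared by all callers
// and free_device() is a no-op; otherwise a fresh DefaultDevice is allocated per
// call and released again in free_device().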
#if EIGEN_USE_THREADS
static Eigen::ThreadPoolDevice* device() {
const int num_cpus = GetCpuCount();
const int num_threads = (num_cpus > 2) ? 2 : num_cpus;
static Eigen::ThreadPool tp(num_threads);
static Eigen::ThreadPoolDevice* device =
new Eigen::ThreadPoolDevice(&tp, num_threads);
return device;
}
static void free_device(Eigen::ThreadPoolDevice* device) {
// do nothing
}
#else
static Eigen::DefaultDevice* device() {
Eigen::DefaultDevice* device = new Eigen::DefaultDevice;
return device;
}
static void free_device(Eigen::DefaultDevice* device) { delete device; }
#endif
};
} // namespace paddle
......@@ -7,6 +7,10 @@ set(OPITMIZER_SRCS
sgd_optimizer.cc
)
cc_library(paddle_optimizer STATIC SRCS ${OPITMIZER_SRCS} DEPS paddle_proto glog)
cc_test(serialization_test SRCS serialization_test.cc DEPS paddle_proto)
cc_test(parameter_optimizer_test SRCS parameter_optimizer_test.cc DEPS paddle_optimizer)
add_library(paddle_optimizer ${OPITMIZER_SRCS})
target_link_libraries(paddle_optimizer paddle_proto glog)
if (WITH_TESTING)
add_unittest(serialization_test serialization_test.cc)
add_unittest(parameter_optimizer_test parameter_optimizer_test.cc)
endif()
#!/bin/bash
function cmake_gen() {
mkdir -p /paddle/build
cd /paddle/build
# build script will not fail if *.deb does not exist
rm *.deb 2>/dev/null || true
# delete previous built whl packages
rm -rf /paddle/paddle/dist 2>/dev/null || true
# Support build for all python versions, currently
# including cp27-cp27m and cp27-cp27mu.
PYTHON_FLAGS=""
if [ "$1" != "" ]; then
echo "using python abi: $1"
if [ "$1" == "cp27-cp27m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
export PATH=/opt/python/cp27-cp27m/bin/:${PATH}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
elif [ "$1" == "cp27-cp27mu" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
export PATH=/opt/python/cp27-cp27mu/bin/:${PATH}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
fi
fi
cat <<EOF
========================================
Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
${PYTHON_FLAGS}
-DWITH_DSO=ON
-DWITH_DOC=${WITH_DOC:-OFF}
-DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF}
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF}
-DWITH_MKL=${WITH_MKL:-ON}
-DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DWITH_SWIG_PY=ON
-DWITH_C_API=${WITH_C_API:-OFF}
-DWITH_PYTHON=${WITH_PYTHON:-ON}
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
-DCUDNN_ROOT=/usr/
-DWITH_TESTING=${WITH_TESTING:-ON}
-DWITH_FAST_BUNDLE_TEST=ON
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
# docker environment is fully controlled by this script.
# See /Paddle/CMakeLists.txt, UNITTEST_USE_VIRTUALENV option.
cmake .. \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \
${PYTHON_FLAGS} \
-DWITH_DSO=ON \
-DWITH_DOC=${WITH_DOC:-OFF} \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} \
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \
-DWITH_MKL=${WITH_MKL:-ON} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_C_API=${WITH_C_API:-OFF} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \
-DWITH_TESTING=${WITH_TESTING:-ON} \
-DWITH_FAST_BUNDLE_TEST=ON \
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
}
function run_build() {
cat <<EOF
============================================
Building in /paddle/build ...
============================================
EOF
make clean
make -j `nproc`
}
function run_test() {
if [ ${WITH_TESTING:-ON} == "ON" ] && [ ${RUN_TEST:-OFF} == "ON" ] ; then
cat <<EOF
========================================
Running unit tests ...
========================================
EOF
ctest --output-on-failure
# make install should also be tested when running unit tests
make install -j `nproc`
pip install /usr/local/opt/paddle/share/wheels/*.whl
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then
paddle version
fi
fi
}
function gen_docs() {
if [[ ${WITH_DOC:-OFF} == "ON" ]]; then
cat <<EOF
========================================
Building documentation ...
In /paddle/build_doc
========================================
EOF
mkdir -p /paddle/build_doc
pushd /paddle/build_doc
cmake .. \
-DWITH_DOC=ON \
-DWITH_GPU=OFF \
-DWITH_AVX=${WITH_AVX:-ON} \
-DWITH_SWIG_PY=ON
make -j `nproc` paddle_docs paddle_apis
popd
fi
if [[ ${WOBOQ:-OFF} == 'ON' ]]; then
cat <<EOF
========================================
Converting C++ source code into HTML ...
========================================
EOF
export WOBOQ_OUT=/paddle/build/woboq_out
mkdir -p $WOBOQ_OUT
cp -rv /woboq/data $WOBOQ_OUT/../data
/woboq/generator/codebrowser_generator \
-b /paddle/build \
-a \
-o $WOBOQ_OUT \
-p paddle:/paddle
/woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT
fi
}
function gen_dockerfile() {
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04"
else
BASE_IMAGE="ubuntu:16.04"
fi
DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:\${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
cat <<EOF
========================================
Generate /paddle/build/Dockerfile ...
========================================
EOF
cat > /paddle/build/Dockerfile <<EOF
FROM ${BASE_IMAGE}
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV HOME /root
EOF
if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 &&"
else
NCCL_DEPS=""
fi
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then
PADDLE_VERSION="paddle version"
CMD='"paddle", "version"'
else
PADDLE_VERSION="true"
CMD='"true"'
fi
cat >> /paddle/build/Dockerfile <<EOF
ADD python/dist/*.whl /
# run paddle version to install python packages first
RUN apt-get update &&\
${NCCL_DEPS}\
apt-get install -y wget python-pip dmidecode python-tk && easy_install -U pip && \
pip install /*.whl; apt-get install -f -y && \
apt-get clean -y && \
rm -f /*.whl && \
${PADDLE_VERSION} && \
ldconfig
${DOCKERFILE_CUDNN_DSO}
${DOCKERFILE_GPU_ENV}
ENV NCCL_LAUNCH_MODE PARALLEL
EOF
if [[ ${WITH_GOLANG:-OFF} == "ON" ]]; then
cat >> /paddle/build/Dockerfile <<EOF
ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/
EOF
fi
cat >> /paddle/build/Dockerfile <<EOF
# the default command shows the paddle version and exits
CMD [${CMD}]
EOF
}
function gen_capi_package() {
if [[ ${WITH_C_API} == "ON" ]]; then
install_prefix="/paddle/build/capi_output"
rm -rf $install_prefix
make DESTDIR="$install_prefix" install
cd $install_prefix/usr/local
ls | egrep -v "^Found.*item$" | xargs tar -cf /paddle/build/paddle.tgz
fi
}
function gen_fluid_inference_lib() {
if [ ${WITH_C_API:-OFF} == "OFF" ] ; then
cat <<EOF
========================================
Deploying fluid inference library ...
========================================
EOF
make -j `nproc` inference_lib_dist
fi
}
set -xe
cmake_gen ${PYTHON_ABI:-""}
run_build
run_test
gen_docs
gen_dockerfile
gen_capi_package
gen_fluid_inference_lib
if [[ ${WITH_C_API:-OFF} == "ON" ]]; then
printf "PaddlePaddle C-API libraries were generated at build/paddle.tgz\n"
else
printf "If you need to install PaddlePaddle in the develop docker image, "
printf "please make install or pip install build/python/dist/*.whl.\n"
fi
#!/bin/bash
set -xe
if [ $ANDROID_ABI == "arm64-v8a" ]; then
ANDROID_ARCH=arm64
if [ $ANDROID_API -lt 21 ]; then
echo "Warning: arm64-v8a requires ANDROID_API >= 21."
ANDROID_API=21
fi
else # armeabi, armeabi-v7a
ANDROID_ARCH=arm
fi
ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API
cat <<EOF
============================================
Generating the standalone toolchain ...
${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh
--arch=$ANDROID_ARCH
--platform=android-$ANDROID_API
--install-dir=${ANDROID_STANDALONE_TOOLCHAIN}
============================================
EOF
${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh \
--arch=$ANDROID_ARCH \
--platform=android-$ANDROID_API \
--install-dir=$ANDROID_STANDALONE_TOOLCHAIN
BUILD_ROOT=/paddle/build_android
DEST_ROOT=/paddle/install_android
mkdir -p $BUILD_ROOT
cd $BUILD_ROOT
if [ $ANDROID_ABI == "armeabi-v7a" ]; then
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
-DANDROID_ABI=$ANDROID_ABI \
-DANDROID_ARM_NEON=ON \
-DANDROID_ARM_MODE=ON \
-DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DUSE_EIGEN_FOR_BLAS=ON \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
elif [ $ANDROID_ABI == "arm64-v8a" ]; then
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
-DANDROID_ABI=$ANDROID_ABI \
-DANDROID_ARM_MODE=ON \
-DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DUSE_EIGEN_FOR_BLAS=OFF \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
elif [ $ANDROID_ABI == "armeabi" ]; then
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
-DANDROID_ABI=$ANDROID_ABI \
-DANDROID_ARM_MODE=ON \
-DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
else
echo "Invalid ANDROID_ABI: $ANDROID_ABI"
fi
cat <<EOF
============================================
Building in $BUILD_ROOT ...
============================================
EOF
make -j `nproc`
make install -j `nproc`
#!/bin/bash
/usr/sbin/sshd -D &
jupyter notebook --ip=0.0.0.0 /paddle/book/
#!/bin/bash
set -e
# the number of process to run tests
NUM_PROC=6
# calculate and set the memory usage for each process
MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE
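# e.g. NUM_PROC=6 gives 0.17, so each test process may use about 1/6 of GPU memory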
# get the CUDA device count
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
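# Each process sees the full device list rotated by its index, e.g. with 4 GPUs
# process 0 gets "0,1,2,3" and process 5 gets "1,2,3,0".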
for (( i = 0; i < $NUM_PROC; i++ )); do
cuda_list=()
for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do
        s=$((i+j))
        n=$((s%CUDA_DEVICE_COUNT))
if [ $j -eq 0 ]; then
cuda_list=("$n")
else
cuda_list="$cuda_list,$n"
fi
done
echo $cuda_list
# CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus
# ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest
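    # -I $i,,$NUM_PROC makes ctest run every NUM_PROC-th test starting from test $i,
    # so the NUM_PROC processes split the full test list between them.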
env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC --output-on-failure &
done
wait
......@@ -104,6 +104,8 @@ function cmake_gen() {
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=ON
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
......@@ -129,7 +131,8 @@ EOF
-DWITH_FAST_BUNDLE_TEST=ON \
-DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=ON
}
function abort(){
......
#!/bin/bash
set -e
# Create the build directory for CMake.
mkdir -p $TRAVIS_BUILD_DIR/build
cd $TRAVIS_BUILD_DIR/build
# Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make -j `nproc` paddle_docs paddle_apis
# check websites for broken links
linkchecker doc/v2/en/html/index.html
linkchecker doc/v2/cn/html/index.html
linkchecker doc/v2/api/en/html/index.html
#!/bin/bash
set -e
# Create the build directory for CMake.
mkdir -p $TRAVIS_BUILD_DIR/build_ios
cd $TRAVIS_BUILD_DIR/build_ios
# Compile paddle binaries
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \
-DCMAKE_OSX_ARCHITECTURES="arm64" \
-DWITH_C_API=ON \
-DUSE_EIGEN_FOR_BLAS=ON \
-DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \
-DCMAKE_BUILD_TYPE=Release \
..
make -j 2
#!/bin/bash
function abort(){
echo "Your change doesn't follow PaddlePaddle's code style." 1>&2
echo "Please use pre-commit to check what is wrong." 1>&2
exit 1
}
trap 'abort' 0
set -e
# install glide
curl https://glide.sh/get | bash
eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
go get github.com/alecthomas/gometalinter
gometalinter --install
cd $TRAVIS_BUILD_DIR
export PATH=/usr/bin:$PATH
pre-commit install
clang-format --version
if ! pre-commit run -a ; then
git diff
exit 1
fi
trap : 0
......@@ -23,7 +23,7 @@ from ..executor import global_scope
__all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer',
'Preprocessor'
'random_data_generator', 'Preprocessor'
]
......
......@@ -81,7 +81,7 @@ __all__ = [
'label_smooth',
'roi_pool',
'dice_loss',
'bilinear_interp',
'upsampling_bilinear2d',
]
......@@ -3917,8 +3917,10 @@ def dice_loss(input, label, epsilon=0.00001):
return reduce_mean(dice_score)
def bilinear_interp(input, out_h, out_w, name=None):
def upsampling_bilinear2d(input, out_shape=None, scale=None, name=None):
"""
    The operation performed by upsampling_bilinear2d is known mathematically as
    bilinear interpolation.
Bilinear interpolation is an extension of linear interpolation for
interpolating functions of two variables (e.g. H-direction and
W-direction in this layer) on a rectilinear 2D grid.
......@@ -3930,8 +3932,13 @@ def bilinear_interp(input, out_h, out_w, name=None):
input (Variable): The input tensor of bilinear interpolation,
This is a 4-D tensor of the shape
(num_batches, channels, in_h, in_w).
out_h (int): output height of bilinear interpolation layer.
out_w (int): output width of bilinear interpolation layer.
out_shape(list|tuple|None): Output shape of bilinear interpolation
layer, the shape is (out_h, out_w).
Default: None
        scale(int|None): The multiplier for the input height or width.
            At least one of out_shape and scale must be set,
            and out_shape takes priority over scale.
Default: None
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
......@@ -3942,10 +3949,27 @@ def bilinear_interp(input, out_h, out_w, name=None):
Examples:
.. code-block:: python
out = fluid.layers.bilinear_interp(input, out_h=12, out_w=12)
out = fluid.layers.bilinear_interp(input, out_shape=[12, 12])
"""
if out_shape is None and scale is None:
raise ValueError("One of out_shape and scale must not be None")
helper = LayerHelper('bilinear_interp', **locals())
dtype = helper.input_dtype()
    def _is_list_or_tuple_(data):
        return (isinstance(data, list) or isinstance(data, tuple))
if out_shape is not None:
        if not (_is_list_or_tuple_(out_shape) and len(out_shape) == 2):
            raise ValueError('out_shape should be a list or tuple '
                             'with length 2, (out_h, out_w).')
out_shape = list(map(int, out_shape))
out_h = out_shape[0]
out_w = out_shape[1]
else:
out_h = int(input.shape[2] * scale)
out_w = int(input.shape[3] * scale)
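    # Illustrative example: an input of shape (N, C, 8, 6) with scale=2 yields
    # out_h=16 and out_w=12; when out_shape is given it takes precedence (see above).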
out = helper.create_tmp_variable(dtype)
helper.append_op(
type="bilinear_interp",
......
......@@ -93,12 +93,12 @@ def _convert_lod(lod):
def create_lod_tensor(data, lod, place):
"""Create a lod tensor from a numpy array or an existing lod tensor.
"""Create a lod tensor from a numpy array, a list, or an existing lod tensor.
Create a lod tensor by doing the following:
1. Check that the length-based input lod is valid.
2. Convert the length-based lod to a offset-based LoD.
    3. Copy the data from a numpy array or an existing lod tensor to
    3. Copy the data from a numpy array, a list or an existing lod tensor to
CPU or GPU device (based on input place).
4. Set the level of detail (LoD) using the offset-based LoD.
......@@ -117,7 +117,7 @@ def create_lod_tensor(data, lod, place):
for more details regarding LoD.
Args:
data: a numpy array or a LoDTensor holding the data to be copied.
data: a numpy array or a LoDTensor or a list holding the data to be copied.
lod: a list of lists indicating the length-based LoD info specified by the user.
place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
......@@ -126,6 +126,18 @@ def create_lod_tensor(data, lod, place):
"""
if isinstance(data, core.LoDTensor):
return create_lod_tensor(np.array(data), lod, place)
elif isinstance(data, list):
        # When the input data is a list, this only handles the case where each base
        # element is an index of shape [1] and dtype int64 (e.g., a word id). Hence,
        # the generated LoDTensor will be of shape [n, 1] and dtype int64, where `n`
        # is the total number of words or other indexes in the sequences.
new_lod = []
for seq in data:
new_lod.append(len(seq))
assert [new_lod] == lod, "data and lod do not match"
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
return create_lod_tensor(flattened_data, lod, place)
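        # Illustrative example: data = [[1, 2, 3], [4, 5]] with lod = [[3, 2]]
        # is flattened into a LoDTensor of shape [5, 1] and dtype int64.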
elif isinstance(data, np.ndarray):
assert _validate_lod(lod,
data.shape[0]), "the provided lod info is invalid"
......@@ -134,9 +146,8 @@ def create_lod_tensor(data, lod, place):
tensor.set_lod(_convert_lod(lod))
return tensor
else:
raise Exception(
"data should be either a LoDTensor or a Numpy array, but you pass type %s instead"
% (type(data)))
raise TypeError(
"data should be either a LoDTensor, a Numpy array or a list")
def create_random_int_lodtensor(lod, base_shape, place, low, high):
......
......@@ -48,7 +48,7 @@ def linear():
return avg_loss
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
......@@ -68,8 +68,8 @@ def train(use_cuda, train_program, save_dirname):
['15.343549569447836']
...
'''
if save_dirname is not None:
trainer.save_params(save_dirname)
if params_dirname is not None:
trainer.save_params(params_dirname)
trainer.stop()
trainer.train(
......@@ -80,13 +80,13 @@ def train(use_cuda, train_program, save_dirname):
# infer
def infer(use_cuda, inference_program, save_dirname=None):
if save_dirname is None:
def infer(use_cuda, inference_program, params_dirname=None):
if params_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 10
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
......@@ -100,10 +100,10 @@ def main(use_cuda):
return
# Directory for saving the trained model
save_dirname = "fit_a_line.inference.model"
params_dirname = "fit_a_line.inference.model"
train(use_cuda, linear, save_dirname)
infer(use_cuda, inference_program, save_dirname)
train(use_cuda, linear, params_dirname)
infer(use_cuda, inference_program, params_dirname)
class TestFitALine(unittest.TestCase):
......
......@@ -85,7 +85,7 @@ def train_network():
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
BATCH_SIZE = 128
EPOCH_NUM = 1
......@@ -105,8 +105,8 @@ def train(use_cuda, train_program, save_dirname):
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
if save_dirname is not None:
trainer.save_params(save_dirname)
if params_dirname is not None:
trainer.save_params(params_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
......@@ -122,10 +122,10 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['pixel', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
infer_func=inference_program, param_path=params_dirname, place=place)
# The input's dimension of conv should be 4-D or 5-D.
    # Use normalized image pixels as input data, which should be in the range
......@@ -142,12 +142,14 @@ def main(use_cuda):
save_path = "image_classification_resnet.inference.model"
train(
use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
use_cuda=use_cuda,
train_program=train_network,
params_dirname=save_path)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
save_dirname=save_path)
params_dirname=save_path)
if __name__ == '__main__':
......
......@@ -64,7 +64,7 @@ def train_network():
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
BATCH_SIZE = 128
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -82,8 +82,8 @@ def train(use_cuda, train_program, save_dirname):
print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy))
if accuracy > 0.01: # Low threshold for speeding up CI
if save_dirname is not None:
trainer.save_params(save_dirname)
if params_dirname is not None:
trainer.save_params(params_dirname)
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
......@@ -99,10 +99,10 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['pixel', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
infer_func=inference_program, param_path=params_dirname, place=place)
# The input's dimension of conv should be 4-D or 5-D.
    # Use normalized image pixels as input data, which should be in the range
......@@ -119,12 +119,14 @@ def main(use_cuda):
save_path = "image_classification_vgg.inference.model"
train(
use_cuda=use_cuda, train_program=train_network, save_dirname=save_path)
use_cuda=use_cuda,
train_program=train_network,
params_dirname=save_path)
infer(
use_cuda=use_cuda,
inference_program=inference_network,
save_dirname=save_path)
params_dirname=save_path)
if __name__ == '__main__':
......
......@@ -141,7 +141,7 @@ def train_program():
return [avg_cost]
def train(use_cuda, train_program, save_path):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
......@@ -172,7 +172,7 @@ def train(use_cuda, train_program, save_path):
print("avg_cost: %s" % avg_cost)
if float(avg_cost) < 100.0: # Large value to increase CI speed
trainer.save_params(save_path)
trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
float(avg_cost)))
......@@ -183,7 +183,7 @@ def train(use_cuda, train_program, save_path):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(save_path)
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
......@@ -197,10 +197,10 @@ def train(use_cuda, train_program, save_path):
feed_order=feed_order)
def infer(use_cuda, inference_program, save_path):
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inference_program, param_path=save_path, place=place)
inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating LoDTensors to represent sequences of words.
# Here each word is the basic element of these LoDTensors and the shape of
......@@ -251,9 +251,9 @@ def infer(use_cuda, inference_program, save_path):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "label_semantic_roles.inference.model"
train(use_cuda, train_program, save_path)
infer(use_cuda, inference_program, save_path)
params_dirname = "label_semantic_roles.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
......
......@@ -57,7 +57,7 @@ def train_program():
return [avg_cost, acc]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
......@@ -78,7 +78,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
......@@ -100,11 +100,11 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['img', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
......@@ -116,17 +116,17 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
save_dirname = "recognize_digits_conv.inference.model"
params_dirname = "recognize_digits_conv.inference.model"
# call train() with is_local argument to run distributed train
train(
use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)
params_dirname=params_dirname)
if __name__ == '__main__':
......
......@@ -44,7 +44,7 @@ def train_program():
return [avg_cost, acc]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
......@@ -62,7 +62,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
else:
print('BatchID {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
event.epoch + 1, avg_cost, acc))
......@@ -81,11 +81,11 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['img', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 1
tensor_img = numpy.random.uniform(-1.0, 1.0,
......@@ -97,17 +97,17 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
save_dirname = "recognize_digits_mlp.inference.model"
params_dirname = "recognize_digits_mlp.inference.model"
# call train() with is_local argument to run distributed train
train(
use_cuda=use_cuda,
train_program=train_program,
save_dirname=save_dirname)
params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_dirname=save_dirname)
params_dirname=params_dirname)
if __name__ == '__main__':
......
......@@ -155,7 +155,7 @@ def train_program():
return [avg_cost, scale_infer]
def train(use_cuda, train_program, save_path):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.SGD(learning_rate=0.2)
......@@ -180,7 +180,7 @@ def train(use_cuda, train_program, save_path):
print("avg_cost: %s" % avg_cost)
if float(avg_cost) < 4: # Smaller value to increase CI speed
trainer.save_params(save_path)
trainer.save_params(params_dirname)
trainer.stop()
else:
print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
......@@ -197,43 +197,30 @@ def train(use_cuda, train_program, save_path):
num_epochs=1,
event_handler=event_handler,
reader=train_reader,
feed_order=[
'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id',
'category_id', 'movie_title', 'score'
])
feed_order=feed_order)
def infer(use_cuda, inference_program, save_path):
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inference_program, param_path=save_path, place=place)
def create_lod_tensor(data, lod=None):
tensor = fluid.LoDTensor()
if lod is None:
# Tensor, the shape is [batch_size, 1]
index = 0
lod_0 = [index]
for l in range(len(data)):
index += 1
lod_0.append(index)
lod = [lod_0]
tensor.set_lod(lod)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
tensor.set(flattened_data, place)
return tensor
# Generate a random input for inference
user_id = create_lod_tensor([[1]])
gender_id = create_lod_tensor([[1]])
age_id = create_lod_tensor([[0]])
job_id = create_lod_tensor([[10]])
movie_id = create_lod_tensor([[783]])
category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
[[0, 5]])
inference_program, param_path=params_dirname, place=place)
# Use the first data from paddle.dataset.movielens.test() as input.
# Use create_lod_tensor(data, lod, place) API to generate LoD Tensor,
# where `data` is a list of sequences of index numbers, `lod` is
# the level of detail (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, lod = [[3, 2]] contains one level of detail info,
# indicating that `data` consists of two sequences of length 3 and 2.
user_id = fluid.create_lod_tensor([[1]], [[1]], place)
gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
age_id = fluid.create_lod_tensor([[0]], [[1]], place)
job_id = fluid.create_lod_tensor([[10]], [[1]], place)
movie_id = fluid.create_lod_tensor([[783]], [[1]], place)
category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place)
movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]],
place)
results = inferencer.infer(
{
......@@ -253,12 +240,15 @@ def infer(use_cuda, inference_program, save_path):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "recommender_system.inference.model"
train(use_cuda=use_cuda, train_program=train_program, save_path=save_path)
params_dirname = "recommender_system.inference.model"
train(
use_cuda=use_cuda,
train_program=train_program,
params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=inference_program,
save_path=save_path)
params_dirname=params_dirname)
if __name__ == '__main__':
......
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# This test is buggy
# py_test(test_understand_sentiment_dynamic_rnn SRCS
# test_understand_sentiment_dynamic_rnn.py SERIAL)
LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
......
......@@ -64,7 +64,7 @@ def train_program(word_dict):
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
......@@ -85,7 +85,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
else:
......@@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
......@@ -112,13 +112,13 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=save_dirname,
param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
......@@ -143,9 +143,9 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, save_path)
infer(use_cuda, inference_program, save_path)
params_dirname = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
......
......@@ -79,7 +79,7 @@ def train_program(word_dict):
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
......@@ -100,7 +100,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
else:
......@@ -112,7 +112,7 @@ def train(use_cuda, train_program, save_dirname):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
......@@ -127,13 +127,13 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=save_dirname,
param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
......@@ -158,9 +158,9 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, save_path)
infer(use_cuda, inference_program, save_path)
params_dirname = "understand_sentiment_conv.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
......
......@@ -71,7 +71,7 @@ def train_program(word_dict):
return [avg_cost, accuracy]
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
optimizer = fluid.optimizer.Adagrad(learning_rate=0.002)
......@@ -92,7 +92,7 @@ def train(use_cuda, train_program, save_dirname):
print("acc : %s" % acc)
if acc > 0.2: # Smaller value to increase CI speed
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
else:
......@@ -104,7 +104,7 @@ def train(use_cuda, train_program, save_dirname):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, map(np.array, event.metrics)))
if event.step == 1: # Run 2 iterations to speed CI
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
train_reader = paddle.batch(
......@@ -119,13 +119,13 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['words', 'label'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=save_dirname,
param_path=params_dirname,
place=place)
# Setup input by creating LoDTensor to represent sequence of words.
......@@ -150,9 +150,9 @@ def infer(use_cuda, inference_program, save_dirname=None):
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "understand_sentiment_stacked_lstm.inference.model"
train(use_cuda, train_program, save_path)
infer(use_cuda, inference_program, save_path)
params_dirname = "understand_sentiment_stacked_lstm.inference.model"
train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname)
if __name__ == '__main__':
......
......@@ -80,7 +80,7 @@ def train_program(is_sparse):
return avg_cost
def train(use_cuda, train_program, save_dirname):
def train(use_cuda, train_program, params_dirname):
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
test_reader = paddle.batch(
......@@ -97,7 +97,7 @@ def train(use_cuda, train_program, save_dirname):
print("loss= ", avg_cost)
if avg_cost < 10.0:
trainer.save_params(save_dirname)
trainer.save_params(params_dirname)
trainer.stop()
if math.isnan(avg_cost):
......@@ -115,10 +115,10 @@ def train(use_cuda, train_program, save_dirname):
feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
def infer(use_cuda, inference_program, save_dirname=None):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path=save_dirname, place=place)
infer_func=inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating 4 LoDTensors representing 4 words. Here each word
# is simply an index to look up for the corresponding word vector and hence
......@@ -153,17 +153,17 @@ def main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
save_path = "word2vec.inference.model"
params_dirname = "word2vec.inference.model"
train(
use_cuda=use_cuda,
train_program=partial(train_program, is_sparse),
save_dirname=save_path)
params_dirname=params_dirname)
infer(
use_cuda=use_cuda,
inference_program=partial(inference_program, is_sparse),
save_dirname=save_path)
params_dirname=params_dirname)
if __name__ == '__main__':
......
......@@ -173,63 +173,33 @@ def train(use_cuda, save_dirname, is_local=True):
test_reader = paddle.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def func_feed(feeding, data):
feed_tensors = {}
for (key, idx) in feeding.iteritems():
tensor = fluid.LoDTensor()
if key != "category_id" and key != "movie_title":
if key == "score":
numpy_data = np.array(map(lambda x: x[idx], data)).astype(
"float32")
else:
numpy_data = np.array(map(lambda x: x[idx], data)).astype(
"int64")
else:
numpy_data = map(lambda x: np.array(x[idx]).astype("int64"),
data)
lod_info = [len(item) for item in numpy_data]
offset = 0
lod = [offset]
for item in lod_info:
offset += item
lod.append(offset)
numpy_data = np.concatenate(numpy_data, axis=0)
tensor.set_lod([lod])
numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
tensor.set(numpy_data, place)
feed_tensors[key] = tensor
return feed_tensors
feed_order = [
'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
'movie_title', 'score'
]
def train_loop(main_program):
exe.run(framework.default_startup_program())
feed_list = [
main_program.global_block().var(var_name) for var_name in feed_order
]
feeder = fluid.DataFeeder(feed_list, place)
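        # feeder.feed(data) turns each minibatch into the tensors expected by the
        # variables named in feed_order, instead of building LoDTensors by hand as
        # the former func_feed helper did.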
PASS_NUM = 100
for pass_id in range(PASS_NUM):
for batch_id, data in enumerate(train_reader()):
# train a mini-batch
outs = exe.run(program=main_program,
feed=func_feed(feeding, data),
feed=feeder.feed(data),
fetch_list=[avg_cost])
out = np.array(outs[0])
if (batch_id + 1) % 10 == 0:
avg_cost_set = []
for test_data in test_reader():
avg_cost_np = exe.run(
program=test_program,
feed=func_feed(feeding, test_data),
fetch_list=[avg_cost])
avg_cost_np = exe.run(program=test_program,
feed=feeder.feed(test_data),
fetch_list=[avg_cost])
avg_cost_set.append(avg_cost_np[0])
break # test only 1 segment for speeding up CI
......@@ -279,23 +249,6 @@ def infer(use_cuda, save_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
def create_lod_tensor(data, lod=None):
tensor = fluid.LoDTensor()
if lod is None:
# Tensor, the shape is [batch_size, 1]
index = 0
lod_0 = [index]
for l in range(len(data)):
index += 1
lod_0.append(index)
lod = [lod_0]
tensor.set_lod(lod)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
tensor.set(flattened_data, place)
return tensor
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
......@@ -307,26 +260,33 @@ def infer(use_cuda, save_dirname=None):
# Use the first data from paddle.dataset.movielens.test() as input
assert feed_target_names[0] == "user_id"
user_id = create_lod_tensor([[1]])
# Use create_lod_tensor(data, lod, place) API to generate LoD Tensor
# where `data` is a list of sequences of index numbers, `lod` is
# the level of detail (lod) info associated with `data`.
# For example, data = [[10, 2, 3], [2, 3]] means that it contains
# two sequences of indexes, of length 3 and 2, respectively.
# Correspondingly, lod = [[3, 2]] contains one level of detail info,
# indicating that `data` consists of two sequences of length 3 and 2.
user_id = fluid.create_lod_tensor([[1]], [[1]], place)
assert feed_target_names[1] == "gender_id"
gender_id = create_lod_tensor([[1]])
gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
assert feed_target_names[2] == "age_id"
age_id = create_lod_tensor([[0]])
age_id = fluid.create_lod_tensor([[0]], [[1]], place)
assert feed_target_names[3] == "job_id"
job_id = create_lod_tensor([[10]])
job_id = fluid.create_lod_tensor([[10]], [[1]], place)
assert feed_target_names[4] == "movie_id"
movie_id = create_lod_tensor([[783]])
movie_id = fluid.create_lod_tensor([[783]], [[1]], place)
assert feed_target_names[5] == "category_id"
category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place)
assert feed_target_names[6] == "movie_title"
movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
[[0, 5]])
movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]],
[[5]], place)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
......
......@@ -53,11 +53,14 @@ class TestLoDTensor(unittest.TestCase):
self.assertEqual(_convert_lod(lod), converted_lod)
def test_create_lod_tensor(self):
# Only numpy array or a fluid LoDTensor is valid input to
# create_lod_tensor function, currently a list of lists is not.
data = [[1, 2], [3, 4]]
self.assertRaises(Exception, create_lod_tensor, data, [],
# Create LoDTensor from a list
data = [[1, 2, 3], [3, 4]]
wrong_lod = [[2, 2]]
correct_lod = [[3, 2]]
self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod,
fluid.CPUPlace())
tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace())
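        # The length-based lod [[3, 2]] is stored internally in offset form as [[0, 3, 5]].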
self.assertEqual(tensor.lod(), [[0, 3, 5]])
# Create LoDTensor from numpy array
data = numpy.random.random([10, 1])
......
......@@ -479,9 +479,9 @@ class OpTest(unittest.TestCase):
def np_dtype_to_fluid_dtype(input):
"""Change the dtype of float16 numpy array
        numpy float16 is bound to paddle::platform::float16
        in tensor_py.h via the help of uint16 data type since
        the internal memory representation of float16 is
        uint16_t in paddle and np.uint16 in numpy, which are
        themselves bound together by pybind.
......@@ -489,9 +489,9 @@ class OpTest(unittest.TestCase):
input: input numpy array
Returns:
            input: The dtype of input will be changed to np.uint16 if
                it is originally np.float16, such that the internal memory
                of input will be reinterpreted as of dtype np.uint16.
"""
if input.dtype == np.float16:
input.dtype = np.uint16
......
......@@ -50,5 +50,27 @@ class TestFillConstantBatchSizeLikeWhenSecondDimIsBatchSize(OpTest):
self.check_output()
class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest):
def setUp(self):
self.op_type = "fill_constant_batch_size_like"
self.inputs = {
'Input': (np.random.random((31, 28)).astype("float32"),
[[0, 9, 23, 31]])
}
self.attrs = {
'value': 3.5,
'shape': [-1, 16],
'input_dim_idx': 0,
'output_dim_idx': 0
}
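        # The input LoD [[0, 9, 23, 31]] contains 3 sequences, so the batch dimension
        # of the output is 3 and Out is expected to have shape [3, 16].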
out = np.random.random((3, 16)).astype("float32")
out.fill(3.5)
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
......@@ -369,11 +369,13 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(output)
print(str(program))
def test_bilinear_interp(self):
def test_upsampling_bilinear2d(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[3, 9, 6], dtype="float32")
output = layers.bilinear_interp(x, 12, 12)
output = layers.upsampling_bilinear2d(x, out_shape=[12, 12])
self.assertIsNotNone(output)
output = layers.upsampling_bilinear2d(x, scale=3)
self.assertIsNotNone(output)
print(str(program))
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
def PolygonBoxRestore(input):
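    # Reference implementation: build a per-pixel (w, h) index grid, broadcast it
    # across the geo channels, and subtract the input, which is treated as offsets.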
shape = input.shape
batch_size = shape[0]
geo_channels = shape[1]
h = shape[2]
w = shape[3]
h_indexes = np.array(range(h) * w).reshape(
[w, h]).transpose()[np.newaxis, :] # [1, h, w]
w_indexes = np.array(range(w) * h).reshape(
[h, w])[np.newaxis, :] # [1, h, w]
indexes = np.concatenate(
(w_indexes, h_indexes))[np.newaxis, :] # [1, 2, h, w]
indexes = indexes.repeat(
[geo_channels / 2],
axis=0)[np.newaxis, :] # [1, geo_channels/2, 2, h, w]
indexes = indexes.repeat(
[batch_size], axis=0) # [batch_size, geo_channels/2, 2, h, w]
return indexes.reshape(
input.shape) - input # [batch_size, geo_channels, h, w]
class TestPolygonBoxRestoreOp(OpTest):
def config(self):
self.input_shape = (1, 8, 2, 2)
def setUp(self):
self.config()
self.op_type = "polygon_box_transform"
input = np.random.random(self.input_shape).astype("float32")
self.inputs = {'Input': input}
output = PolygonBoxRestore(input)
self.outputs = {'Output': output}
def test_check_output(self):
self.check_output()
class TestCase1(TestPolygonBoxRestoreOp):
def config(self):
self.input_shape = (2, 10, 3, 2)
class TestCase2(TestPolygonBoxRestoreOp):
def config(self):
self.input_shape = (3, 12, 4, 5)
if __name__ == '__main__':
unittest.main()