提交 f91422ec 编写于 作者: C chengduoZH

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into refine_im2col

......@@ -98,7 +98,7 @@ IF(NOT ${CBLAS_FOUND})
ENDIF()
INSTALL(CODE "execute_process(
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib
destination ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}
${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}
)"
)
INSTALL(CODE "MESSAGE(STATUS \"Installing: \"
......
......@@ -100,8 +100,9 @@ void ROIPoolLayer::forward(PassType passType) {
size_t roiEndH = round(bottomROIs[4] * spatialScale_);
CHECK_GE(roiBatchIdx, 0UL);
CHECK_LT(roiBatchIdx, batchSize);
size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL);
size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL);
size_t roiHeight =
std::max(roiEndH - roiStartH + 1, static_cast<size_t>(1));
size_t roiWidth = std::max(roiEndW - roiStartW + 1, static_cast<size_t>(1));
real binSizeH =
static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_);
real binSizeW =
......@@ -114,10 +115,14 @@ void ROIPoolLayer::forward(PassType passType) {
size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW));
size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH));
size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW));
hstart = std::min(std::max(hstart + roiStartH, 0UL), height_);
wstart = std::min(std::max(wstart + roiStartW, 0UL), width_);
hend = std::min(std::max(hend + roiStartH, 0UL), height_);
wend = std::min(std::max(wend + roiStartW, 0UL), width_);
hstart = std::min(
std::max(hstart + roiStartH, static_cast<size_t>(0)), height_);
wstart = std::min(
std::max(wstart + roiStartW, static_cast<size_t>(0)), width_);
hend = std::min(std::max(hend + roiStartH, static_cast<size_t>(0)),
height_);
wend = std::min(std::max(wend + roiStartW, static_cast<size_t>(0)),
width_);
bool isEmpty = (hend <= hstart) || (wend <= wstart);
size_t poolIndex = ph * pooledWidth_ + pw;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_op.h"
#include <map>
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
framework::LoDTensor *selected_ids,
framework::LoDTensor *selected_scores) {
auto items = SelectTopBeamSizeItems();
auto selected_items = ToMap(items);
PruneEndidCandidates(pre_ids, &selected_items);
// calculate the output tensor's height
size_t num_instances = std::accumulate(
std::begin(items), std::end(items), 0,
[](size_t a, std::vector<Item> &b) { return a + b.size(); });
// the output tensor shape should be [num_instances, 1]
auto dims = framework::make_ddim(
std::vector<int64_t>({static_cast<int>(num_instances), 1}));
selected_ids->Resize(dims);
selected_scores->Resize(dims);
std::map<size_t /*offset*/, std::vector<Item>> hash;
framework::LoD new_lod;
auto *ids_data = selected_ids->mutable_data<int>(platform::CPUPlace());
auto *scores_data =
selected_scores->mutable_data<float>(platform::CPUPlace());
// fill in data
std::vector<size_t> low_level;
size_t low_offset = 0;
for (auto &items : selected_items) {
low_level.push_back(low_offset);
for (auto &item : items) {
ids_data[low_offset] = item.id;
scores_data[low_offset] = item.score;
low_offset++;
}
}
// fill lod
auto abs_lod = framework::ToAbsOffset(ids_->lod());
auto &high_level = abs_lod[lod_level_];
framework::LoD lod(2);
lod[0].assign(high_level.begin(), high_level.end());
lod[1].assign(low_level.begin(), low_level.end());
selected_ids->set_lod(lod);
selected_scores->set_lod(lod);
}
void BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids,
std::vector<std::vector<Item>> *items) {
auto *pre_ids_data = pre_ids.data<int>();
for (size_t offset = 0; offset < items->size(); offset++) {
auto prefix_id = pre_ids_data[offset];
if (prefix_id == end_id_) {
items->at(offset).clear();
}
}
}
std::vector<std::vector<BeamSearch::Item>> BeamSearch::ToMap(
const std::vector<std::vector<Item>> &items) {
std::vector<std::vector<Item>> result;
for (auto &entries : items) {
for (const auto &item : entries) {
if (item.offset >= result.size()) {
result.resize(item.offset + 1);
}
result[item.offset].push_back(item);
}
}
return result;
}
std::vector<std::vector<BeamSearch::Item>>
BeamSearch::SelectTopBeamSizeItems() {
std::vector<std::vector<Item>> result;
std::vector<Item> items;
// for each source sentence, select the top beam_size items across all
// candidate sets.
while (NextItemSet(&items)) {
std::nth_element(std::begin(items), std::begin(items) + beam_size_,
std::end(items), [](const Item &a, const Item &b) {
// TODO(superjom) make score's comparation customizable.
// partial sort in descending order
return a.score > b.score;
});
// prune the top beam_size items.
if (items.size() > beam_size_) {
items.resize(beam_size_);
}
result.emplace_back(items);
}
return result;
}
// the candidates of a source
bool BeamSearch::NextItemSet(std::vector<BeamSearch::Item> *items) {
if (sent_offset_ >= ids_->NumElements(lod_level_)) {
return false;
}
// find the current candidates
auto ids = *ids_;
auto scores = *scores_;
auto source_abs_two_level_lod = framework::SliceInLevel(
ids.lod(), lod_level_, sent_offset_, sent_offset_ + 1);
source_abs_two_level_lod = framework::ToAbsOffset(source_abs_two_level_lod);
auto abs_lod = framework::ToAbsOffset(ids.lod());
PADDLE_ENFORCE_GE(source_abs_two_level_lod.size(), 2UL);
auto *ids_data = ids.data<int>();
auto *scores_data = scores.data<float>();
size_t instance_dim = 1;
for (int i = 1; i < ids.dims().size(); i++) {
instance_dim *= ids.dims()[i];
}
items->clear();
items->reserve(framework::product(ids.dims()));
for (size_t offset = abs_lod[lod_level_][sent_offset_];
offset < abs_lod[lod_level_][sent_offset_ + 1]; offset++) {
for (int d = 0; d < instance_dim; d++) {
const size_t dim_offset = offset * instance_dim + d;
items->emplace_back(offset, ids_data[dim_offset],
scores_data[dim_offset]);
}
}
sent_offset_++;
return true;
}
class BeamSearchProtoAndCheckerMaker
: public framework::OpProtoAndCheckerMaker {
public:
BeamSearchProtoAndCheckerMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
// inputs and outputs stored in proto
AddInput("pre_ids", "ids in previous step");
AddInput("ids", "a LoDTensor of shape of [None,k]");
AddInput("scores",
"a LoDTensor that has the same shape and LoD with `ids`");
AddOutput("selected_ids",
"a LoDTensor that stores the IDs selected by beam search");
AddOutput(
"selected_scores",
"a LoDTensor that has the same shape and LoD with `selected_ids`");
// Attributes stored in AttributeMap
AddAttr<int>("level", "the level of LoDTensor");
AddAttr<int>("beam_size", "beam size for beam search");
AddAttr<int>("end_id",
"the token id which indicates the end of a sequence");
AddComment(
"This is a beam search operator that help to generate sequences.");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(beam_search, paddle::operators::BeamSearchOp,
paddle::operators::BeamSearchProtoAndCheckerMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_TESTING
#include "gtest/gtest.h"
#endif
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
/*
* This is an implementation of beam search.
*
* To explain the details, lets take machine translation task for example, in
* this task, one source sentence is translated to multiple target sentences,
* during this period, one sentence will be translated to multiple translation
* prefixes(target sentence that have not ended), in each time step a prefix
* will have some candidates, input the candidate ids and their corresponding
* scores (probabilities), it will sort and select the top beam_size candidates
* for each source sentence, and store the selected candidates's score and their
* corresponding ids to LoDTensors.
*
* A detailed example:
*
* Input
*
* ids:
* LoD (should have 2 levels)
* first level: [0, 1, 4]
* second level: [0, 1, 2, 3, 4]
*
* tensor's data
* [
* [4, 2, 5]
* [2, 1, 3]
* [3, 5, 2]
* [8, 2, 1]
* ]
*
* scores:
* LoD same as `ids`
* tensor's data
* [
* [0.5, 0.3, 0.2]
* [0.6, 0.3, 0.1]
* [0.9, 0.5, 0.1]
* [0.7, 0.5, 0.1]
* ]
*
* the inputs means that there are 2 source sentences to translate, and the
* first source has 1 prefix, the second source has 2 prefix.
*
* lets assume beam size is 2, and the beam search's output should be
* LoD
* first level:
* [0, 1, 2]
* second level:
* [0, 2, 4]
*
* tensor's data
* [[
* 0.5,
* 0.3,
* 0.9,
* 0.7
* ]]
*
* TODO all the prune operations should be in the beam search, so it is better
* to split the beam search algorithm into a sequence of smaller operators, and
* the prune operators can be inserted in this sequence.
*/
class BeamSearch {
public:
// TODO(superjom) make type customizable
using id_t = size_t;
using score_t = float;
/*
* Input the arguments that needed by this class.
*/
BeamSearch(const framework::LoDTensor& ids,
const framework::LoDTensor& scores, size_t level, size_t beam_size,
int end_id)
: beam_size_(beam_size),
ids_(&ids),
scores_(&scores),
lod_level_(level),
end_id_(end_id) {}
/*
* The main function of beam search.
*
* @selected_ids: a [None, 1]-shaped tensor with LoD.
* In a machine translation model, it might be the candidate term id sets,
* each set stored as a varience-length sequence.
* The format might be described with a two-level LoD
* - [[0 1]
* - [0 1 2]]
* - [[]
* - [0 1]]
* the first level of LoD tells that there are two source sentences. The
* second level describes the details of the candidate id set's offsets in
* the
* source sentences.
*
* @selected_scores: a LoD tensor with the same shape and LoD with
* selected_ids.
* It stores the corresponding scores of candidate ids in selected_ids.
*
* Return false if all the input tensor is empty, in machine translation task
* that means no candidates is provided, and the task will stop running.
*/
void operator()(const framework::LoDTensor& pre_ids,
framework::LoDTensor* selected_ids,
framework::LoDTensor* selected_scores);
protected:
/*
* The basic items help to sort.
*/
struct Item {
Item() {}
Item(size_t offset, size_t id, float score)
: offset(offset), id(id), score(score) {}
// offset in the lod_level_+1
size_t offset;
// the candidate id
id_t id;
// the corresponding score
score_t score;
};
void PruneEndidCandidates(const framework::LoDTensor& pre_ids,
std::vector<std::vector<Item>>* items);
/*
* Transform the items into a map whose key is offset, value is the items.
* NOTE low performance
*/
std::vector<std::vector<Item>> ToMap(
const std::vector<std::vector<Item>>& inputs);
/*
* For each source, select top beam_size records.
*/
std::vector<std::vector<Item>> SelectTopBeamSizeItems();
/*
* Get the items of next source sequence, return false if no remaining items.
*/
bool NextItemSet(std::vector<Item>* items);
private:
size_t beam_size_;
const framework::LoDTensor* ids_;
const framework::LoDTensor* scores_;
size_t lod_level_{0};
size_t sent_offset_{0};
int end_id_{0};
};
class BeamSearchOp : public framework::OperatorBase {
public:
BeamSearchOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
BeamSearchOp(const BeamSearchOp& o)
: framework::OperatorBase(
static_cast<const framework::OperatorBase&>(o)) {
PADDLE_THROW("Not Implemented");
}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
LOG(INFO) << "run beam search op";
auto ids_var = scope.FindVar(Input("ids"));
auto scores_var = scope.FindVar(Input("scores"));
auto pre_ids_var = scope.FindVar(Input("pre_ids"));
PADDLE_ENFORCE_NOT_NULL(ids_var);
PADDLE_ENFORCE_NOT_NULL(scores_var);
PADDLE_ENFORCE_NOT_NULL(pre_ids_var);
auto& ids = ids_var->Get<framework::LoDTensor>();
auto& scores = scores_var->Get<framework::LoDTensor>();
auto& pre_ids = pre_ids_var->Get<framework::LoDTensor>();
size_t level = Attr<int>("level");
size_t beam_size = Attr<int>("beam_size");
int end_id = Attr<int>("end_id");
LOG(INFO) << "init beam search";
BeamSearch alg(ids, scores, level, beam_size, end_id);
LOG(INFO) << "after beam search";
auto selected_ids_var = scope.FindVar(Output("selected_ids"));
auto selected_scores_var = scope.FindVar(Output("selected_scores"));
PADDLE_ENFORCE_NOT_NULL(selected_ids_var);
PADDLE_ENFORCE_NOT_NULL(selected_scores_var);
auto& selected_ids_tensor =
*selected_ids_var->GetMutable<framework::LoDTensor>();
auto& selected_scores_tensor =
*selected_scores_var->GetMutable<framework::LoDTensor>();
LOG(INFO) << "run beam search";
alg(pre_ids, &selected_ids_tensor, &selected_scores_tensor);
LOG(INFO) << "finish beam search";
}
};
} // namespace operators
} // namespace paddle
......@@ -19,7 +19,13 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL(
elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, int64_t>);
......@@ -19,7 +19,13 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL(
elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, int64_t>);
......@@ -20,8 +20,12 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, double>);
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL(
elementwise_mul_grad,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, double>);
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, int64_t>);
......@@ -19,7 +19,13 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL(
elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, float>);
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, int64_t>);
#!/bin/bash
set -e
echo "Post install paddle debian package."
echo "Install some python package used for paddle. You can run "
echo " pip install /usr/opt/paddle/share/wheels/*.whl to install them."
find /usr/ -name '*paddle*.whl' | xargs pip install
......@@ -2,178 +2,198 @@
## Goals
We want the building procedure generates Docker images so that we can run PaddlePaddle applications on Kubernetes clusters.
We want to make the building procedures:
We want to build .deb packages so that enterprise users can run PaddlePaddle applications without Docker.
1. Static, can reproduce easily.
1. Generate python `whl` packages that can be widely use cross many distributions.
1. Build different binaries per release to satisfy different environments:
- Binaries for different CUDA and CUDNN versions, like CUDA 7.5, 8.0, 9.0
- Binaries containing only capi
- Binaries for python with wide unicode support or not.
1. Build docker images with PaddlePaddle pre-installed, so that we can run
PaddlePaddle applications directly in docker or on Kubernetes clusters.
We want to minimize the size of generated Docker images and .deb packages so to reduce the download time.
To achieve this, we created a repo: https://github.com/PaddlePaddle/buildtools
which gives several docker images that are `manylinux1` sufficient. Then we
can build PaddlePaddle using these images to generate corresponding `whl`
binaries.
We want to encapsulate building tools and dependencies in a *development* Docker image so to ease the tools installation for developers.
## Run The Build
Developers use various editors (emacs, vim, Eclipse, Jupyter Notebook), so the development Docker image contains only building tools, not editing tools, and developers are supposed to git clone source code into their development computers and map the code into the development container.
### Build Evironments
We want the procedure and tools also work with testing, continuous integration, and releasing.
The pre-built build environment images are:
| Image | Tag |
| ----- | --- |
| paddlepaddle/paddle_manylinux_devel | cuda7.5_cudnn5 |
| paddlepaddle/paddle_manylinux_devel | cuda8.0_cudnn5 |
| paddlepaddle/paddle_manylinux_devel | cuda7.5_cudnn7 |
| paddlepaddle/paddle_manylinux_devel | cuda9.0_cudnn7 |
## Docker Images
So we need two Docker images for each version of PaddlePaddle:
1. `paddle:<version>-dev`
This a development image contains only the development tools and standardizes the building procedure. Users include:
### Start Build
- developers -- no longer need to install development tools on the host, and can build their current work on the host (development computer).
- release engineers -- use this to build the official release from certain branch/tag on Github.com.
- document writers / Website developers -- Our documents are in the source repo in the form of .md/.rst files and comments in source code. We need tools to extract the information, typeset, and generate Web pages.
Choose one docker image that suit your environment and run the following
command to start a build:
Of course, developers can install building tools on their development computers. But different versions of PaddlePaddle might require different set or version of building tools. Also, it makes collaborative debugging easier if all developers use a unified development environment.
The development image should include the following tools:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
```bash
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=OFF" -e "RUN_TEST=OFF" -e "PYTHON_ABI=cp27-cp27mu" paddlepaddle/paddle_manylinux_devel /paddle/paddle/scripts/docker/build.sh
```
Many developers work on a remote computer with GPU; they could ssh into the computer and `docker exec` into the development container. However, running `sshd` in the container allows developers to ssh into the container directly.
After the build finishes, you can get output `whl` package under
`build/python/dist`.
1. `paddle:<version>`
This command mounts the source directory on the host into `/paddle` in the container, then run the build script `/paddle/paddle/scripts/docker/build.sh`
in the container. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed.
This is the production image, generated using the development image. This image might have multiple variants:
### Build Options
- GPU/AVX `paddle:<version>-gpu`
- GPU/no-AVX `paddle:<version>-gpu-noavx`
- no-GPU/AVX `paddle:<version>`
- no-GPU/no-AVX `paddle:<version>-noavx`
Users can specify the following Docker build arguments with either "ON" or "OFF" value:
We allow users to choose between GPU and no-GPU because the GPU version image is much larger than then the no-GPU version.
| Option | Default | Description |
| ------ | -------- | ----------- |
| `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
| `WITH_AVX` | OFF | Set to "ON" to enable AVX support. |
| `WITH_TESTING` | ON | Build unit tests binaries. |
| `WITH_MKLDNN` | ON | Build with [Intel® MKL DNN](https://github.com/01org/mkl-dnn) support. |
| `WITH_MKLML` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) support. |
| `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. |
| `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
| `WITH_C_API` | OFF | Build capi libraries for inference. |
| `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. |
| `WITH_STYLE_CHECK` | ON | Check the code style when building. |
| `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu |
| `RUN_TEST` | OFF | Run unit test immediently after the build. |
| `WITH_DOC` | OFF | Build docs after build binaries. |
| `WOBOQ` | OFF | Generate WOBOQ code viewer under `build/woboq_out` |
We allow users the choice between AVX and no-AVX, because some cloud providers don't provide AVX-enabled VMs.
## Docker Images
## Development Environment
You can get the latest PaddlePaddle docker images by
`docker pull paddlepaddle/paddle:<version>` or build one by yourself.
Here we describe how to use above two images. We start from considering our daily development environment.
### Official Docker Releases
Developers work on a computer, which is usually a laptop or desktop:
Official docker images at
[here](https://hub.docker.com/r/paddlepaddle/paddle/tags/),
you can choose either latest or images with a release tag like `0.10.0`,
Currently available tags are:
<img src="doc/paddle-development-environment.png" width=500 />
| Tag | Description |
| ------ | --------------------- |
| latest | latest CPU only image |
| latest-gpu | latest binary with GPU support |
| 0.10.0 | release 0.10.0 CPU only binary image |
| 0.10.0-gpu | release 0.10.0 with GPU support |
or, they might rely on a more sophisticated box (like with GPUs):
### Build Your Own Image
<img src="doc/paddle-development-environment-gpu.png" width=500 />
Build PaddlePaddle docker images are quite simple since PaddlePaddle can
be installed by just running `pip install`. A sample `Dockerfile` is:
A principle here is that source code lies on the development computer (host) so that editors like Eclipse can parse the source code to support auto-completion.
```dockerfile
FROM nvidia/cuda:7.5-cudnn5-runtime-centos6
RUN yum install -y centos-release-SCL
RUN yum install -y python27
# This whl package is generated by previous build steps.
ADD python/dist/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl /
RUN pip install /paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl && rm -f /*.whl
```
Then build the image by running `docker build -t [REPO]/paddle:[TAG] .` under
the directory containing your own `Dockerfile`.
## Usages
- NOTE: note that you can choose different base images for your environment, you can find all the versions [here](https://hub.docker.com/r/nvidia/cuda/).
### Build the Development Docker Image
### Use Docker Images
The following commands check out the source code to the host and build the development image `paddle:dev`:
Suppose that you have written an application program `train.py` using
PaddlePaddle, we can test and run it using docker:
```bash
git clone https://github.com/PaddlePaddle/Paddle paddle
cd paddle
docker build -t paddle:dev .
docker run --rm -it -v $PWD:/work paddlepaddle/paddle /work/a.py
```
The `docker build` command assumes that `Dockerfile` is in the root source tree. Note that in this design, this `Dockerfile` is this only one in our repo.
Users can specify a Ubuntu mirror server for faster downloading:
```bash
docker build -t paddle:dev --build-arg UBUNTU_MIRROR=mirror://mirrors.ubuntu.com/mirrors.txt .
```
But this works only if all dependencies of `train.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image and with more dependencies installs.
### Build PaddlePaddle from Source Code
### Run PaddlePaddle Book In Docker
Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host):
Our [book repo](https://github.com/paddlepaddle/book) also provide a docker
image to start a jupiter notebook inside docker so that you can run this book
using docker:
```bash
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=OFF" -e "RUN_TEST=OFF" paddle:dev
docker run -d -p 8888:8888 paddlepaddle/book
```
This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, could build the source code with possible local changes. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed.
`build.sh` builds the following:
- PaddlePaddle binaries,
- `$PWD/build/paddle-<version>.deb` for production installation, and
- `$PWD/build/Dockerfile`, which builds the production Docker image.
Please refer to https://github.com/paddlepaddle/book if you want to build this
docker image by your self.
Users can specify the following Docker build arguments with either "ON" or "OFF" value:
- `WITH_GPU`: ***Required***. Generates NVIDIA CUDA GPU code and relies on CUDA libraries.
- `WITH_AVX`: ***Required***. Set to "OFF" prevents from generating AVX instructions. If you don't know what is AVX, you might want to set "ON".
- `WITH_TEST`: ***Optional, default OFF***. Build unit tests binaries. Once you've built the unit tests, you can run these test manually by the following command:
```bash
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coverall"
```
- `RUN_TEST`: ***Optional, default OFF***. Run unit tests after building. You can't run unit tests without building it.
### Run Distributed Applications
### Build the Production Docker Image
In our [API design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md#distributed-training), we proposed an API that starts a distributed training job on a cluster. This API need to build a PaddlePaddle application into a Docker image as above and calls kubectl to run it on the cluster. This API might need to generate a Dockerfile look like above and call `docker build`.
The following command builds the production image:
Of course, we can manually build an application image and launch the job using the kubectl tool:
```bash
docker build -t paddle -f build/Dockerfile ./build
docker build -f some/Dockerfile -t myapp .
docker tag myapp me/myapp
docker push
kubectl ...
```
This production image is minimal -- it includes binary `paddle`, the shared library `libpaddle.so`, and Python runtime.
## Docker Images for Developers
### Run PaddlePaddle Applications
We have a special docker image for developers:
`paddlepaddle/paddle:<version>-dev`. This image is also generated from
https://github.com/PaddlePaddle/buildtools
Again the development happens on the host. Suppose that we have a simple application program in `a.py`, we can test and run it using the production image:
This a development image contains only the
development tools and standardizes the building procedure. Users include:
```bash
docker run --rm -it -v $PWD:/work paddle /work/a.py
```
- developers -- no longer need to install development tools on the host, and can build their current work on the host (development computer).
- release engineers -- use this to build the official release from certain branch/tag on Github.com.
- document writers / Website developers -- Our documents are in the source repo in the form of .md/.rst files and comments in source code. We need tools to extract the information, typeset, and generate Web pages.
But this works only if all dependencies of `a.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image and with more dependencies installs.
Of course, developers can install building tools on their development computers. But different versions of PaddlePaddle might require different set or version of building tools. Also, it makes collaborative debugging easier if all developers use a unified development environment.
### Build and Run PaddlePaddle Applications
The development image contains the following tools:
We need a Dockerfile in https://github.com/paddlepaddle/book that builds Docker image `paddlepaddle/book:<version>`, basing on the PaddlePaddle production image:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
```
FROM paddlepaddle/paddle:<version>
RUN pip install -U matplotlib jupyter ...
COPY . /book
EXPOSE 8080
CMD ["jupyter"]
```
Many developers work on a remote computer with GPU; they could ssh into the computer and `docker exec` into the development container. However, running `sshd` in the container allows developers to ssh into the container directly.
The book image is an example of PaddlePaddle application image. We can build it
```bash
git clone https://github.com/paddlepaddle/book
cd book
docker build -t book .
```
### Development Workflow
### Build and Run Distributed Applications
Here we describe how the workflow goes on. We start from considering our daily development environment.
In our [API design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md#distributed-training), we proposed an API that starts a distributed training job on a cluster. This API need to build a PaddlePaddle application into a Docker image as above and calls kubectl to run it on the cluster. This API might need to generate a Dockerfile look like above and call `docker build`.
Developers work on a computer, which is usually a laptop or desktop:
Of course, we can manually build an application image and launch the job using the kubectl tool:
<img src="doc/paddle-development-environment.png" width=500 />
```bash
docker build -f some/Dockerfile -t myapp .
docker tag myapp me/myapp
docker push
kubectl ...
```
or, they might rely on a more sophisticated box (like with GPUs):
<img src="doc/paddle-development-environment-gpu.png" width=500 />
A principle here is that source code lies on the development computer (host) so that editors like Eclipse can parse the source code to support auto-completion.
### Reading source code with woboq codebrowser
For developers who are interested in the C++ source code, please use -e "WOBOQ=ON" to enable the building of C++ source code into HTML pages using [Woboq codebrowser](https://github.com/woboq/woboq_codebrowser).
- The following command builds PaddlePaddle, generates HTML pages from C++ source code, and writes HTML pages into `$HOME/woboq_out` on the host:
```bash
docker run -v $PWD:/paddle -v $HOME/woboq_out:/woboq_out -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" -e "WOBOQ=ON" paddle:dev
docker run -v $PWD:/paddle -v $HOME/woboq_out:/woboq_out -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" -e "WOBOQ=ON" paddlepaddle/paddle:latest-dev
```
- You can open the generated HTML files in your Web browser. Or, if you want to run a Nginx container to serve them for a wider audience, you can run:
......
#!/bin/bash
set -xe
function cmake_gen() {
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
else
BASE_IMAGE="ubuntu:16.04"
fi
DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
mkdir -p /paddle/build
cd /paddle/build
......@@ -26,10 +9,29 @@ function cmake_gen() {
# delete previous built whl packages
rm -rf /paddle/paddle/dist 2>/dev/null || true
# Support build for all python versions, currently
# including cp27-cp27m and cp27-cp27mu.
PYTHON_FLAGS=""
if [ "$1" != "" ]; then
echo "using python abi: $1"
if [ "$1" == "cp27-cp27m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
elif [ "$1" == "cp27-cp27mu" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
fi
fi
cat <<EOF
========================================
Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=Release
${PYTHON_FLAGS}
-DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_MKLDNN=${WITH_MKLDNN:-ON}
......@@ -46,12 +48,12 @@ function cmake_gen() {
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
# docker environment is fully controlled by this script.
# See /Paddle/CMakeLists.txt, UNITTEST_USE_VIRTUALENV option.
cmake .. \
-DCMAKE_BUILD_TYPE=Release \
${PYTHON_FLAGS} \
-DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_MKLDNN=${WITH_MKLDNN:-ON} \
......@@ -134,6 +136,19 @@ EOF
function gen_dockerfile() {
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
else
BASE_IMAGE="ubuntu:16.04"
fi
DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
cat <<EOF
========================================
......@@ -168,13 +183,14 @@ EOF
${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/
ADD paddle/pybind/print_operators_doc /usr/bin/
# default command shows the paddle version and exit
CMD ["paddle", "version"]
EOF
}
cmake_gen
set -xe
cmake_gen ${PYTHON_ABI:-""}
run_build
run_test
gen_docs
......
......@@ -44,7 +44,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then
-DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DUSE_EIGEN_FOR_BLAS=ON \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
......@@ -58,7 +58,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then
-DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DUSE_EIGEN_FOR_BLAS=OFF \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
......@@ -72,7 +72,7 @@ elif [ $ANDROID_ABI == "armeabi" ]; then
-DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_BUILD_TYPE=MinSizeRel \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
-DWITH_STYLE_CHECK=OFF \
......
......@@ -27,6 +27,9 @@ using namespace paddle; // NOLINT
using namespace std; // NOLINT
int main(int argc, char** argv) {
initMain(argc, argv);
initPython(argc, argv);
if (FLAGS_model_dir.empty() || FLAGS_config_file.empty() ||
FLAGS_model_file.empty()) {
LOG(INFO) << "Usage: ./paddle_merge_model --model_dir=pass-00000 "
......@@ -34,9 +37,6 @@ int main(int argc, char** argv) {
return 0;
}
initMain(argc, argv);
initPython(argc, argv);
string confFile = FLAGS_config_file;
#ifndef PADDLE_WITH_CUDA
FLAGS_use_gpu = false;
......
import logging
from paddle.v2.framework.op import Operator, DynamicRecurrentOp
import paddle.v2.framework.core as core
import unittest
import numpy as np
def create_tensor(scope, name, np_data):
tensor = scope.var(name).get_tensor()
tensor.set(np_data, core.CPUPlace())
return tensor
class BeamSearchOpTester(unittest.TestCase):
def setUp(self):
self.scope = core.Scope()
self.ctx = core.DeviceContext.create(core.CPUPlace())
self._create_ids()
self._create_scores()
self._create_pre_ids()
self.scope.var('selected_ids')
self.scope.var('selected_scores')
def test_run(self):
op = Operator(
'beam_search',
pre_ids="pre_ids",
ids='ids',
scores='scores',
selected_ids='selected_ids',
selected_scores='selected_scores',
level=0,
beam_size=2,
end_id=0, )
op.run(self.scope, self.ctx)
selected_ids = self.scope.find_var("selected_ids").get_tensor()
print 'selected_ids', np.array(selected_ids)
print 'lod', selected_ids.lod()
def _create_pre_ids(self):
np_data = np.array([[1, 2, 3, 4]], dtype='int32')
tensor = create_tensor(self.scope, "pre_ids", np_data)
def _create_ids(self):
self.lod = [[0, 1, 4], [0, 1, 2, 3, 4]]
np_data = np.array(
[[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int32')
tensor = create_tensor(self.scope, "ids", np_data)
tensor.set_lod(self.lod)
def _create_scores(self):
np_data = np.array(
[
[0.5, 0.3, 0.2],
[0.6, 0.3, 0.1],
[0.9, 0.5, 0.1],
[0.7, 0.5, 0.1],
],
dtype='float32')
tensor = create_tensor(self.scope, "scores", np_data)
tensor.set_lod(self.lod)
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册