提交 368dbc28 编写于 作者: N nhzlx

merge develop

......@@ -54,7 +54,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51"
GIT_TAG "64e03a1939e0d526aa8e9f2e3f7dc0ad8d372944"
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
......
......@@ -50,6 +50,33 @@ pop-up box, choose the current release branch and click "Run Build" button. You
* pypi does not allow overwrite the already uploaded version of wheel package, even if you delete the
old version. you must change the version number before upload a new one.
### Publish wheel Packages for MacOS
You need to build the binary wheel package for MacOS before publishing, to
make sure that the package can be used by many versions of MacOS
(10.11, 10.12, 10.13) and different python installs (python.org, homebrew, etc.),
you must build the package ***exactly*** following below steps:
Build steps:
1. install python from python.org downloads, and make sure it's currently in use
in your system.
1. `export MACOSX_DEPLOYMENT_TARGET=10.11`, use `10.11` is enough for recent versions.
1. `git clone https://github.com/PaddlePaddle/Paddle.git && cd Paddle && mkdir build && cd build`
1. `cmake -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_SYSTEM_BLAS=OFF ..`, make sure the output of `cmake` command is using the correct python interpreter installed from python.org
1. `make -j`
1. `pip install delocate`
1. `mkdir fixed_wheel && delocate-wheel -w fixed_wheel python/dist/*.whl`
Then the whl under `fixed_wheel` is ready to upload.
Install steps:
1. run `pip install paddlepaddle...whl`
1. find the `libpython.dylib` that are currently in use:
- for python.org package installs, do nothing.
- for other python installs, find the path of `libpython*.dylib` and `export LD_LIBRARY_PATH=you path && DYLD_LIBRARY_PATH=your path`
## Publish Docker Images
Our CI tool will push latest images to DockerHub, so we only need to push a version tag like:
......
......@@ -9,8 +9,6 @@ Paddle 预测 API
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
- 库文件\ ``libpaddle_fluid.so`` 或 ``libpaddle_fluid.a``
- 库文件 ``libpaddle_inference_api.so`` 或
``libpaddle_inference_api.a``
编译和依赖可以参考 :ref:`install_or_build_cpp_inference_lib` 。
......@@ -97,8 +95,7 @@ engine
CHECK(predictor->Run(slots, &outputs));
// 获取 outputs ...
编译时,联编 ``libpaddle_fluid.a/.so`` 和
``libpaddle_inference_api.a/.so`` 便可。
编译时,联编 ``libpaddle_fluid.a/.so`` 即可。
详细代码参考
------------
......
......@@ -162,6 +162,7 @@ paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
......
......@@ -31,7 +31,8 @@ size_t Tensor::memory_size() const {
return holder_ == nullptr ? 0UL : holder_->size() - offset_;
}
void* Tensor::mutable_data(platform::Place place, std::type_index type) {
void* Tensor::mutable_data(platform::Place place, std::type_index type,
size_t requested_size) {
if (holder_ != nullptr) {
holder_->set_type(type);
}
......@@ -39,7 +40,7 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) {
"When calling this method, the Tensor's numel must be "
"equal or larger than zero. "
"Please check Tensor::Resize has been called first.");
int64_t size = numel() * SizeOfType(type);
size_t size = requested_size ? requested_size : numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
......@@ -68,10 +69,10 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) {
offset_);
}
void* Tensor::mutable_data(platform::Place place) {
void* Tensor::mutable_data(platform::Place place, size_t requested_size) {
PADDLE_ENFORCE(this->holder_ != nullptr,
"Cannot invoke mutable data if current hold nothing.");
return mutable_data(place, holder_->type());
return mutable_data(place, holder_->type(), requested_size);
}
Tensor& Tensor::ShareDataWith(const Tensor& src) {
......
......@@ -89,22 +89,24 @@ class Tensor {
* @note If not exist, then allocation.
*/
template <typename T>
T* mutable_data(platform::Place place);
T* mutable_data(platform::Place place, size_t requested_size = 0);
void* mutable_data(platform::Place place, std::type_index type);
void* mutable_data(platform::Place place, std::type_index type,
size_t requested_size = 0);
void* mutable_data(platform::Place place);
void* mutable_data(platform::Place place, size_t requested_size = 0);
/**
* @brief Return a pointer to mutable memory block.
*
* @param[in] dims The dimensions of the memory block.
* @param[in] place The place of the memory block.
* @param[in] requested_size The size of the block in bytes.
*
* @note If not exist, then allocation.
*/
template <typename T>
T* mutable_data(DDim dims, platform::Place place);
T* mutable_data(DDim dims, platform::Place place, size_t requested_size = 0);
/*! Return the dimensions of the memory block. */
const DDim& dims() const;
......
......@@ -46,16 +46,17 @@ inline T* Tensor::data() {
}
template <typename T>
inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
inline T* Tensor::mutable_data(DDim dims, platform::Place place,
size_t requested_size) {
static_assert(std::is_pod<T>::value, "T must be POD");
Resize(dims);
return mutable_data<T>(place);
return mutable_data<T>(place, requested_size);
}
template <typename T>
inline T* Tensor::mutable_data(platform::Place place) {
inline T* Tensor::mutable_data(platform::Place place, size_t requested_size) {
static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T*>(mutable_data(place, typeid(T)));
return reinterpret_cast<T*>(mutable_data(place, typeid(T), requested_size));
}
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
......
......@@ -53,6 +53,18 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler {
key_ += "-BWD";
}
size_t GetDstMemorySize() const {
return conv_pd_->dst_primitive_desc().get_size();
}
size_t GetDiffWeightsMemorySize() const {
return conv_bwd_weights_pd_->diff_weights_primitive_desc().get_size();
}
size_t GetDiffSourceMemorySize() const {
return conv_bwd_data_pd_->diff_src_primitive_desc().get_size();
}
std::shared_ptr<mkldnn::memory> AcquireSrcMemoryFromWeightsPrimitive(
const std::shared_ptr<mkldnn::memory> user_memory_p,
std::vector<mkldnn::primitive>& pipeline) { // NOLINT
......@@ -294,7 +306,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const T* input_data = input->data<T>();
const T* filter_data = filter->data<T>();
T* output_data = output->mutable_data<T>(ctx.GetPlace());
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
std::vector<int> weights_tz =
......@@ -354,6 +365,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto user_weights_memory_p = handler.AcquireWeightsMemory(
user_weights_md, to_void_cast<T>(filter_data));
T* output_data =
output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
// create reorder primitive if the input format is not the preferred one
auto src_memory_p =
handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
......@@ -476,13 +489,6 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
T* input_grad_data = nullptr;
T* filter_grad_data = nullptr;
if (input_grad) {
input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
}
if (filter_grad) {
filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
}
std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
std::vector<int> weights_tz =
paddle::framework::vectorize2int(filter->dims());
......@@ -568,6 +574,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
handler.AcquireDiffDstMemoryFromWeightsPrimitive(
user_diff_dst_memory_p, pipeline);
const size_t size = handler.GetDiffWeightsMemorySize();
filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace(), size);
auto diff_weights_memory_p =
handler.AcquireDiffWeightsMemoryFromWeightsPrimitive(
reinterpret_cast<void*>(filter_grad_data));
......@@ -590,6 +599,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
handler.AcquireDiffDstMemoryFromDataPrimitive(user_diff_dst_memory_p,
pipeline);
const size_t size = handler.GetDiffSourceMemorySize();
input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace(), size);
auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive(
reinterpret_cast<void*>(input_grad_data));
......
......@@ -151,6 +151,7 @@ bool VariableResponse::CopySelectRowsData(
::google::protobuf::io::CodedInputStream* input,
const platform::DeviceContext& ctx, int length) {
auto* slr = GetVar()->GetMutable<framework::SelectedRows>();
slr->mutable_rows()->clear();
slr->mutable_rows()->resize(length /
framework::SizeOfType(typeid(int64_t))); // int64
int64_t* rows_data = slr->mutable_rows()->data();
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace operators {
......@@ -41,19 +40,33 @@ class FillConstantOp : public framework::OperatorBase {
static_cast<framework::proto::VarType::Type>(Attr<int>("dtype"));
auto value = Attr<float>("value");
auto force_cpu = Attr<bool>("force_cpu");
auto &out =
*scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
out.Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
framework::Tensor *tensor = nullptr;
auto &out_var = *scope.FindVar(Output("Out"));
if (out_var.IsType<framework::LoDTensor>()) {
tensor = out_var.GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
} else if (out_var.IsType<framework::SelectedRows>()) {
tensor = out_var.GetMutable<framework::SelectedRows>()->mutable_value();
tensor->Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
} else {
PADDLE_THROW(
"fill constant op's output only"
"supports SelectedRows and LoDTensor");
}
if (force_cpu) {
auto cpu = platform::CPUPlace();
out.mutable_data(cpu, framework::ToTypeIndex(data_type));
tensor->mutable_data(cpu, framework::ToTypeIndex(data_type));
} else {
out.mutable_data(dev_place, framework::ToTypeIndex(data_type));
tensor->mutable_data(dev_place, framework::ToTypeIndex(data_type));
}
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(dev_place);
math::set_constant(dev_ctx, &out, value);
math::set_constant(dev_ctx, tensor, value);
}
};
......
......@@ -165,12 +165,13 @@ void ListenAndServOp::RunSyncLoop(
recv_scope);
VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
rpc_service_->SetCond(distributed::kRequestGet);
rpc_service_->WaitBarrier(distributed::kRequestGet);
rpc_service_->ResetBarrierCounter();
// reset received sparse vars to avoid reuse it in the next mini-batch
dynamic_cast<distributed::RequestSendHandler *>(request_send_handler_.get())
->ResetSparseVarRecorder();
rpc_service_->SetCond(distributed::kRequestGet);
rpc_service_->WaitBarrier(distributed::kRequestGet);
rpc_service_->ResetBarrierCounter();
} // while(true)
}
......
......@@ -48,16 +48,16 @@ class ConcatFunctor<platform::CPUDeviceContext, T> {
auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
for (int k = 0; k < out_rows; ++k) {
T* dst_ptr = output->data<T>() + k * out_cols;
auto output_data = output->data<T>();
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = input_cols[j];
const T* src_prt = input[j].data<T>() + k * col_len;
memory::Copy(cpu_place, dst_ptr + col_idx, cpu_place, src_prt,
sizeof(T) * col_len);
col_idx += col_len;
auto input_data = input[j].data<T>();
for (int k = 0; k < out_rows; ++k) {
memory::Copy(cpu_place, output_data + k * out_cols + col_idx, cpu_place,
input_data + k * col_len, sizeof(T) * col_len);
}
col_idx += col_len;
}
}
};
......
......@@ -53,25 +53,27 @@ struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* out) {
int out_offset = 0;
auto& eigen_place = *context.eigen_device();
int x_item_length = x.numel() / x.dims()[0];
auto out_data = out->data<T>();
auto x_data = x.data<T>();
for (size_t i = 1; i < ref_lod.size(); ++i) {
int repeat_num = ref_lod[i] - ref_lod[i - 1];
int x_start = x_lod[i - 1];
int x_end = x_lod[i];
int x_seq_len = x_end - x_start;
if (repeat_num > 0) {
auto x_sub_tensor = x.Slice(x_start, x_end);
x_sub_tensor.Resize({1, x_sub_tensor.numel()});
int out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
auto out_sub_tensor =
out->Slice(out_start, out_start + x_seq_len * repeat_num);
out_sub_tensor.Resize({repeat_num, x_sub_tensor.dims()[1]});
EigenMatrix<T>::From(out_sub_tensor).device(eigen_place) =
EigenMatrix<T>::From(x_sub_tensor)
.broadcast(Eigen::array<int, 2>({{repeat_num, 1}}));
for (int j = 0; j < repeat_num; j++) {
for (int k = 0; k < x_seq_len; k++) {
for (int l = 0; l < x_item_length; l++) {
out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
x_data[(x_start + k) * x_item_length + l];
}
}
}
}
out_offset += repeat_num;
}
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/sequence_mask_op.h"
REGISTER_OPERATOR(sequence_mask, paddle::operators::SequenceMaskOp,
paddle::operators::SequenceMaskOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
sequence_mask,
paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
int>,
paddle::operators::SequenceMaskKernel<paddle::platform::CPUDeviceContext,
int64_t>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/sequence_mask_op.h"
REGISTER_OP_CUDA_KERNEL(
sequence_mask,
paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
int>,
paddle::operators::SequenceMaskKernel<paddle::platform::CUDADeviceContext,
int64_t>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifdef __NVCC__
#include <thrust/device_ptr.h>
#include <thrust/functional.h>
#include <thrust/reduce.h>
#else
#include <algorithm>
#endif
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {
class SequenceMaskOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must exist");
PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) must exist");
auto maxlen = ctx->Attrs().Get<int>("maxlen");
if (maxlen > 0) { // We can only infershape when maxlen > 0
auto dim = framework::vectorize2int(ctx->GetInputDim("X"));
dim.push_back(maxlen);
ctx->SetOutputDim("Y", framework::make_ddim(dim));
}
}
};
class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "The input tensor of sequence_mask op.");
AddOutput("Y", "The output mask of sequence_mask op.");
AddAttr<int>("maxlen",
"The maximum length of the sequence. If maxlen < 0, maxlen "
"= max(Input(X)).")
.SetDefault(-1)
.AddCustomChecker([](int &v) {
PADDLE_ENFORCE(v < 0 || v >= 1,
"Attr(maxlen) must be less than 0 or larger than 1");
});
AddAttr<int>("out_dtype", "Output data type");
AddComment(R"DOC(
SequenceMask Operator
This operator outputs a Mask according to Input(X) and Attr(maxlen).
Supposing Input(X) is a Tensor with shape [d_1, d_2, ..., d_n], the
Output(Y) is a mask with shape [d_1, d_2, ..., d_n, maxlen], where:
Y(i_1, i_2, ..., i_n, j) = (j < X(i_1, i_2, ..., i_n))
If maxlen < 0, maxlen = max(X)
)DOC");
}
};
template <typename Tx, typename Ty>
struct SequenceMaskForRangeFunctor {
HOSTDEVICE SequenceMaskForRangeFunctor(const Tx *x, Ty *y, int maxlen)
: x_(x), y_(y), maxlen_(maxlen) {}
HOSTDEVICE void operator()(int y_idx) const {
int x_idx = y_idx / maxlen_;
int j = y_idx % maxlen_;
y_[y_idx] = static_cast<Ty>(j < x_[x_idx] ? 1 : 0);
}
private:
const Tx *x_;
Ty *y_;
int maxlen_;
};
template <typename DeviceContext, typename Tx>
struct SequenceMaskFunctor {
using Tensor = framework::LoDTensor;
SequenceMaskFunctor(const DeviceContext &ctx, const Tx *x, Tensor *y,
int limits, int maxlen)
: ctx_(ctx), x_(x), y_(y), limits_(limits), maxlen_(maxlen) {}
template <typename Ty>
void operator()() const {
auto *y_data = y_->mutable_data<Ty>(ctx_.GetPlace());
platform::ForRange<DeviceContext> for_range(ctx_, limits_);
for_range(SequenceMaskForRangeFunctor<Tx, Ty>(x_, y_data, maxlen_));
}
private:
const DeviceContext &ctx_;
const Tx *x_;
Tensor *y_;
int limits_;
int maxlen_;
};
template <typename DeviceContext, typename Tx>
class SequenceMaskKernel : public framework::OpKernel<Tx> {
using Tensor = framework::LoDTensor;
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Y");
auto maxlen = ctx.Attr<int>("maxlen");
auto *x_data = x->data<Tx>();
auto x_numel = x->numel();
if (maxlen < 0) {
#ifdef __NVCC__
VLOG(10)
<< "SequenceMaskOp on GPU may be slow when maxlen is not provided.";
maxlen = static_cast<int>(
thrust::reduce(thrust::device_pointer_cast(x_data),
thrust::device_pointer_cast(x_data) + x_numel,
static_cast<Tx>(0), thrust::maximum<Tx>()));
#else
maxlen = static_cast<int>(*std::max_element(x_data, x_data + x_numel));
#endif
auto y_dim = framework::vectorize2int(x->dims());
y_dim.push_back(maxlen);
y->Resize(framework::make_ddim(y_dim));
}
auto out_dtype = static_cast<framework::proto::VarType::Type>(
ctx.Attr<int>("out_dtype"));
auto &dev_ctx = ctx.template device_context<DeviceContext>();
framework::VisitDataType(out_dtype,
SequenceMaskFunctor<DeviceContext, Tx>(
dev_ctx, x_data, y, x_numel * maxlen, maxlen));
}
};
} // namespace operators
} // namespace paddle
......@@ -37,7 +37,7 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
} else {
PADDLE_THROW(
"uniform_random_op's output only"
"supports SelectedRows and Tensor");
"supports SelectedRows and LoDTensor");
}
T* data = tensor->mutable_data<T>(ctx.GetPlace());
unsigned int seed = static_cast<unsigned int>(ctx.Attr<int>("seed"));
......
......@@ -54,7 +54,7 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
} else {
PADDLE_THROW(
"uniform_random_op's output only"
"supports SelectedRows and Tensor");
"supports SelectedRows and LoDTensor");
}
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
......
......@@ -328,6 +328,11 @@ function assert_api_not_changed() {
source .env/bin/activate
pip install ${PADDLE_ROOT}/build/python/dist/*whl
python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid > new.spec
if [ "$1" == "cp35-cp35m" ]; then
# Use sed to make python2 and python3 sepc keeps the same
sed -i 's/arg0: str/arg0: unicode/g' new.spec
sed -i "s/\(.*Transpiler.*\).__init__ ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec
fi
python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API.spec new.spec
deactivate
......@@ -621,7 +626,7 @@ function main() {
gen_capi_package
gen_fluid_inference_lib
test_fluid_inference_lib
assert_api_not_changed
assert_api_not_changed ${PYTHON_ABI:-""}
;;
*)
print_usage
......
......@@ -19,6 +19,7 @@ import hashlib
import os
import errno
import shutil
import six
import sys
import importlib
import paddle.dataset
......@@ -94,6 +95,8 @@ def download(url, module_name, md5sum, save_name=None):
dl = 0
total_length = int(total_length)
for data in r.iter_content(chunk_size=4096):
if six.PY2:
data = six.b(data)
dl += len(data)
f.write(data)
done = int(50 * dl / total_length)
......
......@@ -35,20 +35,22 @@ import itertools
import functools
from .common import download
import tarfile
import six
import scipy.io as scio
from paddle.dataset.image import *
from paddle.reader import *
import os
import numpy as np
from multiprocessing import cpu_count
import six
from six.moves import cPickle as pickle
from six.moves import zip
__all__ = ['train', 'test', 'valid']
DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat'
SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat'
DATA_MD5 = '33bfc11892f1e405ca193ae9a9f2a118'
DATA_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/102flowers.tgz'
LABEL_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/imagelabels.mat'
SETID_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/setid.mat'
DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
# In official 'readme', tstid is the flag of test data
......@@ -120,7 +122,10 @@ def reader_creator(data_file,
file = file.strip()
batch = None
with open(file, 'rb') as f:
if six.PY2:
batch = pickle.load(f)
else:
batch = pickle.load(f, encoding='bytes')
data = batch['data']
labels = batch['label']
for sample, label in zip(data, batch['label']):
......
......@@ -36,11 +36,6 @@ import numpy as np
try:
import cv2
except ImportError:
import sys
sys.stderr.write(
'''Warning with paddle image module: opencv-python should be imported,
or paddle image module could NOT work; please install opencv-python first.'''
)
cv2 = None
import os
import tarfile
......@@ -53,6 +48,18 @@ __all__ = [
]
def _check_cv2():
if cv2 is None:
import sys
sys.stderr.write(
'''Warning with paddle image module: opencv-python should be imported,
or paddle image module could NOT work; please install opencv-python first.'''
)
return False
else:
return True
def batch_images_from_tar(data_file,
dataset_name,
img2label,
......@@ -134,7 +141,7 @@ def load_image_bytes(bytes, is_color=True):
load and return a gray image.
:type is_color: bool
"""
assert cv2 is not None
assert _check_cv2() is True
flag = 1 if is_color else 0
file_bytes = np.asarray(bytearray(bytes), dtype=np.uint8)
......@@ -159,7 +166,7 @@ def load_image(file, is_color=True):
load and return a gray image.
:type is_color: bool
"""
assert cv2 is not None
assert _check_cv2() is True
# cv2.IMAGE_COLOR for OpenCV3
# cv2.CV_LOAD_IMAGE_COLOR for older OpenCV Version
......@@ -188,7 +195,7 @@ def resize_short(im, size):
:param size: the shorter edge size of image after resizing.
:type size: int
"""
assert cv2 is not None
assert _check_cv2() is True
h, w = im.shape[:2]
h_new, w_new = size, size
......
......@@ -103,6 +103,7 @@ __all__ = [
'rank_loss',
'prelu',
'flatten',
'sequence_mask',
'stack',
]
......@@ -5520,7 +5521,75 @@ def flatten(x, axis=1, name=None):
return out
def sequence_mask(x, maxlen=None, dtype='int64', name=None):
"""
**SequenceMask Layer**
This layer outputs a mask according to the input :code:`x` and
:code:`maxlen` with data type of :code:`dtype`.
Supposing :code:`x` is a Tensor with shape [d_1, d_2, ..., d_n], the
:code:`y` is a mask with shape [d_1, d_2, ..., d_n, maxlen], where:
.. math::
y(i_1, i_2,..., i_n, j) = (j < x(i_1, i_2,..., i_n))
Args:
x (Variable): Input tensor of sequence_mask layer,
whose elements are integers less than :code:`maxlen`.
maxlen (int|None): Maximum length of the sequence. If :code:`maxlen`
is None, it would be replace with :math:`max(x)`.
dtype (np.dtype|core.VarDesc.VarType|str): Data type of the output.
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
Returns:
Variable: The output sequence mask.
"""
helper = LayerHelper('sequence_mask', **locals())
if name is None:
out = helper.create_tmp_variable(dtype=dtype)
else:
out = helper.create_tmp_variable(dtype=dtype, name=name)
helper.append_op(
type='sequence_mask',
inputs={'X': [x]},
outputs={'Y': out},
attrs={
'max_len': maxlen if maxlen is not None else -1,
'out_dtype': out.dtype
})
return out
def stack(x, axis=0):
"""
**Stack Layer**
This layer stacks all of the input :code:`x` along axis.
Input :code:`x` can be a single variable, a :code:`list` of variables,
or a :code:`tuple` of variables. If :code:`x` is a :code:`list` or
:code:`tuple`, the shapes of all these variables must be the same.
Supposing the shape of each input is :math:`[d_0, d_1, ..., d_{n-1}]`,
the shape of the output variable would be
:math:`[d_0, d_1, ..., d_{axis}=len(x), ..., d_{n-1}]`.
If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`.
If :code:`axis` is None, it would be replaced with 0.
Args:
x (Variable|list(Variable)|tuple(Variable)): Input variables.
axis (int|None): The axis along which all inputs are stacked.
Returns:
Variable: The stacked variable.
"""
helper = LayerHelper('stack', **locals())
axis = 0 if axis is None else axis
......
......@@ -64,6 +64,7 @@ if(WITH_DISTRIBUTE)
endif()
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 150)
py_test_modules(test_dist_transformer MODULES test_dist_transformer SERIAL)
py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext SERIAL)
py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL)
......
......@@ -37,7 +37,7 @@ def attention_lstm(
T = sum(lod[0])
N = len(lod[0])
M = x.shape[1]
D = b.shape[1] / 4
D = b.shape[1] // 4
assert T == x.shape[0]
assert len(fcws) == len(fcbs)
hidden = []
......
......@@ -120,8 +120,8 @@ def operator_equal(a, b):
raise ValueError("In operator_equal not equal:{0}\n".format(k))
elif isinstance(v, collections.OrderedDict):
v0 = sorted(six.iteritems(v), key=lambda x: x[0])
v1 = sorted(six.iteritems(b.__dict__[k]), key=lambda x: x[0])
v0 = sorted(list(six.iteritems(v)), key=lambda x: x[0])
v1 = sorted(list(six.iteritems(b.__dict__[k])), key=lambda x: x[0])
if v0 != v1:
raise ValueError("In operator_equal not equal:{0}\n".format(k))
......@@ -139,16 +139,14 @@ def block_equal(a, b):
continue
elif k == "ops":
assert (len(a.ops) == len(b.ops))
for i in range(0, len(a.ops)):
if not operator_equal(a.ops[i], b.ops[i]):
raise ValueError("In block_equal not equal:{0}\n".format(k))
assert (len(a.ops) == len(b.ops))
elif isinstance(v, collections.OrderedDict):
v0 = sorted(six.iteritems(v), key=lambda x: x[0])
v1 = sorted(six.iteritems(b.__dict__[k]), key=lambda x: x[0])
if v0 != v1:
for key, value in six.iteritems(v):
if str(value) != str(b.__dict__[k][key]):
raise ValueError("In block_equal not equal:{0}\n".format(k))
elif (v != b.__dict__[k]):
......
......@@ -21,6 +21,7 @@ import paddle.fluid as fluid
from paddle.fluid.transpiler.distribute_transpiler import delete_ops
import traceback
import collections
import six
class TranspilerTest(unittest.TestCase):
......@@ -644,17 +645,17 @@ class TestLoadSliceVar(TranspilerTest):
self.assertTrue(pserver._slice_vars_and_attrs)
self.assertTrue(pserver2._slice_vars_and_attrs)
for idx in xrange(len(pserver._slice_vars_and_attrs)):
for idx in six.moves.xrange(len(pserver._slice_vars_and_attrs)):
self.assertEqual(pserver._slice_vars_and_attrs[idx][0],
pserver2._slice_vars_and_attrs[idx][0])
total_numel = reduce(lambda x, y: x * y,
pserver._slice_vars_and_attrs[idx][0].shape)
total_numel = six.moves.reduce(
lambda x, y: x * y, pserver._slice_vars_and_attrs[idx][0].shape)
self.assertEqual(
total_numel,
reduce(lambda x, y: x * y,
pserver._slice_vars_and_attrs[idx][2].shape) + reduce(
lambda x, y: x * y,
six.moves.reduce(lambda x, y: x * y,
pserver._slice_vars_and_attrs[idx][2].shape) +
six.moves.reduce(lambda x, y: x * y,
pserver2._slice_vars_and_attrs[idx][2].shape))
......
......@@ -18,6 +18,9 @@ import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.op import Operator
class TestFillConstantOp1(OpTest):
def setUp(self):
......@@ -47,5 +50,31 @@ class TestFillConstantOp2(OpTest):
self.check_output()
class TestFillConstantOpWithSelectedRows(OpTest):
def check_with_place(self, place):
scope = core.Scope()
# create Out Variable
out = scope.var('Out').get_selected_rows()
# create and run fill_constant_op operator
fill_constant_op = Operator(
"fill_constant", shape=[123, 92], value=3.8, Out='Out')
fill_constant_op.run(scope, place)
# get result from Out
result_array = np.array(out.get_tensor())
full_array = np.full((123, 92), 3.8, 'float32')
self.assertTrue(np.array_equal(result_array, full_array))
def test_fill_constant_with_selected_rows(self):
places = [core.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(core.CUDAPlace(0))
for place in places:
self.check_with_place(place)
if __name__ == "__main__":
unittest.main()
......@@ -51,30 +51,28 @@ class PReluTest(OpTest):
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X', 'Alpha'], 'Out')
def test_check_grad_ignore_x(self):
def test_check_grad_1_ignore_x(self):
self.check_grad(['Alpha'], 'Out', no_grad_set=set('X'))
def test_check_grad_ignore_alpha(self):
self.check_grad(['X'], 'Out', no_grad_set=set('Alpha'))
class TestCase1(PReluTest):
def initTestCase(self):
self.attrs = {'mode': "all"}
def test_check_grad_2(self):
self.check_grad(['X', 'Alpha'], 'Out')
def test_check_grad_3_ignore_alpha(self):
self.check_grad(['X'], 'Out', no_grad_set=set('Alpha'))
class TestCase2(PReluTest):
def initTestCase(self):
self.attrs = {'mode': "channel"}
# TODO(minqiyang): Resume these test cases after fixing Python3 CI job issues
# class TestCase1(PReluTest):
# def initTestCase(self):
# self.attrs = {'mode': "all"}
class TestCase3(PReluTest):
def initTestCase(self):
self.attrs = {'mode': "element"}
# class TestCase2(PReluTest):
# def initTestCase(self):
# self.attrs = {'mode': "channel"}
# class TestCase3(PReluTest):
# def initTestCase(self):
# self.attrs = {'mode': "element"}
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid.framework import convert_np_dtype_to_dtype_
import paddle.fluid.core as core
import numpy as np
import copy
import unittest
class SequenceMaskTestBase(OpTest):
def initDefaultParameters(self):
self.op_type = 'sequence_mask'
self.maxlen = 10
self.mask_dtype = 'int64'
self.x = [[0, 3, 4], [5, 7, 9]]
def initParameters(self):
pass
def setUp(self):
self.initDefaultParameters()
self.initParameters()
if not isinstance(self.x, np.ndarray):
self.x = np.array(self.x)
self.inputs = {'X': self.x}
self.outputs = {'Y': self.calc_ground_truth_mask()}
self.attrs = {
'maxlen': self.maxlen,
'out_dtype': convert_np_dtype_to_dtype_(self.mask_dtype)
}
def calc_ground_truth_mask(self):
maxlen = np.max(self.x) if self.maxlen < 0 else self.maxlen
shape = self.x.shape + (maxlen, )
index_broadcast = np.broadcast_to(
np.reshape(
range(maxlen), newshape=[1] * self.x.ndim + [-1]),
shape=shape)
x_broadcast = np.broadcast_to(
np.reshape(
self.x, newshape=self.x.shape + (-1, )), shape=shape)
return (index_broadcast < x_broadcast).astype(self.mask_dtype)
def test_check_output(self):
self.check_output()
class SequenceMaskTest1(SequenceMaskTestBase):
def initParameters(self):
self.mask_dtype = 'bool'
class SequenceMaskTest2(SequenceMaskTestBase):
def initParameters(self):
self.mask_dtype = 'uint8'
class SequenceMaskTest3(SequenceMaskTestBase):
def initParameters(self):
self.mask_dtype = 'int32'
class SequenceMaskTest4(SequenceMaskTestBase):
def initParameters(self):
self.mask_dtype = 'float32'
class SequenceMaskTest5(SequenceMaskTestBase):
def initParameters(self):
self.mask_dtype = 'float64'
class SequenceMaskTest6(SequenceMaskTestBase):
def initParameters(self):
self.maxlen = -1
if __name__ == '__main__':
unittest.main()
......@@ -159,7 +159,7 @@ def program_to_code(prog):
get_indent_space(indent), '{', block_idx))
indent += 1
# sort all vars
all_vars = sorted(block.vars.iteritems(), key=lambda x: x[0])
all_vars = sorted(six.iteritems(block.vars), key=lambda x: x[0])
for var in all_vars:
print("{}{}".format(
get_indent_space(indent), variable_to_code(var[1])))
......
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import re
......@@ -46,7 +48,7 @@ Diff: set(['test_parallel_executor_crf'])
start_parts = escape(l).split(" ")
m = re.search("Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l))
started.add(m.group(1))
print "Diff: ", started - passed
print("Diff: ", started - passed)
if __name__ == "__main__":
......
......@@ -17,6 +17,8 @@ Print all signature of a python module in alphabet order.
Usage:
./print_signature "paddle.fluid" > signature.txt
"""
from __future__ import print_function
import importlib
import inspect
import collections
......@@ -64,4 +66,4 @@ def visit_all_module(mod):
visit_all_module(importlib.import_module(sys.argv[1]))
for name in member_dict:
print name, member_dict[name]
print(name, member_dict[name])
......@@ -14,6 +14,7 @@
import argparse
import json
import six
import sys
import unittest
......@@ -124,7 +125,7 @@ class Timeline(object):
return cur_pid
def _allocate_pids(self):
for k, profile_pb in self._profile_dict.iteritems():
for k, profile_pb in six.iteritems(self._profile_dict):
for event in profile_pb.events:
if event.type == profiler_pb2.Event.CPU:
if (k, event.device_id, "CPU") not in self._devices:
......@@ -140,7 +141,7 @@ class Timeline(object):
(k, event.device_id), pid)
def _allocate_events(self):
for k, profile_pb in self._profile_dict.iteritems():
for k, profile_pb in six.iteritems(self._profile_dict):
for event in profile_pb.events:
if event.type == profiler_pb2.Event.CPU:
type = "CPU"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册