Commit 7bdaf096, authored by: T tensor-tang

Merge remote-tracking branch 'ups/develop' into refine/jit

@@ -54,7 +54,7 @@ ExternalProject_Add(
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS ${MKLDNN_DEPENDS}
     GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
-    GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51"
+    GIT_TAG "64e03a1939e0d526aa8e9f2e3f7dc0ad8d372944"
     PREFIX ${MKLDNN_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
......
@@ -50,6 +50,33 @@ pop-up box, choose the current release branch and click "Run Build" button. You
 * pypi does not allow overwriting an already uploaded version of a wheel package, even if you delete the
   old version. You must change the version number before uploading a new one.
+
+### Publish wheel Packages for MacOS
+
+You need to build the binary wheel package for MacOS before publishing. To
+make sure that the package can be used on many versions of MacOS
+(10.11, 10.12, 10.13) and with different python installs (python.org, homebrew, etc.),
+you must build the package ***exactly*** following the steps below.
+
+Build steps:
+
+1. Install python from the python.org downloads, and make sure it is the interpreter currently in use
+   on your system.
+1. `export MACOSX_DEPLOYMENT_TARGET=10.11`; `10.11` is sufficient for recent versions.
+1. `git clone https://github.com/PaddlePaddle/Paddle.git && cd Paddle && mkdir build && cd build`
+1. `cmake -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_SYSTEM_BLAS=OFF ..`; make sure the output of the `cmake` command shows the correct python interpreter installed from python.org.
+1. `make -j`
+1. `pip install delocate`
+1. `mkdir fixed_wheel && delocate-wheel -w fixed_wheel python/dist/*.whl`
+
+Then the whl under `fixed_wheel` is ready to upload.
+
+Install steps:
+
+1. Run `pip install paddlepaddle...whl`.
+1. Find the `libpython.dylib` that is currently in use:
+    - for python.org package installs, do nothing.
+    - for other python installs, find the path of `libpython*.dylib` and `export LD_LIBRARY_PATH=your path && DYLD_LIBRARY_PATH=your path`.
+
 ## Publish Docker Images
 Our CI tool will push latest images to DockerHub, so we only need to push a version tag like:
......
@@ -129,10 +129,6 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
                                  "Optimized for variable")
       .SetDefault({});
-  AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
-                                    "Callstack for Op Creatation.")
-      .SetDefault({});
   Validate();
 }
......
@@ -39,7 +39,6 @@ class OpProtoAndCheckerMaker {
  public:
   static const char *OpRoleAttrName() { return "op_role"; }
   static const char *OpRoleVarAttrName() { return "op_role_var"; }
-  static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
   void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
......
@@ -11,17 +11,15 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/framework/operator.h"
+#include <gflags/gflags.h>
+#include <glog/logging.h>
 #include <algorithm>
-#include <sstream>
-#include <string>
-#include <vector>
-#include "gflags/gflags.h"
-#include "glog/logging.h"
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/op_proto_maker.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -129,48 +127,19 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
 }
 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  try {
-    if (VLOG_IS_ON(4)) {
-      VLOG(4) << place << " " << DebugStringEx(&scope);
-    }
-    if (platform::is_gpu_place(place)) {
+  VLOG(4) << place << " " << DebugStringEx(&scope);
+  if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
     PADDLE_THROW("Cannot run operator on place %s", place);
 #else
     auto dev_id = boost::get<platform::CUDAPlace>(place).device;
     platform::SetDeviceId(dev_id);
 #endif
-    }
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    platform::RecordEvent record_event(Type(), pool.Get(place));
-    RunImpl(scope, place);
-    if (VLOG_IS_ON(3)) {
-      VLOG(3) << place << " " << DebugStringEx(&scope);
-    }
-  } catch (platform::EnforceNotMet exception) {
-    if (Attrs().count("sub_block") != 0) {
-      throw exception;
-    }
-    auto& callstack = Attr<std::vector<std::string>>(
-        OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
-    if (callstack.empty()) {
-      throw exception;
-    }
-    std::ostringstream sout;
-    sout << "Invoke operator " << Type() << " error.\n";
-    sout << "Python Callstacks: \n";
-    for (auto& line : callstack) {
-      sout << line;
-    }
-    sout << "C++ Callstacks: \n";
-    sout << exception.err_str_;
-    exception.err_str_ = sout.str();
-    throw exception;
-  } catch (...) {
-    std::rethrow_exception(std::current_exception());
   }
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  platform::RecordEvent record_event(Type(), pool.Get(place));
+  RunImpl(scope, place);
+  VLOG(3) << place << " " << DebugStringEx(&scope);
 }
 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -198,7 +167,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
 }
 bool OperatorBase::HasOutputs(const std::string& name) const {
-  if (outputs_.end() != outputs_.find(name)) {
+  if (outputs_.find(name) != outputs_.end()) {
     return true;
   } else {
     return false;
......
@@ -31,7 +31,8 @@ size_t Tensor::memory_size() const {
   return holder_ == nullptr ? 0UL : holder_->size() - offset_;
 }
-void* Tensor::mutable_data(platform::Place place, std::type_index type) {
+void* Tensor::mutable_data(platform::Place place, std::type_index type,
+                           size_t requested_size) {
   if (holder_ != nullptr) {
     holder_->set_type(type);
   }
@@ -39,7 +40,7 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) {
       "When calling this method, the Tensor's numel must be "
       "equal or larger than zero. "
       "Please check Tensor::Resize has been called first.");
-  int64_t size = numel() * SizeOfType(type);
+  size_t size = requested_size ? requested_size : numel() * SizeOfType(type);
   /* some versions of boost::variant don't have operator!= */
   if (holder_ == nullptr || !(holder_->place() == place) ||
       holder_->size() < size + offset_) {
@@ -68,10 +69,10 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type) {
                        offset_);
 }
-void* Tensor::mutable_data(platform::Place place) {
+void* Tensor::mutable_data(platform::Place place, size_t requested_size) {
   PADDLE_ENFORCE(this->holder_ != nullptr,
                  "Cannot invoke mutable data if current hold nothing.");
-  return mutable_data(place, holder_->type());
+  return mutable_data(place, holder_->type(), requested_size);
 }
 Tensor& Tensor::ShareDataWith(const Tensor& src) {
......
@@ -89,22 +89,24 @@ class Tensor {
    * @note If not exist, then allocation.
    */
   template <typename T>
-  T* mutable_data(platform::Place place);
-  void* mutable_data(platform::Place place, std::type_index type);
-  void* mutable_data(platform::Place place);
+  T* mutable_data(platform::Place place, size_t requested_size = 0);
+  void* mutable_data(platform::Place place, std::type_index type,
+                     size_t requested_size = 0);
+  void* mutable_data(platform::Place place, size_t requested_size = 0);
   /**
    * @brief Return a pointer to mutable memory block.
    *
    * @param[in] dims           The dimensions of the memory block.
    * @param[in] place          The place of the memory block.
+   * @param[in] requested_size The size of the block in bytes.
    *
    * @note If not exist, then allocation.
    */
   template <typename T>
-  T* mutable_data(DDim dims, platform::Place place);
+  T* mutable_data(DDim dims, platform::Place place, size_t requested_size = 0);
   /*! Return the dimensions of the memory block. */
   const DDim& dims() const;
......
@@ -46,16 +46,17 @@ inline T* Tensor::data() {
 }
 template <typename T>
-inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
+inline T* Tensor::mutable_data(DDim dims, platform::Place place,
+                               size_t requested_size) {
   static_assert(std::is_pod<T>::value, "T must be POD");
   Resize(dims);
-  return mutable_data<T>(place);
+  return mutable_data<T>(place, requested_size);
 }
 template <typename T>
-inline T* Tensor::mutable_data(platform::Place place) {
+inline T* Tensor::mutable_data(platform::Place place, size_t requested_size) {
   static_assert(std::is_pod<T>::value, "T must be POD");
-  return reinterpret_cast<T*>(mutable_data(place, typeid(T)));
+  return reinterpret_cast<T*>(mutable_data(place, typeid(T), requested_size));
 }
 inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
......
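Taken together, the three hunks above let callers ask `mutable_data` for a buffer larger than `numel() * SizeOfType(type)`; passing `0` (the default) keeps the old behaviour. A minimal sketch of the new call pattern, where the shape and the 256-byte figure are made-up illustrative values rather than anything the framework requires:

```cpp
#include "paddle/fluid/framework/tensor.h"

// Sketch only: shows the extended mutable_data signature added above.
void RequestedSizeExample() {
  using paddle::framework::Tensor;
  using paddle::framework::make_ddim;
  using paddle::platform::CPUPlace;

  Tensor t;
  t.Resize(make_ddim({4, 8}));  // 32 floats -> 128 bytes by default

  // Old behaviour (requested_size defaults to 0): size = numel() * sizeof(float).
  float* plain = t.mutable_data<float>(CPUPlace());

  // New behaviour: ask for a larger buffer, e.g. because an external library
  // needs padded storage for the same logical shape (hypothetical size).
  float* padded = t.mutable_data<float>(CPUPlace(), /*requested_size=*/256);

  (void)plain;
  (void)padded;
}
```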
@@ -135,7 +135,7 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Variance",
              "The global variance (for training) "
              "or estimated Variance (for testing)");
-    AddOutput("Y", "result after normalization");
+    AddOutput("Y", "result after normalization").Reuse("X");
     AddOutput("MeanOut",
               "Share memory with Mean. "
               "Store the global mean when training")
......
@@ -53,6 +53,18 @@ class ConvMKLDNNHandler : public platform::MKLDNNHandler {
       key_ += "-BWD";
   }
+  size_t GetDstMemorySize() const {
+    return conv_pd_->dst_primitive_desc().get_size();
+  }
+
+  size_t GetDiffWeightsMemorySize() const {
+    return conv_bwd_weights_pd_->diff_weights_primitive_desc().get_size();
+  }
+
+  size_t GetDiffSourceMemorySize() const {
+    return conv_bwd_data_pd_->diff_src_primitive_desc().get_size();
+  }
   std::shared_ptr<mkldnn::memory> AcquireSrcMemoryFromWeightsPrimitive(
       const std::shared_ptr<mkldnn::memory> user_memory_p,
       std::vector<mkldnn::primitive>& pipeline) {  // NOLINT
@@ -294,7 +306,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     const T* input_data = input->data<T>();
     const T* filter_data = filter->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> weights_tz =
@@ -354,6 +365,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto user_weights_memory_p = handler.AcquireWeightsMemory(
         user_weights_md, to_void_cast<T>(filter_data));
+    T* output_data =
+        output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
     // create reorder primitive if the input format is not the preferred one
     auto src_memory_p =
         handler.AcquireSrcMemoryFromPrimitive(user_src_memory_p, pipeline);
@@ -476,13 +489,6 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     T* input_grad_data = nullptr;
     T* filter_grad_data = nullptr;
-    if (input_grad) {
-      input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
-    }
-    if (filter_grad) {
-      filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
-    }
     std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
     std::vector<int> weights_tz =
         paddle::framework::vectorize2int(filter->dims());
@@ -568,6 +574,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
         handler.AcquireDiffDstMemoryFromWeightsPrimitive(
             user_diff_dst_memory_p, pipeline);
+      const size_t size = handler.GetDiffWeightsMemorySize();
+      filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace(), size);
       auto diff_weights_memory_p =
           handler.AcquireDiffWeightsMemoryFromWeightsPrimitive(
               reinterpret_cast<void*>(filter_grad_data));
@@ -590,6 +599,9 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
         handler.AcquireDiffDstMemoryFromDataPrimitive(user_diff_dst_memory_p,
                                                       pipeline);
+      const size_t size = handler.GetDiffSourceMemorySize();
+      input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace(), size);
       auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive(
           reinterpret_cast<void*>(input_grad_data));
......
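The reason the convolution kernels above defer allocation until the handler exists: MKL-DNN may choose a blocked or padded memory format for the destination, so the physical buffer reported by the primitive descriptor can exceed `numel() * sizeof(T)`. A sketch of the pattern the kernels now follow; the handler methods come from this file, while the wrapper function itself is only illustrative:

```cpp
// Illustrative helper only: allocate a conv output with the size MKL-DNN
// actually needs, rather than the size implied by the tensor's logical shape.
template <typename T, typename Handler>
T* AllocateConvDst(paddle::framework::Tensor* output,
                   const paddle::platform::Place& place,
                   const Handler& handler) {
  // GetDstMemorySize() may exceed output->numel() * sizeof(T) when MKL-DNN
  // picks a blocked format (e.g. one that pads the channel dimension).
  size_t dst_bytes = handler.GetDstMemorySize();
  return output->mutable_data<T>(place, dst_bytes);
}
```

The backward path applies the same idea with `GetDiffWeightsMemorySize()` and `GetDiffSourceMemorySize()` before acquiring the corresponding MKL-DNN memory primitives.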
@@ -151,6 +151,7 @@ bool VariableResponse::CopySelectRowsData(
     ::google::protobuf::io::CodedInputStream* input,
     const platform::DeviceContext& ctx, int length) {
   auto* slr = GetVar()->GetMutable<framework::SelectedRows>();
+  slr->mutable_rows()->clear();
   slr->mutable_rows()->resize(length /
                               framework::SizeOfType(typeid(int64_t)));  // int64
   int64_t* rows_data = slr->mutable_rows()->data();
......
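The added `clear()` makes the row buffer start from scratch each time the same `SelectedRows` variable receives data; `resize()` alone keeps whatever existing elements still fit in the new length. A standalone illustration of that container behaviour, using plain `std::vector` and assuming `framework::Vector` follows the same resize semantics:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<int64_t> rows = {7, 8, 9};

  // resize() alone: surviving elements keep their old values.
  rows.resize(2);
  assert(rows[0] == 7 && rows[1] == 8);

  // clear() + resize(): elements are value-initialized before the new
  // row indices are written in.
  rows.clear();
  rows.resize(2);
  assert(rows[0] == 0 && rows[1] == 0);
  return 0;
}
```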
@@ -15,7 +15,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/device_context.h"
 namespace paddle {
 namespace operators {
@@ -41,19 +40,33 @@ class FillConstantOp : public framework::OperatorBase {
         static_cast<framework::proto::VarType::Type>(Attr<int>("dtype"));
     auto value = Attr<float>("value");
     auto force_cpu = Attr<bool>("force_cpu");
-    auto &out =
-        *scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
-    out.Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
+
+    framework::Tensor *tensor = nullptr;
+
+    auto &out_var = *scope.FindVar(Output("Out"));
+
+    if (out_var.IsType<framework::LoDTensor>()) {
+      tensor = out_var.GetMutable<framework::LoDTensor>();
+      tensor->Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
+    } else if (out_var.IsType<framework::SelectedRows>()) {
+      tensor = out_var.GetMutable<framework::SelectedRows>()->mutable_value();
+      tensor->Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
+    } else {
+      PADDLE_THROW(
+          "fill constant op's output only"
+          "supports SelectedRows and LoDTensor");
+    }
+
     if (force_cpu) {
       auto cpu = platform::CPUPlace();
-      out.mutable_data(cpu, framework::ToTypeIndex(data_type));
+      tensor->mutable_data(cpu, framework::ToTypeIndex(data_type));
     } else {
-      out.mutable_data(dev_place, framework::ToTypeIndex(data_type));
+      tensor->mutable_data(dev_place, framework::ToTypeIndex(data_type));
     }
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    math::set_constant(dev_ctx, &out, value);
+    math::set_constant(dev_ctx, tensor, value);
   }
 };
......
@@ -165,12 +165,13 @@ void ListenAndServOp::RunSyncLoop(
                        recv_scope);
     VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
-    rpc_service_->SetCond(distributed::kRequestGet);
-    rpc_service_->WaitBarrier(distributed::kRequestGet);
-    rpc_service_->ResetBarrierCounter();
     // reset received sparse vars to avoid reuse it in the next mini-batch
     dynamic_cast<distributed::RequestSendHandler *>(request_send_handler_.get())
         ->ResetSparseVarRecorder();
+
+    rpc_service_->SetCond(distributed::kRequestGet);
+    rpc_service_->WaitBarrier(distributed::kRequestGet);
+    rpc_service_->ResetBarrierCounter();
   }  // while(true)
 }
......
@@ -48,16 +48,16 @@ class ConcatFunctor<platform::CPUDeviceContext, T> {
     auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
     // computation
-    for (int k = 0; k < out_rows; ++k) {
-      T* dst_ptr = output->data<T>() + k * out_cols;
-      int col_idx = 0;
-      for (int j = 0; j < num; ++j) {
-        int col_len = input_cols[j];
-        const T* src_prt = input[j].data<T>() + k * col_len;
-        memory::Copy(cpu_place, dst_ptr + col_idx, cpu_place, src_prt,
-                     sizeof(T) * col_len);
-        col_idx += col_len;
+    auto output_data = output->data<T>();
+    int col_idx = 0;
+    for (int j = 0; j < num; ++j) {
+      int col_len = input_cols[j];
+      auto input_data = input[j].data<T>();
+      for (int k = 0; k < out_rows; ++k) {
+        memory::Copy(cpu_place, output_data + k * out_cols + col_idx, cpu_place,
+                     input_data + k * col_len, sizeof(T) * col_len);
       }
+      col_idx += col_len;
     }
   }
 };
......
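The rewrite swaps the loop order so each input's data pointer is fetched once and all of its rows are copied before moving to the next column block. A small standalone sketch of the same row-major indexing, written only for illustration and not part of the framework:

```cpp
#include <cstring>
#include <vector>

// Concatenate row-major matrices (all with `rows` rows) along the column
// axis, using the same arithmetic as the functor above:
// out[k * out_cols + col_idx + c] = in_j[k * col_len_j + c].
std::vector<float> ConcatCols(const std::vector<std::vector<float>>& inputs,
                              const std::vector<int>& input_cols, int rows) {
  int out_cols = 0;
  for (int c : input_cols) out_cols += c;
  std::vector<float> out(static_cast<size_t>(rows) * out_cols);
  int col_idx = 0;
  for (size_t j = 0; j < inputs.size(); ++j) {
    int col_len = input_cols[j];
    const float* input_data = inputs[j].data();
    for (int k = 0; k < rows; ++k) {
      std::memcpy(out.data() + k * out_cols + col_idx,
                  input_data + k * col_len, sizeof(float) * col_len);
    }
    col_idx += col_len;
  }
  return out;
}
```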
@@ -53,25 +53,27 @@ struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
       LoDTensor* out) {
     int out_offset = 0;
-    auto& eigen_place = *context.eigen_device();
+    int x_item_length = x.numel() / x.dims()[0];
+    auto out_data = out->data<T>();
+    auto x_data = x.data<T>();
     for (size_t i = 1; i < ref_lod.size(); ++i) {
       int repeat_num = ref_lod[i] - ref_lod[i - 1];
       int x_start = x_lod[i - 1];
       int x_end = x_lod[i];
       int x_seq_len = x_end - x_start;
       if (repeat_num > 0) {
-        auto x_sub_tensor = x.Slice(x_start, x_end);
-        x_sub_tensor.Resize({1, x_sub_tensor.numel()});
         int out_start = out_offset;
         if (out->lod().size() == 1) {
           out_start = out->lod()[0][out_offset];
         }
-        auto out_sub_tensor =
-            out->Slice(out_start, out_start + x_seq_len * repeat_num);
-        out_sub_tensor.Resize({repeat_num, x_sub_tensor.dims()[1]});
-        EigenMatrix<T>::From(out_sub_tensor).device(eigen_place) =
-            EigenMatrix<T>::From(x_sub_tensor)
-                .broadcast(Eigen::array<int, 2>({{repeat_num, 1}}));
+        for (int j = 0; j < repeat_num; j++) {
+          for (int k = 0; k < x_seq_len; k++) {
+            for (int l = 0; l < x_item_length; l++) {
+              out_data[(out_start + j * x_seq_len + k) * x_item_length + l] =
+                  x_data[(x_start + k) * x_item_length + l];
+            }
+          }
+        }
       }
       out_offset += repeat_num;
     }
......
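For reference, the expansion this functor performs: each input sub-sequence `i` is copied `ref_lod[i] - ref_lod[i-1]` times in order. A minimal standalone illustration on plain arrays with made-up LoDs (`x_lod = {0, 2, 3}`, `ref_lod = {0, 2, 5}`), assuming `x_item_length == 1` for brevity:

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f};     // sequences: [1, 2] | [3]
  std::vector<size_t> x_lod = {0, 2, 3};
  std::vector<size_t> ref_lod = {0, 2, 5};    // repeat counts: 2 and 3
  std::vector<float> out;
  for (size_t i = 1; i < ref_lod.size(); ++i) {
    int repeat_num = static_cast<int>(ref_lod[i] - ref_lod[i - 1]);
    for (int j = 0; j < repeat_num; ++j) {
      for (size_t k = x_lod[i - 1]; k < x_lod[i]; ++k) out.push_back(x[k]);
    }
  }
  // out == {1, 2, 1, 2, 3, 3, 3}
  for (float v : out) std::printf("%g ", v);
  return 0;
}
```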
@@ -30,8 +30,6 @@ class TopkOp : public framework::OperatorWithKernel {
                    "Output(Indices) of TopkOp should not be null.");
     auto input_dims = ctx->GetInputDim("X");
-    PADDLE_ENFORCE_EQ(input_dims.size(), 2,
-                      "Rank of TopK op's input must be 2.");
     const int k = static_cast<int>(ctx->Attrs().Get<int>("k"));
     PADDLE_ENFORCE_GE(k, 1, "k must >= 1");
......
@@ -37,7 +37,7 @@ class CPUUniformRandomKernel : public framework::OpKernel<T> {
     } else {
       PADDLE_THROW(
           "uniform_random_op's output only"
-          "supports SelectedRows and Tensor");
+          "supports SelectedRows and LoDTensor");
     }
     T* data = tensor->mutable_data<T>(ctx.GetPlace());
     unsigned int seed = static_cast<unsigned int>(ctx.Attr<int>("seed"));
......
@@ -54,7 +54,7 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
     } else {
       PADDLE_THROW(
           "uniform_random_op's output only"
-          "supports SelectedRows and Tensor");
+          "supports SelectedRows and LoDTensor");
     }
     T* data = tensor->mutable_data<T>(context.GetPlace());
     unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
......
@@ -43,9 +43,6 @@ void BindConstValue(pybind11::module* m) {
   op_proto_and_checker_maker.def(
       "kOpRoleVarAttrName",
       framework::OpProtoAndCheckerMaker::OpRoleVarAttrName);
-  op_proto_and_checker_maker.def(
-      "kOpCreationCallstackAttrName",
-      framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
 }
 }  // namespace pybind
......
@@ -19,6 +19,7 @@ import hashlib
 import os
 import errno
 import shutil
+import six
 import sys
 import importlib
 import paddle.dataset
@@ -94,6 +95,8 @@ def download(url, module_name, md5sum, save_name=None):
                 dl = 0
                 total_length = int(total_length)
                 for data in r.iter_content(chunk_size=4096):
+                    if six.PY2:
+                        data = six.b(data)
                     dl += len(data)
                     f.write(data)
                     done = int(50 * dl / total_length)
......
@@ -35,6 +35,7 @@ import itertools
 import functools
 from .common import download
 import tarfile
+import six
 import scipy.io as scio
 from paddle.dataset.image import *
 from paddle.reader import *
@@ -45,10 +46,10 @@ from six.moves import cPickle as pickle
 from six.moves import zip
 __all__ = ['train', 'test', 'valid']
-DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
-LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat'
-SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat'
-DATA_MD5 = '33bfc11892f1e405ca193ae9a9f2a118'
+DATA_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/102flowers.tgz'
+LABEL_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/imagelabels.mat'
+SETID_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/setid.mat'
+DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
 LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
 SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
 # In official 'readme', tstid is the flag of test data
@@ -120,7 +121,10 @@ def reader_creator(data_file,
             file = file.strip()
             batch = None
             with open(file, 'rb') as f:
-                batch = pickle.load(f)
+                if six.PY2:
+                    batch = pickle.load(f)
+                else:
+                    batch = pickle.load(f, encoding='bytes')
             data = batch['data']
             labels = batch['label']
             for sample, label in zip(data, batch['label']):
......
@@ -18,7 +18,6 @@ import collections
 import contextlib
 import re
 import six
-import traceback
 import numpy as np
@@ -506,10 +505,6 @@ class Operator(object):
         if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
             del op_attrs[role_var_name]
-        callstack_var_name = op_maker.kOpCreationCallstackAttrName()
-        op_attrs[callstack_var_name] = list(
-            reversed(traceback.format_stack()))[1:]
         if len(self.desc.type()) != 0:
             return
         if type is None:
......
@@ -27,7 +27,6 @@ from . import utils
 import random
 from .. import unique_name
 from functools import reduce
-import warnings
 __all__ = [
     'fc',
@@ -2048,7 +2047,7 @@ def batch_norm(input,
         param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
         bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
         data_layout(string, default NCHW): NCHW|NHWC
-        in_place(bool, Default False): This argument is deprecated since 0.15.0.
+        in_place(bool, Default False): Make the input and output of batch norm reuse memory.
         use_mkldnn(bool, Default false): ${use_mkldnn_comment}
         name(string, Default None): A name for this layer(optional). If set None, the layer
             will be named automatically.
@@ -2070,10 +2069,6 @@ def batch_norm(input,
     helper = LayerHelper('batch_norm', **locals())
     dtype = helper.input_dtype()
-    if in_place:
-        raise warnings.warn("The argument in_place is deprecated since 0.15.0, "
-                            "please do not set it True.")
     input_shape = input.shape
     if data_layout == 'NCHW':
         channel_num = input_shape[1]
@@ -2123,7 +2118,7 @@ def batch_norm(input,
     saved_mean = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_tmp_variable(dtype=dtype, stop_gradient=True)
-    batch_norm_out = helper.create_tmp_variable(dtype)
+    batch_norm_out = input if in_place else helper.create_tmp_variable(dtype)
     helper.append_op(
         type="batch_norm",
......
@@ -229,7 +229,7 @@ def img_conv_group(input,
             use_mkldnn=use_mkldnn)
         if conv_with_batchnorm[i]:
-            tmp = layers.batch_norm(input=tmp, act=conv_act)
+            tmp = layers.batch_norm(input=tmp, act=conv_act, in_place=True)
             drop_rate = conv_batchnorm_drop_rate[i]
             if abs(drop_rate) > 1e-5:
                 tmp = layers.dropout(x=tmp, dropout_prob=drop_rate)
......
@@ -256,10 +256,7 @@ def main(net_type, use_cuda, is_local=True):
     save_dirname = "image_classification_" + net_type + ".inference.model"
     train(net_type, use_cuda, save_dirname, is_local)
-    # There is bug in fluid.InferenceTranspiler for VGG.
-    if net_type == "resnet":
-        infer(use_cuda, save_dirname)
+    infer(use_cuda, save_dirname)
 class TestImageClassification(unittest.TestCase):
......
@@ -18,6 +18,9 @@ import unittest
 import numpy as np
 from op_test import OpTest
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
 class TestFillConstantOp1(OpTest):
     def setUp(self):
@@ -47,5 +50,31 @@ class TestFillConstantOp2(OpTest):
         self.check_output()
+class TestFillConstantOpWithSelectedRows(OpTest):
+    def check_with_place(self, place):
+        scope = core.Scope()
+        # create Out Variable
+        out = scope.var('Out').get_selected_rows()
+
+        # create and run fill_constant_op operator
+        fill_constant_op = Operator(
+            "fill_constant", shape=[123, 92], value=3.8, Out='Out')
+        fill_constant_op.run(scope, place)
+
+        # get result from Out
+        result_array = np.array(out.get_tensor())
+        full_array = np.full((123, 92), 3.8, 'float32')
+
+        self.assertTrue(np.array_equal(result_array, full_array))
+
+    def test_fill_constant_with_selected_rows(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+
+        for place in places:
+            self.check_with_place(place)
 if __name__ == "__main__":
     unittest.main()
@@ -67,10 +67,7 @@ class TestOperator(unittest.TestCase):
         self.assertEqual(mul_op.output("Out"), ["mul.out"])
         self.assertEqual(
             set(mul_op.attr_names),
-            set([
-                "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
-                "op_callstack"
-            ]))
+            set(["x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var"]))
         self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
         self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
         self.assertEqual(mul_op.attr("x_num_col_dims"), 1)
......