提交 ffb50fdd 编写于 作者: W wangguibao

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

......@@ -52,6 +52,7 @@ ExternalProject_Add(
PREFIX ${ANAKIN_SOURCE_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS ${CMAKE_ARGS_PREFIX}
-DUSE_LOGGER=YES
-DUSE_X86_PLACE=YES
-DBUILD_WITH_UNIT_TEST=NO
-DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
......
......@@ -21,7 +21,7 @@ paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'en
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
......@@ -145,14 +145,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
......@@ -160,6 +160,12 @@ paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shap
paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
......@@ -225,12 +231,6 @@ paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords=
paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
......@@ -299,13 +299,17 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init',
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
......
......@@ -56,9 +56,9 @@ else()
cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
endif()
if (NOT WIN32)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio version)
else()
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto version)
endif (NOT WIN32)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
......@@ -141,12 +141,15 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
if(WITH_DISTRIBUTE)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass)
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else()
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass elementwise_add_op)
endif()
if (NOT WIN32)
......@@ -166,15 +169,8 @@ cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto)
cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
# cc_test(channel_test SRCS channel_test.cc)
cc_test(tuple_test SRCS tuple_test.cc )
if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32)
# disable test temporarily.
# TODO https://github.com/PaddlePaddle/Paddle/issues/11971
# cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op
# channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op
# conditional_block_op while_op assign_op print_op executor proto_desc)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <condition_variable> // NOLINT
#include <typeindex>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
enum class ChannelAction {
SEND = 0,
RECEIVE = 1,
CLOSE = 2,
};
// Channel is the abstract class of buffered and un-buffered channels.
template <typename T>
class Channel {
public:
virtual bool CanSend() = 0;
virtual bool CanReceive() = 0;
virtual void Send(T*) = 0;
virtual bool Receive(T*) = 0;
virtual size_t Cap() = 0;
virtual void Lock() = 0;
virtual void Unlock() = 0;
virtual bool IsClosed() = 0;
virtual void Close() = 0;
virtual ~Channel() {}
virtual void AddToSendQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) = 0;
virtual void AddToReceiveQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) = 0;
virtual void RemoveFromSendQ(const void* referrer) = 0;
virtual void RemoveFromReceiveQ(const void* referrer) = 0;
};
// Forward declaration of channel implementations.
template <typename T>
class ChannelImpl;
template <typename T>
Channel<T>* MakeChannel(size_t buffer_size) {
return new ChannelImpl<T>(buffer_size);
}
template <typename T>
void CloseChannel(Channel<T>* ch) {
ch->Close();
}
/*
* The ChannelHolder class serves two main purposes:
* 1. It acts as a unified wrapper for the different kinds of
* channels, i.e. Buffered and Unbuffered channels. This is
* similar to the ReaderHolder class.
* 2. It also helps us in TypeHiding. This is similar to the
* PlaceHolder implementations in variable.h and tensor.h.
*/
class ChannelHolder {
public:
template <typename T>
void Reset(size_t buffer_size) {
holder_.reset(new PlaceholderImpl<T>(buffer_size));
}
template <typename T>
void Send(T* data) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
PADDLE_ENFORCE_EQ(
holder_->Type(), std::type_index(typeid(T)),
"Channel type is not same as the type of the data being sent");
// Static cast should be safe because we have ensured that types are same
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
PADDLE_ENFORCE_EQ(channel != nullptr, true, "Channel should not be null.");
channel->Send(data);
}
template <typename T>
bool Receive(T* data) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
PADDLE_ENFORCE_EQ(
holder_->Type(), std::type_index(typeid(T)),
"Channel type is not same as the type of the data being sent");
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
PADDLE_ENFORCE_EQ(channel != nullptr, true, "Channel should not be null.");
return channel->Receive(data);
}
bool IsClosed() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->IsClosed();
}
bool CanSend() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->CanSend();
}
bool CanReceive() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->CanReceive();
}
void close() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->Close();
}
size_t Cap() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->Cap();
}
void Lock() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->Lock();
}
void Unlock() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->Unlock();
}
template <typename T>
void AddToSendQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
if (channel != nullptr) {
channel->AddToSendQ(referrer, data, cond, cb);
}
}
template <typename T>
void AddToReceiveQ(const void* referrer, T* data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
Channel<T>* channel = static_cast<Channel<T>*>(holder_->Ptr());
if (channel != nullptr) {
channel->AddToReceiveQ(referrer, data, cond, cb);
}
}
void RemoveFromSendQ(const void* referrer) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->RemoveFromSendQ(referrer);
}
void RemoveFromReceiveQ(const void* referrer) {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
holder_->RemoveFromReceiveQ(referrer);
}
inline bool IsInitialized() const { return holder_ != nullptr; }
inline const std::type_index Type() {
PADDLE_ENFORCE_EQ(IsInitialized(), true,
"The Channel hasn't been initialized");
return holder_->Type();
}
private:
/**
* @note Placeholder hides type T, so it doesn't appear as a template
* parameter of ChannelHolder.
*/
struct Placeholder {
virtual ~Placeholder() {}
virtual const std::type_index Type() const = 0;
virtual void* Ptr() const = 0;
virtual bool IsClosed() = 0;
virtual bool CanSend() = 0;
virtual bool CanReceive() = 0;
virtual void RemoveFromSendQ(const void* referrer) = 0;
virtual void RemoveFromReceiveQ(const void* referrer) = 0;
virtual void Close() = 0;
virtual void Lock() = 0;
virtual void Unlock() = 0;
virtual size_t Cap() = 0;
};
template <typename T>
struct PlaceholderImpl : public Placeholder {
explicit PlaceholderImpl(size_t buffer_size)
: type_(std::type_index(typeid(T))) {
channel_.reset(MakeChannel<T>(buffer_size));
}
virtual const std::type_index Type() const { return type_; }
virtual void* Ptr() const { return static_cast<void*>(channel_.get()); }
virtual bool IsClosed() {
if (channel_) {
return channel_->IsClosed();
}
return false;
}
virtual bool CanSend() {
if (channel_) {
return channel_->CanSend();
}
return false;
}
virtual bool CanReceive() {
if (channel_) {
return channel_->CanReceive();
}
return false;
}
virtual void RemoveFromSendQ(const void* referrer) {
if (channel_) {
channel_->RemoveFromSendQ(referrer);
}
}
virtual void RemoveFromReceiveQ(const void* referrer) {
if (channel_) {
channel_->RemoveFromReceiveQ(referrer);
}
}
virtual void Close() {
if (channel_) channel_->Close();
}
virtual size_t Cap() {
if (channel_)
return channel_->Cap();
else
return -1;
}
virtual void Lock() {
if (channel_) channel_->Lock();
}
virtual void Unlock() {
if (channel_) channel_->Unlock();
}
std::unique_ptr<Channel<T>> channel_;
const std::type_index type_;
};
// Pointer to a PlaceholderImpl object
std::unique_ptr<Placeholder> holder_;
};
} // namespace framework
} // namespace paddle
#include "paddle/fluid/framework/channel_impl.h"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h> // for size_t
#include <atomic>
#include <condition_variable> // NOLINT
#include <deque>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T>
class ChannelImpl : public paddle::framework::Channel<T> {
friend Channel<T> *paddle::framework::MakeChannel<T>(size_t);
friend void paddle::framework::CloseChannel<T>(Channel<T> *);
public:
virtual bool CanSend();
virtual bool CanReceive();
virtual void Send(T *);
virtual bool Receive(T *);
virtual size_t Cap() { return cap_; }
virtual void Lock();
virtual void Unlock();
virtual bool IsClosed();
virtual void Close();
explicit ChannelImpl(size_t);
virtual ~ChannelImpl();
virtual void AddToSendQ(const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb);
virtual void AddToReceiveQ(const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb);
virtual void RemoveFromSendQ(const void *referrer);
virtual void RemoveFromReceiveQ(const void *referrer);
private:
struct QueueMessage {
T *data;
std::shared_ptr<std::condition_variable_any> cond;
bool chan_closed = false;
bool completed = false;
const void *referrer; // TODO(thuan): figure out better way to do this
std::function<bool(ChannelAction)> callback;
explicit QueueMessage(T *item)
: data(item), cond(std::make_shared<std::condition_variable_any>()) {}
QueueMessage(T *item, std::shared_ptr<std::condition_variable_any> cond)
: data(item), cond(cond) {}
void Wait(std::unique_lock<std::recursive_mutex> &lock) {
cond->wait(lock, [this]() { return completed; });
}
void Notify() {
completed = true;
cond->notify_all();
}
};
void send_return() {
send_ctr--;
destructor_cond_.notify_all();
}
bool recv_return(bool value) {
recv_ctr--;
destructor_cond_.notify_all();
return value;
}
std::shared_ptr<QueueMessage> get_first_message(
std::deque<std::shared_ptr<QueueMessage>> *queue, ChannelAction action) {
while (!queue->empty()) {
// Check whether this message was added by Select
// If this was added by Select then execute the callback
// to check if you can execute this message. The callback
// can return false if some other case was executed in Select.
// In that case just discard this QueueMessage and process next.
std::shared_ptr<QueueMessage> m = queue->front();
queue->pop_front();
if (m->callback == nullptr || m->callback(action)) return m;
}
return nullptr;
}
size_t cap_;
std::recursive_mutex mu_;
bool closed_;
std::deque<T> buf_;
std::deque<std::shared_ptr<QueueMessage>> recvq;
std::deque<std::shared_ptr<QueueMessage>> sendq;
std::atomic<unsigned> send_ctr{0};
std::atomic<unsigned> recv_ctr{0};
std::condition_variable_any destructor_cond_;
};
template <typename T>
ChannelImpl<T>::ChannelImpl(size_t capacity)
: cap_(capacity), closed_(false), send_ctr(0), recv_ctr(0) {
PADDLE_ENFORCE_GE(capacity, 0);
}
template <typename T>
bool ChannelImpl<T>::CanSend() {
std::lock_guard<std::recursive_mutex> lock{mu_};
return !closed_ && (!recvq.empty() || buf_.size() < cap_);
}
template <typename T>
bool ChannelImpl<T>::CanReceive() {
std::lock_guard<std::recursive_mutex> lock{mu_};
return !(closed_ && buf_.empty()) && (!sendq.empty() || buf_.size() > 0);
}
template <typename T>
void ChannelImpl<T>::Send(T *item) {
send_ctr++;
std::unique_lock<std::recursive_mutex> lock{mu_};
// If channel is closed, throw exception
if (closed_) {
send_return();
lock.unlock();
PADDLE_THROW("Cannot send on closed channel");
}
// If there is a receiver, directly pass the value we want
// to send to the receiver, bypassing the channel buffer if any
if (!recvq.empty()) {
std::shared_ptr<QueueMessage> m =
get_first_message(&recvq, ChannelAction::SEND);
if (m != nullptr) {
*(m->data) = std::move(*item);
m->Notify();
send_return();
return;
} else {
Send(item);
send_return();
return;
}
}
// Unbuffered channel will always bypass this
// If buffered channel has space in buffer,
// write the element to the buffer.
if (buf_.size() < cap_) {
// Copy to buffer
buf_.push_back(std::move(*item));
send_return();
return;
}
// Block on channel, because some receiver will complete
// the operation for us
auto m = std::make_shared<QueueMessage>(item);
sendq.push_back(m);
m->Wait(lock);
if (m->chan_closed) {
send_return();
lock.unlock();
PADDLE_THROW("Cannot send on closed channel");
}
send_return();
}
template <typename T>
bool ChannelImpl<T>::Receive(T *item) {
recv_ctr++;
std::unique_lock<std::recursive_mutex> lock{mu_};
// If channel is closed and buffer is empty or
// channel is unbuffered
if (closed_ && buf_.empty()) return recv_return(false);
// If there is a sender, directly receive the value we want
// from the sender. In case of a buffered channel, read from
// buffer and move front of send queue to the buffer
if (!sendq.empty()) {
std::shared_ptr<QueueMessage> m =
get_first_message(&sendq, ChannelAction::RECEIVE);
if (buf_.size() > 0) {
// Case 1 : Channel is Buffered
// Do Data transfer from front of buffer
// and add a QueueMessage to the buffer
*item = std::move(buf_.front());
buf_.pop_front();
// If first message from sendq is not null
// add it to the buffer and notify it
if (m != nullptr) {
// Copy to buffer
buf_.push_back(std::move(*(m->data)));
m->Notify();
} // Ignore if there is no first message
} else {
// Case 2: Channel is Unbuffered
// Do data transfer from front of SendQ
// If front is nullptr, then recursively call itself
if (m != nullptr) {
*item = std::move(*(m->data));
m->Notify();
} else {
return recv_return(Receive(item));
}
}
return recv_return(true);
}
// If this is a buffered channel and there are items in buffer
if (buf_.size() > 0) {
// Directly read from buffer
*item = std::move(buf_.front());
buf_.pop_front();
// return true
return recv_return(true);
}
// No sender available, block on this channel
// Some receiver will complete the option for us
auto m = std::make_shared<QueueMessage>(item);
recvq.push_back(m);
m->Wait(lock);
return recv_return(!m->chan_closed);
}
template <typename T>
void ChannelImpl<T>::Lock() {
mu_.lock();
}
template <typename T>
void ChannelImpl<T>::Unlock() {
mu_.unlock();
}
template <typename T>
bool ChannelImpl<T>::IsClosed() {
std::lock_guard<std::recursive_mutex> lock{mu_};
return closed_;
}
template <typename T>
void ChannelImpl<T>::Close() {
std::unique_lock<std::recursive_mutex> lock{mu_};
if (closed_) {
// TODO(abhinavarora): closing an already closed channel should panic
lock.unlock();
return;
}
closed_ = true;
// Empty the readers
while (!recvq.empty()) {
std::shared_ptr<QueueMessage> m = recvq.front();
recvq.pop_front();
m->chan_closed = true;
// Execute callback function (if any)
if (m->callback != nullptr) {
m->callback(ChannelAction::CLOSE);
}
m->Notify();
}
// Empty the senders
while (!sendq.empty()) {
std::shared_ptr<QueueMessage> m = sendq.front();
sendq.pop_front();
m->chan_closed = true;
// Execute callback function (if any)
if (m->callback != nullptr) {
m->callback(ChannelAction::CLOSE);
}
m->Notify();
}
}
template <typename T>
void ChannelImpl<T>::AddToSendQ(
const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
std::lock_guard<std::recursive_mutex> lock{mu_};
auto m = std::make_shared<QueueMessage>(data, cond);
m->referrer = referrer;
m->callback = cb;
sendq.push_back(m);
}
template <typename T>
void ChannelImpl<T>::AddToReceiveQ(
const void *referrer, T *data,
std::shared_ptr<std::condition_variable_any> cond,
std::function<bool(ChannelAction)> cb) {
std::lock_guard<std::recursive_mutex> lock{mu_};
auto m = std::make_shared<QueueMessage>(data, cond);
m->referrer = referrer;
m->callback = cb;
recvq.push_back(m);
}
template <typename T>
void ChannelImpl<T>::RemoveFromSendQ(const void *referrer) {
std::lock_guard<std::recursive_mutex> lock{mu_};
for (auto it = sendq.begin(); it != sendq.end();) {
std::shared_ptr<QueueMessage> sendMsg = (std::shared_ptr<QueueMessage>)*it;
if (sendMsg->referrer == referrer) {
it = sendq.erase(it);
} else {
++it;
}
}
}
template <typename T>
void ChannelImpl<T>::RemoveFromReceiveQ(const void *referrer) {
std::lock_guard<std::recursive_mutex> lock{mu_};
for (auto it = recvq.begin(); it != recvq.end();) {
std::shared_ptr<QueueMessage> recvMsg = (std::shared_ptr<QueueMessage>)*it;
if (recvMsg->referrer == referrer) {
it = recvq.erase(it);
} else {
++it;
}
}
}
template <typename T>
ChannelImpl<T>::~ChannelImpl() {
Close();
// The destructor must wait for all readers and writers to complete their task
// The channel has been closed, so we will not accept new readers and writers
std::unique_lock<std::recursive_mutex> lock{mu_};
destructor_cond_.wait(lock,
[this]() { return send_ctr == 0 && recv_ctr == 0; });
}
} // namespace framework
} // namespace paddle
此差异已折叠。
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thread> // NOLINT
#include "gtest/gtest.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
USE_NO_KERNEL_OP(go);
USE_NO_KERNEL_OP(channel_close);
USE_NO_KERNEL_OP(channel_create);
USE_NO_KERNEL_OP(channel_recv);
USE_NO_KERNEL_OP(channel_send);
USE_NO_KERNEL_OP(elementwise_add);
USE_NO_KERNEL_OP(select);
USE_NO_KERNEL_OP(conditional_block);
USE_NO_KERNEL_OP(equal);
USE_NO_KERNEL_OP(assign);
USE_NO_KERNEL_OP(while);
USE_NO_KERNEL_OP(print);
namespace f = paddle::framework;
namespace p = paddle::platform;
namespace paddle {
namespace framework {
template <typename T>
LoDTensor *CreateVariable(Scope *scope, const p::CPUPlace &place,
std::string name, T value) {
// Create LoDTensor<int> of dim [1]
auto var = scope->Var(name);
auto tensor = var->GetMutable<LoDTensor>();
tensor->Resize({1});
T *expect = tensor->mutable_data<T>(place);
expect[0] = value;
return tensor;
}
void AddOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, AttributeMap attrs,
BlockDesc *block) {
// insert op
auto op = block->AppendOp();
op->SetType(type);
for (auto &kv : inputs) {
op->SetInput(kv.first, kv.second);
}
for (auto &kv : outputs) {
op->SetOutput(kv.first, kv.second);
}
op->SetAttrMap(attrs);
}
void AddCase(ProgramDesc *program, Scope *scope, p::CPUPlace *place,
BlockDesc *casesBlock, int caseId, int caseType,
std::string caseChannel, std::string caseVarName,
std::function<void(BlockDesc *, Scope *)> func) {
std::string caseCondName = std::string("caseCond") + std::to_string(caseId);
std::string caseCondXVarName =
std::string("caseCondX") + std::to_string(caseId);
BlockDesc *caseBlock = program->AppendBlock(*casesBlock);
func(caseBlock, scope);
CreateVariable(scope, *place, caseCondName, false);
CreateVariable(scope, *place, caseCondXVarName, caseId);
CreateVariable(scope, *place, caseVarName, caseId);
scope->Var("step_scope");
AddOp("equal", {{"X", {caseCondXVarName}}, {"Y", {"caseToExecute"}}},
{{"Out", {caseCondName}}}, {}, casesBlock);
AddOp("conditional_block", {{"X", {caseCondName}}, {"Params", {}}},
{{"Out", {}}, {"Scope", {"step_scope"}}},
{{"sub_block", caseBlock}, {"is_scalar_condition", true}}, casesBlock);
}
void AddFibonacciSelect(Scope *scope, p::CPUPlace *place, ProgramDesc *program,
BlockDesc *parentBlock, std::string dataChanName,
std::string quitChanName) {
BlockDesc *whileBlock = program->AppendBlock(*parentBlock);
CreateVariable(scope, *place, "whileExitCond", true);
CreateVariable(scope, *place, "caseToExecute", -1);
CreateVariable(scope, *place, "case1var", 0);
CreateVariable(scope, *place, "xtemp", 0);
// TODO(thuan): Need to create fibXToSend, since channel send moves the actual
// data,
// which causes the data to be no longer accessible to do the fib calculation
// TODO(abhinav): Change channel send to do a copy instead of a move!
CreateVariable(scope, *place, "fibXToSend", 0);
CreateVariable(scope, *place, "fibX", 0);
CreateVariable(scope, *place, "fibY", 1);
CreateVariable(scope, *place, "quitVar", 0);
BlockDesc *casesBlock = program->AppendBlock(*whileBlock);
std::function<void(BlockDesc * caseBlock)> f = [](BlockDesc *caseBlock) {};
// TODO(thuan): Remove this once we change channel send to do a copy instead
// of move
AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"fibXToSend"}}}, {}, whileBlock);
// Case 0: Send to dataChanName
std::function<void(BlockDesc * caseBlock, Scope * scope)> case0Func = [&](
BlockDesc *caseBlock, Scope *scope) {
AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"xtemp"}}}, {}, caseBlock);
AddOp("assign", {{"X", {"fibY"}}}, {{"Out", {"fibX"}}}, {}, caseBlock);
AddOp("elementwise_add", {{"X", {"xtemp"}}, {"Y", {"fibY"}}},
{{"Out", {"fibY"}}}, {}, caseBlock);
};
AddCase(program, scope, place, casesBlock, 0, 1, dataChanName, "fibXToSend",
case0Func);
std::string case0Config =
std::string("0,1,") + dataChanName + std::string(",fibXToSend");
// Case 1: Receive from quitChanName
std::function<void(BlockDesc * caseBlock, Scope * scope)> case2Func = [&](
BlockDesc *caseBlock, Scope *scope) {
// Exit the while loop after we receive from quit channel.
// We assign a false to "whileExitCond" variable, which will
// break out of while_op loop
CreateVariable(scope, *place, "whileFalse", false);
AddOp("assign", {{"X", {"whileFalse"}}}, {{"Out", {"whileExitCond"}}}, {},
caseBlock);
};
AddCase(program, scope, place, casesBlock, 1, 2, quitChanName, "quitVar",
case2Func);
std::string case1Config =
std::string("1,2,") + quitChanName + std::string(",quitVar");
// Select block
AddOp("select", {{"X", {dataChanName, quitChanName}},
{"case_to_execute", {"caseToExecute"}}},
{{"Out", {}}},
{{"sub_block", casesBlock},
{"cases", std::vector<std::string>{case0Config, case1Config}}},
whileBlock);
scope->Var("stepScopes");
AddOp("while",
{{"X", {dataChanName, quitChanName}}, {"Condition", {"whileExitCond"}}},
{{"Out", {}}, {"StepScopes", {"stepScopes"}}},
{{"sub_block", whileBlock}}, parentBlock);
}
TEST(Concurrency, Go_Op) {
Scope scope;
p::CPUPlace place;
// Initialize scope variables
p::CPUDeviceContext ctx(place);
// Create channel variable
scope.Var("Channel");
// Create Variables, x0 will be put into channel,
// result will be pulled from channel
CreateVariable(&scope, place, "Status", false);
CreateVariable(&scope, place, "x0", 99);
CreateVariable(&scope, place, "result", 0);
framework::Executor executor(place);
ProgramDesc program;
BlockDesc *block = program.MutableBlock(0);
// Create channel OP
AddOp("channel_create", {}, {{"Out", {"Channel"}}},
{{"capacity", 10}, {"data_type", f::proto::VarType::LOD_TENSOR}},
block);
// Create Go Op routine
BlockDesc *goOpBlock = program.AppendBlock(program.Block(0));
AddOp("channel_send", {{"Channel", {"Channel"}}, {"X", {"x0"}}},
{{"Status", {"Status"}}}, {}, goOpBlock);
// Create Go Op
AddOp("go", {{"X", {"Channel", "x0"}}}, {}, {{"sub_block", goOpBlock}},
block);
// Create Channel Receive Op
AddOp("channel_recv", {{"Channel", {"Channel"}}},
{{"Status", {"Status"}}, {"Out", {"result"}}}, {}, block);
// Create Channel Close Op
AddOp("channel_close", {{"Channel", {"Channel"}}}, {}, {}, block);
// Check the result tensor to make sure it is set to 0
const LoDTensor &tensor = (scope.FindVar("result"))->Get<LoDTensor>();
auto *initialData = tensor.data<int>();
EXPECT_EQ(initialData[0], 0);
executor.Run(program, &scope, 0, true, true);
// After we call executor.run, the Go operator should do a channel_send to
// set the "result" variable to 99.
auto *finalData = tensor.data<int>();
EXPECT_EQ(finalData[0], 99);
}
/**
* This test implements the fibonacci function using go_op and select_op
*/
TEST(Concurrency, Select) {
Scope scope;
p::CPUPlace place;
// Initialize scope variables
p::CPUDeviceContext ctx(place);
CreateVariable(&scope, place, "Status", false);
CreateVariable(&scope, place, "result", 0);
CreateVariable(&scope, place, "currentXFib", 0);
framework::Executor executor(place);
ProgramDesc program;
BlockDesc *block = program.MutableBlock(0);
// Create channel OP
std::string dataChanName = "Channel";
scope.Var(dataChanName);
AddOp("channel_create", {}, {{"Out", {dataChanName}}},
{{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}}, block);
std::string quitChanName = "Quit";
scope.Var(quitChanName);
AddOp("channel_create", {}, {{"Out", {quitChanName}}},
{{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}}, block);
// Create Go Op routine, which loops 10 times over fibonacci sequence
CreateVariable(&scope, place, "xReceiveVar", 0);
BlockDesc *goOpBlock = program.AppendBlock(program.Block(0));
for (int i = 0; i < 10; ++i) {
AddOp("channel_recv", {{"Channel", {dataChanName}}},
{{"Status", {"Status"}}, {"Out", {"currentXFib"}}}, {}, goOpBlock);
AddOp("print", {{"In", {"currentXFib"}}}, {{"Out", {"currentXFib"}}},
{{"first_n", 100},
{"summarize", -1},
{"print_tensor_name", false},
{"print_tensor_type", true},
{"print_tensor_shape", false},
{"print_tensor_lod", false},
{"print_phase", std::string("FORWARD")},
{"message", std::string("X: ")}},
goOpBlock);
}
CreateVariable(&scope, place, "quitSignal", 0);
AddOp("channel_send", {{"Channel", {quitChanName}}, {"X", {"quitSignal"}}},
{{"Status", {"Status"}}}, {}, goOpBlock);
// Create Go Op
AddOp("go", {{"X", {dataChanName, quitChanName}}}, {},
{{"sub_block", goOpBlock}}, block);
AddFibonacciSelect(&scope, &place, &program, block, dataChanName,
quitChanName);
// Create Channel Close Op
AddOp("channel_close", {{"Channel", {dataChanName}}}, {}, {}, block);
AddOp("channel_close", {{"Channel", {quitChanName}}}, {}, {}, block);
executor.Run(program, &scope, 0, true, true);
// After we call executor.run, "result" variable should be equal to 34
// (which is 10 loops through fibonacci sequence)
const LoDTensor &tensor = (scope.FindVar("currentXFib"))->Get<LoDTensor>();
auto *finalData = tensor.data<int>();
EXPECT_EQ(finalData[0], 34);
}
} // namespace framework
} // namespace paddle
......@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
......@@ -76,15 +75,13 @@ void InitializeVariable(Variable* var, proto::VarType::Type var_type) {
var->GetMutable<platform::PlaceList>();
} else if (var_type == proto::VarType::READER) {
var->GetMutable<ReaderHolder>();
} else if (var_type == proto::VarType::CHANNEL) {
var->GetMutable<ChannelHolder>();
} else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in operator
} else {
PADDLE_THROW(
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
"LOD_RANK_TABLE, PLACE_LIST, READER, RAW]",
var_type);
}
}
......
......@@ -126,7 +126,6 @@ message VarType {
LOD_TENSOR_ARRAY = 13;
PLACE_LIST = 14;
READER = 15;
CHANNEL = 16;
// Any runtime decided variable type is raw
// raw variables should manage their own allocations
// in operators like nccl_op
......@@ -158,12 +157,6 @@ message VarType {
message ReaderDesc { repeated LoDTensorDesc lod_tensor = 1; }
optional ReaderDesc reader = 5;
message ChannelDesc {
required Type data_type = 1;
required int64 capacity = 2;
}
optional ChannelDesc channel = 6;
message Tuple { repeated Type element_type = 1; }
optional Tuple tuple = 7;
}
......
......@@ -28,12 +28,13 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap
pass_library(graph_to_program_pass base)
pass_library(graph_viz_pass base)
pass_library(fc_fuse_pass inference)
if(WITH_MKLDNN)
pass_library(conv_relu_mkldnn_fuse_pass inference)
endif()
if (WITH_MKLDNN)
pass_library(conv_relu_mkldnn_fuse_pass inference)
endif ()
pass_library(attention_lstm_fuse_pass inference)
pass_library(infer_clean_graph_pass inference)
pass_library(fc_lstm_fuse_pass inference)
pass_library(embedding_fc_lstm_fuse_pass inference)
pass_library(fc_gru_fuse_pass inference)
pass_library(seq_concat_fc_fuse_pass inference)
......@@ -49,6 +50,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r
cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector)
cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto)
if(WITH_MKLDNN)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
endif()
if (WITH_MKLDNN)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
endif ()
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h"
#include <algorithm>
#include <string>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace framework {
namespace ir {
static int BuildFusion(Graph* graph, const std::string& name_scope,
Scope* scope, bool with_fc_bias) {
GraphPatternDetector gpd;
auto* pattern = gpd.mutable_pattern();
// Build pattern
PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x"))
->assert_is_op_input("lookup_table")
->assert_var_not_persistable();
patterns::Embedding embedding_pattern(pattern, name_scope);
// TODO(jczaja): Intermediate can only be for val that are not used anywhere
// but lookup table output may go into other LSTM (for reverse
// direction)
auto* embedding_out = embedding_pattern(x);
patterns::FC fc_pattern(pattern, name_scope);
// fc_out is a tmp var, will be removed after fuse, so marked as intermediate.
auto* fc_out = fc_pattern(embedding_out, with_fc_bias)->AsIntermediate();
patterns::LSTM lstm_pattern(pattern, name_scope);
lstm_pattern(fc_out);
// Create New OpDesc
auto embedding_lstm_creator = [&](Node* embedding, Node* W, Node* lstm,
Node* input, Node* weight_x, Node* weight_h,
Node* bias, Node* hidden, Node* cell,
Node* xx, Node* fc_bias) {
OpDesc op_desc;
op_desc.SetType("fused_embedding_fc_lstm");
#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
SET_IN(Ids, input);
SET_IN(WeightH, weight_h);
// Neet to have this passed as We need Wc data for peephole connections
SET_IN(Bias, bias);
#undef SET_IN
// Multiply embeddings with Weights
PADDLE_ENFORCE(scope);
const std::string& embeddings = patterns::UniqueKey("Embeddings");
auto* embeddings_var = scope->Var(embeddings);
PADDLE_ENFORCE(embeddings_var);
auto* embeddings_tensor =
embeddings_var->GetMutable<framework::LoDTensor>();
// Get WeightX size: [single_embedding, fc_size]
// and embedding size: [dict_size, single_embedding]
// and create new size of embeddings eg. [dict_size , hidden_size]
auto* embedding_var = scope->FindVar(W->Name());
PADDLE_ENFORCE(embedding_var);
const auto& embedding_tensor = embedding_var->Get<framework::LoDTensor>();
const auto& weightx_tensor =
scope->FindVar(weight_x->Name())->Get<framework::LoDTensor>();
embeddings_tensor->Resize(
{embedding_tensor.dims()[0], weightx_tensor.dims()[1]});
// Multiplie embeddings via WeightsX and add bias
auto embedding_data = embedding_tensor.data<float>();
auto weightx_data = weightx_tensor.data<float>();
auto embeddings_data =
embeddings_tensor->mutable_data<float>(platform::CPUPlace());
// Adding biases to GEMM result to be
auto* lstm_bias_var = scope->FindVar(bias->Name());
PADDLE_ENFORCE(lstm_bias_var);
const auto& lstm_bias_tensor = lstm_bias_var->Get<framework::LoDTensor>();
auto alpha = 1.0f;
auto beta = 1.0f;
int m = embedding_tensor.dims()[0];
int n = weightx_tensor.dims()[1];
int k = embedding_tensor.dims()[1];
// Copy only gate biases values (only actual bias data, not peephole
// weights)
std::vector<float> combined_biases;
combined_biases.reserve(n);
std::copy_n(lstm_bias_tensor.data<float>(), n,
std::back_inserter(combined_biases));
if (with_fc_bias) {
// Add FC-bias with LSTM-bias (into GEMM result to be)
auto* fc_bias_var = scope->FindVar(fc_bias->Name());
const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>();
for (int i = 0; i < fc_bias_tensor.numel(); i++) {
combined_biases[i] += fc_bias_tensor.data<float>()[i];
}
}
// broadcast biases
std::vector<float> ones(m, 1.0f);
paddle::operators::math::CBlas<float>::GEMM(
CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, 1, alpha, &ones[0], 1,
&combined_biases[0], n, 0.0f, embeddings_data, n);
// Wx*embeddings + biases
paddle::operators::math::CBlas<float>::GEMM(
CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
embedding_data, k, weightx_data, n, beta, embeddings_data, n);
op_desc.SetInput("Embeddings", {embeddings});
// Create temp variables.
const std::string BatchedInput = patterns::UniqueKey("BatchedInput");
const std::string BatchedCellPreAct =
patterns::UniqueKey("BatchedCellPreAct");
const std::string BatchedGate = patterns::UniqueKey("BatchedGate");
scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>();
scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>();
scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>();
op_desc.SetInput("H0", {});
op_desc.SetInput("C0", {});
op_desc.SetOutput("Hidden", {hidden->Name()});
op_desc.SetOutput("Cell", {cell->Name()});
op_desc.SetOutput("XX", {xx->Name()});
op_desc.SetOutput("BatchedGate", {BatchedGate});
op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct});
op_desc.SetOutput("BatchedInput", {BatchedInput});
op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse"));
op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes"));
// TODO(TJ): get from attr
op_desc.SetAttr("use_seq", true);
PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
auto* scope = graph->Get<Scope*>(kParamScopeAttr);
#define OP_SET_OUT(x) \
const std::string x = patterns::UniqueKey(#x); \
op_desc.SetOutput(#x, {x}); \
scope->Var(x)->GetMutable<LoDTensor>()
OP_SET_OUT(BatchedCell);
OP_SET_OUT(BatchedHidden);
OP_SET_OUT(ReorderedH0);
OP_SET_OUT(ReorderedC0);
#undef OP_SET_OUT
auto* op = graph->CreateOpNode(&op_desc);
IR_NODE_LINK_TO(input, op);
IR_NODE_LINK_TO(weight_x, op);
IR_NODE_LINK_TO(weight_h, op);
IR_NODE_LINK_TO(bias, op);
IR_NODE_LINK_TO(op, hidden);
return op;
};
int fusion_count{0};
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, lstm_pattern);
GET_IR_NODE_FROM_SUBGRAPH(lookup_table, lookup_table, embedding_pattern);
GET_IR_NODE_FROM_SUBGRAPH(W, W, embedding_pattern);
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
// TODO(jczaja): Add support for is_sparse / is_distributed
auto is_sparse = boost::get<bool>(lookup_table->Op()->GetAttr("is_sparse"));
auto is_distributed =
boost::get<bool>(lookup_table->Op()->GetAttr("is_distributed"));
if (is_sparse == true || is_distributed == true) {
return;
}
if (with_fc_bias) {
GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern);
embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
Bias, Hidden, Cell, fc_out, fc_bias);
// Remove unneeded nodes.
// TODO(jczaja): Proper removing of lookup table
std::unordered_set<const Node*> marked_nodes(
//{lookup_table, mul, lstm, elementwise_add, fc_bias, W});
{mul, lstm, elementwise_add, fc_bias});
GraphSafeRemoveNodes(graph, marked_nodes);
} else {
GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern);
embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight,
Bias, Hidden, Cell, fc_out, nullptr);
// Remove unneeded nodes.
// TODO(jczaja): Proper removing of lookup table
// std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul,
// lstm});
std::unordered_set<const Node*> marked_nodes({mul, lstm});
GraphSafeRemoveNodes(graph, marked_nodes);
}
++fusion_count;
};
gpd(graph, handler);
return fusion_count;
}
std::unique_ptr<ir::Graph> EmbeddingFCLSTMFusePass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
FusePassBase::Init(name_scope_, graph.get());
int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope(),
true /*with_fc_bias*/);
AddStatis(fusion_count);
return graph;
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(embedding_fc_lstm_fuse_pass,
paddle::framework::ir::EmbeddingFCLSTMFusePass);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
// Fusing of Embedding , FC and LSTM op
// Just FC without bias
class EmbeddingFCLSTMFusePass : public FusePassBase {
public:
virtual ~EmbeddingFCLSTMFusePass() {}
protected:
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
const std::string name_scope_{"embedding_fc_lstm_fuse"};
};
} // namespace ir
} // namespace framework
} // namespace paddle
......@@ -692,6 +692,24 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x,
}
}
PDNode *patterns::Embedding::operator()(PDNode *x) {
x->assert_is_op_input("lookup_table", "Ids");
auto *lookup_table_op =
pattern->NewNode(lookup_table_repr())->assert_is_op("lookup_table");
#define NEW_NODE(arg__, io__) \
auto *arg__ = pattern->NewNode(arg__##_repr()) \
->assert_is_op_##io__("lookup_table", #arg__);
NEW_NODE(W, input);
NEW_NODE(Out, output);
#undef NEW_NODE
lookup_table_op->LinksFrom({x, W});
lookup_table_op->LinksTo({Out});
return Out;
}
PDNode *patterns::LSTM::operator()(PDNode *x) {
x->assert_is_op_input("lstm", "Input");
auto *lstm_op = pattern->NewNode(lstm_repr())->assert_is_op("lstm");
......
......@@ -418,6 +418,23 @@ struct FC : public PatternBase {
PATTERN_DECL_NODE(Out);
};
// Embedding
struct Embedding : public PatternBase {
Embedding(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "embedding") {}
PDNode* operator()(PDNode* x);
// declare operator node's name
PATTERN_DECL_NODE(lookup_table);
// Inputs
//
PATTERN_DECL_NODE(Ids);
PATTERN_DECL_NODE(W); // embeddings
// Outputs
PATTERN_DECL_NODE(Out);
};
struct LSTM : public PatternBase {
LSTM(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "lstm") {}
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace framework {
// These code can be shared with Executor.
static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
if (var_type == proto::VarType::LOD_TENSOR) {
var->GetMutable<LoDTensor>();
} else if (var_type == proto::VarType::SELECTED_ROWS) {
var->GetMutable<SelectedRows>();
} else if (var_type == proto::VarType::FEED_MINIBATCH) {
var->GetMutable<FeedFetchList>();
} else if (var_type == proto::VarType::FETCH_LIST) {
var->GetMutable<FeedFetchList>();
} else if (var_type == proto::VarType::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>();
} else if (var_type == proto::VarType::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>();
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
var->GetMutable<LoDTensorArray>();
} else if (var_type == proto::VarType::PLACE_LIST) {
var->GetMutable<platform::PlaceList>();
} else if (var_type == proto::VarType::READER) {
var->GetMutable<ReaderHolder>();
} else if (var_type == proto::VarType::RAW) {
// GetMutable will be called in operator
} else {
PADDLE_THROW(
"Variable type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER, CHANNEL, RAW]",
var_type);
}
}
void NaiveExecutor::Prepare(Scope *parent_scope,
const ProgramDesc &program_desc, int block_id,
bool with_feed_fetch_ops) {
if (!parent_scope) {
scope_ = new framework::Scope;
} else {
scope_ = &parent_scope->NewScope();
}
CreateVariables(program_desc, scope_, block_id);
CreateOps(program_desc, block_id, with_feed_fetch_ops);
}
void NaiveExecutor::Run() {
for (auto &op : ops_) {
VLOG(4) << "run " << op->Type();
op->Run(*scope_, place_);
}
}
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
int block_id) {
PADDLE_ENFORCE(scope);
auto &global_block = desc.Block(block_id);
const Scope *ancestor_scope = scope;
while (ancestor_scope->parent()) {
ancestor_scope = ancestor_scope->parent();
}
if (ancestor_scope != scope) {
for (auto &var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
// Create persistable vars in ancestor scope.
if (var->Persistable()) {
auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else { // Create temporary variables in local scope.
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto &var : global_block.AllVars()) {
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}
void NaiveExecutor::CreateOps(const ProgramDesc &desc, int block_id,
bool with_feed_fetch_ops) {
for (const auto &op_desc : desc.Block(block_id).AllOps()) {
if (!with_feed_fetch_ops &&
(op_desc->Type() == "feed" || op_desc->Type() == "fetch")) {
string::PrettyLogEndl(string::Style::detail(), "--- skip [%s], %s -> %s",
op_desc->Input("X")[0], op_desc->Type(),
op_desc->Output("Out")[0]);
continue;
}
ops_.emplace_back(OpRegistry::CreateOp(*op_desc));
}
}
LoDTensor *NaiveExecutor::FindTensor(const std::string &name) {
PADDLE_ENFORCE(scope_, "Need to init scope first");
auto *var = scope_->FindVar(name);
PADDLE_ENFORCE(var, "No variable [%s] in the scope");
auto *tensor = const_cast<LoDTensor *>(&var->Get<LoDTensor>());
return tensor;
}
void NaiveExecutor::CleanFeedFetchOps() {
std::vector<std::unique_ptr<OperatorBase>> ops;
for (auto &op : ops_) {
if (op->Type() != "feed" && op->Type() != "fetch") {
ops.emplace_back(std::move(op));
}
}
ops_.swap(ops);
}
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace framework {
/*
* Simple, intuitive and effective. Only single thread is supported, and
* currently designed for inference.
*/
class NaiveExecutor {
public:
explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
// Create child scope.
// Create variables.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
int block_id, bool with_feed_fetch_ops);
// Run all the operators.
void Run();
// Get an tensor to operating directly, without the need for feed_ops.
LoDTensor* FindTensor(const std::string& name);
Scope* scope() { return scope_; }
void CleanFeedFetchOps();
protected:
void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);
void CreateOps(const ProgramDesc& desc, int block_id,
bool with_feed_fetch_ops);
private:
const platform::Place place_;
// Catch the required resource to avoid recreate.
std::vector<std::unique_ptr<OperatorBase>> ops_;
Scope* scope_;
};
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/naive_executor.h"
#include <gtest/gtest.h>
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
namespace paddle {
namespace framework {
TEST(NaiveExecutor, Basic) {
ProgramDesc program;
auto* main_block = program.MutableBlock(0);
auto* a = main_block->Var("a"); // input
auto* b = main_block->Var("b"); // input
auto* c = main_block->Var("c"); // input
a->SetType(proto::VarType::LOD_TENSOR);
b->SetType(proto::VarType::LOD_TENSOR);
c->SetType(proto::VarType::LOD_TENSOR);
auto* add = main_block->AppendOp();
add->SetType("elementwise_add");
add->SetInput("X", {"a"});
add->SetInput("Y", {"b"});
add->SetOutput("Out", {"c"});
auto place = platform::CPUPlace();
NaiveExecutor exe(place);
exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
auto* a_tensor = exe.FindTensor("a");
auto* b_tensor = exe.FindTensor("b");
auto* c_tensor = exe.FindTensor("c");
a_tensor->Resize({1, 4});
b_tensor->Resize({1, 4});
c_tensor->Resize({1, 4});
b_tensor->mutable_data<float>(place);
a_tensor->mutable_data<float>(place);
float a_arr[] = {0, 1, 2, 3};
float b_arr[] = {0.0, .1, .2, .3};
std::copy_n(a_arr, 4, a_tensor->mutable_data<float>(place));
std::copy_n(b_arr, 4, b_tensor->mutable_data<float>(place));
exe.Run();
auto* c_data = c_tensor->mutable_data<float>(place);
for (int i = 0; i < 4; i++) {
EXPECT_NEAR(c_data[i], 1.1 * i, 1e-3);
}
}
} // namespace framework
} // namespace paddle
USE_OP(elementwise_add);
......@@ -154,9 +154,15 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::SetDeviceId(dev_id);
#endif
}
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
if (platform::IsProfileEnabled()) {
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
}
RunImpl(scope, place);
if (VLOG_IS_ON(3)) {
VLOG(3) << place << " " << DebugStringEx(&scope);
}
......
......@@ -156,10 +156,12 @@ ParallelExecutor::ParallelExecutor(
params, member_->local_scopes_, member_->use_cuda_);
#endif
// If the loss_var_name is given, the number of graph should be only one.
if (loss_var_name.size()) {
PADDLE_ENFORCE_EQ(ir::GraphNum(*graph), 1,
"The number of graph should be only one");
if (VLOG_IS_ON(5)) {
// If the loss_var_name is given, the number of graph should be only one.
if (loss_var_name.size()) {
PADDLE_ENFORCE_EQ(ir::GraphNum(*graph), 1,
"The number of graph should be only one");
}
}
if (exec_strategy.type_ == ExecutionStrategy::kDefault) {
......
......@@ -17,7 +17,6 @@ limitations under the License. */
#include <stdexcept>
#include <string>
#include <vector>
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/var_desc.h"
......
......@@ -88,13 +88,7 @@ std::vector<std::vector<int64_t>> VarDesc::GetShapes() const {
}
void VarDesc::SetDataType(proto::VarType::Type data_type) {
switch (desc_.type().type()) {
case proto::VarType::CHANNEL:
mutable_channel_desc()->set_data_type(data_type);
break;
default:
mutable_tensor_desc()->set_data_type(data_type);
}
mutable_tensor_desc()->set_data_type(data_type);
}
void VarDesc::SetDataTypes(
......@@ -115,13 +109,7 @@ void VarDesc::SetDataTypes(
}
proto::VarType::Type VarDesc::GetDataType() const {
switch (desc_.type().type()) {
case proto::VarType::CHANNEL:
return channel_desc().data_type();
break;
default:
return tensor_desc().data_type();
}
return tensor_desc().data_type();
}
std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
......@@ -134,17 +122,6 @@ std::vector<proto::VarType::Type> VarDesc::GetDataTypes() const {
return res;
}
void VarDesc::SetCapacity(int64_t capacity) {
switch (desc_.type().type()) {
case proto::VarType::CHANNEL:
desc_.mutable_type()->mutable_channel()->set_capacity(capacity);
break;
default:
PADDLE_THROW("Setting 'capacity' is not supported by the type of var %s.",
this->Name());
}
}
void VarDesc::SetLoDLevel(int32_t lod_level) {
switch (desc_.type().type()) {
case proto::VarType::LOD_TENSOR:
......@@ -214,19 +191,6 @@ std::vector<int32_t> VarDesc::GetLoDLevels() const {
}
}
const proto::VarType::ChannelDesc &VarDesc::channel_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
switch (desc_.type().type()) {
case proto::VarType::CHANNEL:
return desc_.type().channel();
default:
PADDLE_THROW(
"Getting 'channel_desc' is not supported by the type of var %s.",
this->Name());
}
}
const proto::VarType::TensorDesc &VarDesc::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
......@@ -262,20 +226,6 @@ std::vector<proto::VarType::TensorDesc> VarDesc::tensor_descs() const {
}
}
proto::VarType::ChannelDesc *VarDesc::mutable_channel_desc() {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
switch (desc_.type().type()) {
case proto::VarType::CHANNEL:
return desc_.mutable_type()->mutable_channel();
default:
PADDLE_THROW(
"Getting 'mutable_channel_desc' is not supported by the type of var "
"%s.",
this->Name());
}
}
proto::VarType::TensorDesc *VarDesc::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
PADDLE_ENFORCE(desc_.type().has_type(), "The var type hasn't been set.");
......
......@@ -87,8 +87,6 @@ class VarDesc {
void SetDataTypes(
const std::vector<proto::VarType::Type> &multiple_data_type);
void SetCapacity(int64_t capacity);
proto::VarType::Type GetDataType() const;
std::vector<proto::VarType::Type> GetDataTypes() const;
......@@ -110,10 +108,8 @@ class VarDesc {
void SetPersistable(bool persistable) { desc_.set_persistable(persistable); }
private:
const proto::VarType::ChannelDesc &channel_desc() const;
const proto::VarType::TensorDesc &tensor_desc() const;
std::vector<proto::VarType::TensorDesc> tensor_descs() const;
proto::VarType::ChannelDesc *mutable_channel_desc();
proto::VarType::TensorDesc *mutable_tensor_desc();
std::vector<proto::VarType::TensorDesc *> mutable_tensor_descs();
......
......@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -41,8 +40,6 @@ inline proto::VarType::Type ToVarType(std::type_index type) {
return proto::VarType_Type_SELECTED_ROWS;
} else if (IsType<ReaderHolder>(type)) {
return proto::VarType_Type_READER;
} else if (IsType<ChannelHolder>(type)) {
return proto::VarType_Type_CHANNEL;
} else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
}
......@@ -66,9 +63,6 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
case proto::VarType_Type_READER:
visitor(var.Get<ReaderHolder>());
return;
case proto::VarType_Type_CHANNEL:
visitor(var.Get<ChannelHolder>());
return;
default:
PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
}
......
......@@ -53,7 +53,7 @@ if(NOT APPLE)
endif()
if(WITH_TESTING)
# tests/book depends the models that generated by python/paddle/fluid/tests/book
# tests/book depends the models that generated by python/paddle/fluid/tests/book
add_subdirectory(tests/book)
if(WITH_INFERENCE_API_TEST)
add_subdirectory(tests/api)
......
cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass)
set(analysis_deps
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log)
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log)
cc_library(analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
analyzer.cc
......
......@@ -41,12 +41,6 @@ class AnalysisPass {
// all passes have run.
virtual bool Finalize() { return false; }
// Get a Pass appropriate to print the Node this pass operates on.
virtual AnalysisPass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const {
return nullptr;
}
// Create a debugger Pass that draw the DFG by graphviz toolkit.
virtual AnalysisPass *CreateGraphvizDebugerPass() const { return nullptr; }
......
......@@ -64,14 +64,15 @@ class Analyzer : public OrderedRegistry<PassManager> {
// larger fusion.
const std::vector<std::string> all_ir_passes_{{
// Manual update the passes here.
"infer_clean_graph_pass", //
"attention_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
"infer_clean_graph_pass", //
"attention_lstm_fuse_pass", //
"embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
#ifdef PADDLE_WITH_MKLDNN
"conv_relu_mkldnn_fuse_pass", //
#endif
......
......@@ -18,10 +18,10 @@ if(APPLE)
endif(APPLE)
set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager ${GLOB_PASS_LIB})
set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor ${GLOB_PASS_LIB})
if(WITH_GPU AND TENSORRT_FOUND)
set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine)
set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine analysis_predictor)
endif()
function(inference_api_test TARGET_NAME)
......@@ -43,8 +43,10 @@ function(inference_api_test TARGET_NAME)
endif(WITH_TESTING)
endfunction(inference_api_test)
cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor)
cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis)
cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope)
cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor)
cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api)
cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api)
cc_test(test_paddle_inference_api
SRCS api_tester.cc
DEPS paddle_inference_api)
......@@ -52,18 +54,22 @@ cc_test(test_paddle_inference_api
inference_api_test(test_api_impl SRC api_impl_tester.cc
ARGS test_word2vec test_image_classification)
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor ${inference_deps} paddle_inference_api
ARGS --dirname=${PYTHON_TESTS_DIR}/book)
if(WITH_GPU AND TENSORRT_FOUND)
cc_library(paddle_inference_tensorrt_subgraph_engine
SRCS api_tensorrt_subgraph_engine.cc
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter)
DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter zero_copy_tensor_dummy)
inference_api_test(test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec)
endif()
if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
# compile the libinference_anakin_api.a and anakin.so.
cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml)
cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber)
cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml scope zero_copy_tensor_dummy)
cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber scope)
function(anakin_target target_name)
target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
endfunction()
......
......@@ -16,11 +16,15 @@
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -28,8 +32,11 @@ DECLARE_bool(profile);
namespace paddle {
using contrib::AnalysisConfig;
bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope>& parent_scope) {
const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(3) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) {
......@@ -43,7 +50,8 @@ bool AnalysisPredictor::Init(
if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device);
LOG(WARNING) << "ir optimize only supports CPU currently";
LOG(WARNING) << "ir optimize only supports CPU currently, enable_ir_optim "
"is turned false.";
config_.enable_ir_optim = false;
} else {
place_ = paddle::platform::CPUPlace();
......@@ -56,37 +64,134 @@ bool AnalysisPredictor::Init(
scope_.reset(new paddle::framework::Scope());
}
executor_.reset(new paddle::framework::Executor(place_));
executor_.reset(new paddle::framework::NaiveExecutor(place_));
// Initialize the inference program
if (!config_.model_dir.empty()) {
// Parameters are saved in separate files sited in
// the specified `dirname`.
inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
config_.model_dir);
} else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
if (!program) {
if (!LoadProgramDesc()) return false;
OptimizeInferenceProgram();
} else {
LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
inference_program_ = program;
}
executor_->Prepare(scope_.get(), *inference_program_, 0,
config_.use_feed_fetch_ops);
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch();
return true;
}
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
// set feed variable
std::vector<framework::LoDTensor> feeds;
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
}
// Run the inference program
// if share variables, we need not create variables
executor_->Run();
OptimizeInferenceProgram();
if (config_._use_mkldnn) {
executor_->EnableMKLDNN(*inference_program_);
// get fetch variable
if (!GetFetch(output_data, scope)) {
LOG(ERROR) << "fail to get fetches";
return false;
}
ctx_ = executor_->Prepare(*inference_program_, 0);
VLOG(3) << "predict cost: " << timer.toc() << "ms";
return true;
}
VLOG(5) << "to create variables";
PADDLE_ENFORCE(scope_.get());
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch();
bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size();
return false;
}
// Cache the inputs memory for better concurrency performance.
feed_tensors_.resize(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
auto &input = feed_tensors_[i];
framework::DDim ddim = framework::make_ddim(inputs[i].shape);
void *input_ptr;
if (inputs[i].dtype == PaddleDType::INT64) {
input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
} else if (inputs[i].dtype == PaddleDType::FLOAT32) {
input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
} else {
LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
return false;
}
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
inputs[i].data.length());
// TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
framework::LoD lod;
for (auto &level : inputs[i].lod) {
lod.emplace_back(level);
}
input.set_lod(lod);
int idx = -1;
if (config_.specify_input_name) {
idx = feed_names_[inputs[i].name];
} else {
idx = boost::get<int>(feeds_[i]->GetAttr("col"));
}
framework::SetFeedVariable(scope, input, "feed", idx);
}
return true;
}
template <typename T>
void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
PaddleTensor *output) {
// set shape.
auto shape = framework::vectorize(fetch.dims());
output->shape.assign(shape.begin(), shape.end());
// set data.
const T *data = fetch.data<T>();
int num_elems = inference::VecReduceToInt(shape);
output->data.Resize(num_elems * sizeof(T));
// The fetched tensor output by fetch op, should always in CPU memory, so just
// copy.
memcpy(output->data.data(), data, num_elems * sizeof(T));
// set lod
output->lod.clear();
for (auto &level : fetch.lod()) {
output->lod.emplace_back(level.begin(), level.end());
}
}
bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
PADDLE_ENFORCE((size_t)idx == i);
framework::LoDTensor &fetch =
framework::GetFetchVariable(*scope, "fetch", idx);
auto type = fetch.type();
auto output = &(outputs->at(i));
if (type == typeid(float)) {
GetFetchOne<float>(fetch, output);
output->dtype = PaddleDType::FLOAT32;
} else if (type == typeid(int64_t)) {
GetFetchOne<int64_t>(fetch, output);
output->dtype = PaddleDType::INT64;
} else {
LOG(ERROR) << "unknown type, only support float32 and int64 now.";
}
}
return true;
}
......@@ -107,6 +212,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
new std::string(config_.prog_file));
argument_.fluid_model_param_path.reset(new std::string(config_.param_file));
}
argument_.origin_program_desc.reset(
new ProgramDesc(*inference_program_->Proto()));
PADDLE_ENFORCE(
......@@ -127,9 +233,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
const contrib::AnalysisConfig& config) {
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
VLOG(3) << "create AnalysisConfig";
if (config.use_gpu) {
// 1. GPU memeroy
......@@ -150,15 +255,90 @@ CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
}
std::unique_ptr<PaddlePredictor> predictor(new AnalysisPredictor(config));
if (!dynamic_cast<AnalysisPredictor*>(predictor.get())->Init(nullptr)) {
if (!dynamic_cast<AnalysisPredictor *>(predictor.get())->Init(nullptr)) {
return nullptr;
}
return predictor;
}
void AnalysisPredictor::PrepareFeedFetch() {
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
int idx = boost::get<int>(op->GetAttr("col"));
if (feeds_.size() <= static_cast<size_t>(idx)) {
feeds_.resize(idx + 1);
}
feeds_[idx] = op;
feed_names_[op->Output("Out")[0]] = idx;
} else if (op->Type() == "fetch") {
int idx = boost::get<int>(op->GetAttr("col"));
if (fetchs_.size() <= static_cast<size_t>(idx)) {
fetchs_.resize(idx + 1);
}
fetchs_[idx] = op;
}
}
}
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = true;
res->SetName(name);
return res;
}
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
const std::string &name) {
PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name);
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(executor_->scope())));
res->input_or_output_ = false;
res->SetName(name);
return res;
}
bool AnalysisPredictor::ZeroCopyRun() {
executor_->Run();
return true;
}
bool AnalysisPredictor::LoadProgramDesc() {
// Initialize the inference program
std::unique_ptr<framework::Executor> tmp_exe(
new framework::Executor(platform::CPUPlace()));
if (!config_.model_dir.empty()) {
// Parameters are saved in separate files sited in
// the specified `dirname`.
inference_program_ = paddle::inference::Load(
static_cast<framework::Executor *>(tmp_exe.get()), scope_.get(),
config_.model_dir);
} else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
inference_program_ = paddle::inference::Load(
static_cast<framework::Executor *>(tmp_exe.get()), scope_.get(),
config_.prog_file, config_.param_file);
} else {
LOG(ERROR) << string::Sprintf(
"not valid model path '%s' or program path '%s'.", config_.model_dir,
config_.param_file);
return false;
}
return true;
}
std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone() {
auto *x = new AnalysisPredictor(config_);
x->Init(scope_, inference_program_);
return std::unique_ptr<PaddlePredictor>(x);
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
const contrib::AnalysisConfig& config) {
const contrib::AnalysisConfig &config) {
return CreatePaddlePredictor<contrib::AnalysisConfig,
PaddleEngineKind::kAnalysis>(config);
}
......
......@@ -12,42 +12,81 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"
namespace paddle {
using inference::analysis::Argument;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using framework::NaiveExecutor;
using contrib::AnalysisConfig;
/* This predictor is based on the original native predictor with IR and Analysis
* support. It will optimize IR and Parameters in the runtime.
* TODO(Superjomn) Replace the Navive predictor?
*/
class AnalysisPredictor : public NativePaddlePredictor {
class AnalysisPredictor : public PaddlePredictor {
public:
explicit AnalysisPredictor(const contrib::AnalysisConfig& config)
: NativePaddlePredictor(config), config_(config) {}
explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {}
bool Init(const std::shared_ptr<framework::Scope>& parent_scope);
bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program = nullptr);
bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data,
int batch_size = -1) override {
return NativePaddlePredictor::Run(inputs, output_data, batch_size);
}
bool Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size = -1) override;
std::unique_ptr<ZeroCopyTensor> GetInputTensor(
const std::string &name) override;
std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
const std::string &name) override;
bool ZeroCopyRun() override;
void PrepareFeedFetch();
void OptimizeInferenceProgram();
Argument& analysis_argument() { return argument_; }
Argument &analysis_argument() { return argument_; }
std::unique_ptr<PaddlePredictor> Clone() override;
framework::Scope *scope() { return executor_->scope(); }
framework::ProgramDesc &program() { return *inference_program_; }
protected:
bool LoadProgramDesc();
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
framework::Scope *scope);
bool GetFetch(std::vector<PaddleTensor> *output_data,
framework::Scope *scope);
template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs,
PaddleTensor *output_data);
private:
contrib::AnalysisConfig config_;
Argument argument_;
std::unique_ptr<NaiveExecutor> executor_;
platform::Place place_;
std::shared_ptr<framework::Scope> scope_;
framework::Scope *sub_scope_{nullptr};
std::shared_ptr<framework::ProgramDesc> inference_program_;
std::vector<framework::OpDesc *> feeds_;
std::map<std::string, size_t> feed_names_;
std::vector<framework::OpDesc *> fetchs_;
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, so cache them.
std::vector<framework::LoDTensor> feed_tensors_;
};
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string(dirname, "", "dirname to tests.");
namespace paddle {
namespace inference {
using contrib::AnalysisConfig;
TEST(AnalysisPredictor, ZeroCopy) {
AnalysisConfig config;
config.model_dir = FLAGS_dirname + "/word2vec.inference.model";
config.use_feed_fetch_ops = false;
auto predictor =
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
auto w0 = predictor->GetInputTensor("firstw");
auto w1 = predictor->GetInputTensor("secondw");
auto w2 = predictor->GetInputTensor("thirdw");
auto w3 = predictor->GetInputTensor("forthw");
w0->Reshape({4, 1});
w1->Reshape({4, 1});
w2->Reshape({4, 1});
w3->Reshape({4, 1});
auto* w0_data = w0->mutable_data<int64_t>(PaddlePlace::kCPU);
auto* w1_data = w1->mutable_data<int64_t>(PaddlePlace::kCPU);
auto* w2_data = w2->mutable_data<int64_t>(PaddlePlace::kCPU);
auto* w3_data = w3->mutable_data<int64_t>(PaddlePlace::kCPU);
for (int i = 0; i < 4; i++) {
w0_data[i] = i;
w1_data[i] = i;
w2_data[i] = i;
w3_data[i] = i;
}
predictor->ZeroCopyRun();
auto out = predictor->GetOutputTensor("fc_1.tmp_2");
PaddlePlace place;
int size = 0;
auto* out_data = out->data<float>(&place, &size);
LOG(INFO) << "output size: " << size / sizeof(float);
LOG(INFO) << "output_data: " << out_data;
}
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle_inference_api.h"
namespace paddle {
......@@ -26,7 +32,7 @@ int PaddleDtypeSize(PaddleDType dtype) {
}
}
PaddleBuf::PaddleBuf(PaddleBuf&& other)
PaddleBuf::PaddleBuf(PaddleBuf &&other)
: data_(other.data_),
length_(other.length_),
memory_owned_(other.memory_owned_) {
......@@ -35,9 +41,9 @@ PaddleBuf::PaddleBuf(PaddleBuf&& other)
other.length_ = 0;
}
PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; }
PaddleBuf::PaddleBuf(const PaddleBuf &other) { *this = other; }
PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
if (!other.memory_owned_) {
data_ = other.data_;
length_ = other.length_;
......@@ -51,7 +57,7 @@ PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
return *this;
}
PaddleBuf& PaddleBuf::operator=(PaddleBuf&& other) {
PaddleBuf &PaddleBuf::operator=(PaddleBuf &&other) {
// only the buffer with external memory can be copied
data_ = other.data_;
length_ = other.length_;
......@@ -75,7 +81,7 @@ void PaddleBuf::Resize(size_t length) {
}
}
void PaddleBuf::Reset(void* data, size_t length) {
void PaddleBuf::Reset(void *data, size_t length) {
Free();
memory_owned_ = false;
data_ = data;
......@@ -85,7 +91,7 @@ void PaddleBuf::Reset(void* data, size_t length) {
void PaddleBuf::Free() {
if (memory_owned_ && data_) {
PADDLE_ENFORCE_GT(length_, 0);
free(static_cast<char*>(data_));
free(static_cast<char *>(data_));
data_ = nullptr;
length_ = 0;
}
......
......@@ -145,7 +145,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
VLOG(4) << "Run prepared context";
executor_->RunPreparedContext(ctx_.get(), scope,
false, /* don't create local scope each time*/
false /* don't create variable eatch time */);
false /* don't create variable each time */);
VLOG(4) << "Finish prepared context";
// get fetch variable
if (!GetFetch(output_data, scope)) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
......@@ -30,6 +30,8 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -52,6 +54,8 @@ class NativePaddlePredictor : public PaddlePredictor {
~NativePaddlePredictor() override;
framework::Scope *scope() { return sub_scope_ ? sub_scope_ : scope_.get(); }
protected:
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
framework::Scope *scope);
......
......@@ -21,6 +21,12 @@ limitations under the License. */
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#ifdef __clang__
#define ACC_DIFF 4e-3
#else
#define ACC_DIFF 1e-3
#endif
DEFINE_string(dirname, "", "Directory of the inference model.");
namespace paddle {
......@@ -43,7 +49,7 @@ PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
NativeConfig GetConfig() {
NativeConfig config;
config.model_dir = FLAGS_dirname + "word2vec.inference.model";
config.model_dir = FLAGS_dirname + "/word2vec.inference.model";
LOG(INFO) << "dirname " << config.model_dir;
config.fraction_of_gpu_memory = 0.15;
#ifdef PADDLE_WITH_CUDA
......@@ -99,8 +105,8 @@ void MainWord2Vec(bool use_gpu) {
float* lod_data = output1.data<float>();
for (int i = 0; i < output1.numel(); ++i) {
EXPECT_LT(lod_data[i] - data[i], 1e-3);
EXPECT_GT(lod_data[i] - data[i], -1e-3);
EXPECT_LT(lod_data[i] - data[i], ACC_DIFF);
EXPECT_GT(lod_data[i] - data[i], -ACC_DIFF);
}
}
......@@ -110,7 +116,7 @@ void MainImageClassification(bool use_gpu) {
NativeConfig config = GetConfig();
config.use_gpu = use_gpu;
config.model_dir =
FLAGS_dirname + "image_classification_resnet.inference.model";
FLAGS_dirname + "/image_classification_resnet.inference.model";
const bool is_combined = false;
std::vector<std::vector<int64_t>> feed_target_shapes =
......@@ -144,7 +150,7 @@ void MainImageClassification(bool use_gpu) {
float* data = static_cast<float*>(outputs[0].data.data());
float* lod_data = output1.data<float>();
for (size_t j = 0; j < len / sizeof(float); ++j) {
EXPECT_NEAR(lod_data[j], data[j], 1e-3);
EXPECT_NEAR(lod_data[j], data[j], ACC_DIFF);
}
}
......@@ -199,7 +205,7 @@ void MainThreadsWord2Vec(bool use_gpu) {
float* ref_data = refs[tid].data<float>();
EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
for (int i = 0; i < refs[tid].numel(); ++i) {
EXPECT_NEAR(ref_data[i], data[i], 1e-3);
EXPECT_NEAR(ref_data[i], data[i], ACC_DIFF);
}
});
}
......@@ -214,7 +220,7 @@ void MainThreadsImageClassification(bool use_gpu) {
NativeConfig config = GetConfig();
config.use_gpu = use_gpu;
config.model_dir =
FLAGS_dirname + "image_classification_resnet.inference.model";
FLAGS_dirname + "/image_classification_resnet.inference.model";
auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
std::vector<framework::LoDTensor> jobs(num_jobs);
......@@ -251,7 +257,7 @@ void MainThreadsImageClassification(bool use_gpu) {
float* ref_data = refs[tid].data<float>();
EXPECT_EQ((size_t)refs[tid].numel(), len / sizeof(float));
for (int i = 0; i < refs[tid].numel(); ++i) {
EXPECT_NEAR(ref_data[i], data[i], 1e-3);
EXPECT_NEAR(ref_data[i], data[i], ACC_DIFF);
}
});
}
......
......@@ -2,6 +2,9 @@ set -x
PADDLE_ROOT=$1
TURN_ON_MKL=$2 # use MKL or Openblas
TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
DATA_DIR=$4 # dataset
cd `dirname $0`
current_dir=`pwd`
if [ $2 == ON ]; then
# You can export yourself if move the install path
MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib
......@@ -29,15 +32,15 @@ function download() {
fi
cd ..
}
mkdir -p data
cd data
mkdir -p $DATA_DIR
cd $DATA_DIR
vis_demo_list='se_resnext50 ocr mobilenet'
for vis_demo_name in $vis_demo_list; do
download $vis_demo_name
done
cd ..
# compile and test the demo
cd $current_dir
mkdir -p build
cd build
......@@ -73,9 +76,9 @@ for WITH_STATIC_LIB in ON OFF; do
for use_gpu in $use_gpu_list; do
for vis_demo_name in $vis_demo_list; do
./vis_demo \
--modeldir=../data/$vis_demo_name/model \
--data=../data/$vis_demo_name/data.txt \
--refer=../data/$vis_demo_name/result.txt \
--modeldir=$DATA_DIR/$vis_demo_name/model \
--data=$DATA_DIR/$vis_demo_name/data.txt \
--refer=$DATA_DIR/$vis_demo_name/result.txt \
--use_gpu=$use_gpu
if [ $? -ne 0 ]; then
echo "vis demo $vis_demo_name runs fail."
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {
PADDLE_ENFORCE(!name_.empty(),
"Need to SetName first, so that the corresponding tensor can "
"be retrieved.");
PADDLE_ENFORCE(input_or_output_,
"Can't reshape the output tensor, it is readonly");
PADDLE_ENFORCE(scope_);
auto *scope = static_cast<framework::Scope *>(scope_);
auto *var = scope->FindVar(name_);
PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
auto *tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(shape));
}
template <typename T>
T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
switch (static_cast<int>(place)) {
case static_cast<int>(PaddlePlace::kCPU): {
return tensor->mutable_data<T>(platform::CPUPlace());
}
case static_cast<int>(PaddlePlace::kGPU): {
return tensor->mutable_data<T>(platform::CUDAPlace());
}
default:
PADDLE_THROW("Unsupported place: %d", static_cast<int>(place));
break;
}
return nullptr;
}
template <typename T>
T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
auto *res = tensor->data<T>();
if (platform::is_cpu_place(tensor->place())) {
*place = PaddlePlace::kCPU;
} else if (platform::is_gpu_place(tensor->place())) {
*place = PaddlePlace::kGPU;
} else {
*place = PaddlePlace::kUNK;
}
*size = tensor->numel();
return res;
}
template float *ZeroCopyTensor::data<float>(PaddlePlace *place, int *size);
template int64_t *ZeroCopyTensor::data<int64_t>(PaddlePlace *place, int *size);
template float *ZeroCopyTensor::mutable_data<float>(PaddlePlace place);
template int64_t *ZeroCopyTensor::mutable_data<int64_t>(PaddlePlace place);
void *ZeroCopyTensor::FindTensor() const {
PADDLE_ENFORCE(!name_.empty(),
"Need to SetName first, so that the corresponding tensor can "
"be retrieved.");
PADDLE_ENFORCE(scope_);
auto *scope = static_cast<framework::Scope *>(scope_);
auto *var = scope->FindVar(name_);
PADDLE_ENFORCE(var, "No tensor called [%s] in the runtime scope", name_);
auto *tensor = var->GetMutable<framework::LoDTensor>();
return tensor;
}
std::vector<int64_t> ZeroCopyTensor::shape() {
auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
PADDLE_ENFORCE(tensor, "not found tensor called %s in the scope", name_);
return framework::vectorize(tensor->dims());
}
void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
framework::LoD lod;
for (auto &level : x) {
lod.emplace_back(level);
}
tensor->set_lod(lod);
}
std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
std::vector<std::vector<size_t>> res;
auto *tensor = static_cast<framework::LoDTensor *>(FindTensor());
for (auto &level : tensor->lod()) {
res.emplace_back(level);
}
return res;
}
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
void ZeroCopyTensor::Reshape(const std::vector<int> &shape) {}
template <typename T>
T *ZeroCopyTensor::mutable_data(PaddlePlace place) {
return nullptr;
}
template <typename T>
T *ZeroCopyTensor::data(PaddlePlace *place, int *size) {
return nullptr;
}
template float *ZeroCopyTensor::data<float>(PaddlePlace *place, int *size);
template int64_t *ZeroCopyTensor::data<int64_t>(PaddlePlace *place, int *size);
template float *ZeroCopyTensor::mutable_data(PaddlePlace place);
template int64_t *ZeroCopyTensor::mutable_data(PaddlePlace place);
void *ZeroCopyTensor::FindTensor() const { return nullptr; }
std::vector<int64_t> ZeroCopyTensor::shape() { return {}; }
void ZeroCopyTensor::SetLoD(const std::vector<std::vector<size_t>> &x) {}
std::vector<std::vector<size_t>> ZeroCopyTensor::lod() const {
return std::vector<std::vector<size_t>>();
}
} // namespace paddle
......@@ -21,8 +21,10 @@
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/string/printf.h"
namespace paddle {
namespace inference {
......@@ -93,6 +95,20 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
template <typename T>
static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
const std::vector<std::vector<T>> &data) {
int size{0};
auto *ptr = tensor->mutable_data<T>(PaddlePlace::kCPU);
int c = 0;
for (const auto &f : data) {
for (T v : f) {
ptr[c++] = v;
}
}
return size;
}
static std::string DescribeTensor(const PaddleTensor &tensor) {
std::stringstream os;
os << "Tensor [" << tensor.name << "]\n";
......@@ -138,5 +154,127 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
}
}
template <typename T>
std::string LoDTensorSummary(const framework::LoDTensor &tensor) {
std::stringstream ss;
ss << "\n---- tensor ---" << '\n';
ss << "lod: [";
for (const auto &level : tensor.lod()) {
ss << "[ ";
for (auto i : level) {
ss << i << ", ";
}
ss << "]";
}
ss << "]\n";
ss << "shape: [";
int size = 1;
for (int i = 0; i < tensor.dims().size(); i++) {
int dim = tensor.dims()[i];
ss << dim << ", ";
size *= dim;
}
ss << "]\n";
ss << "data: ";
for (int i = 0; i < std::min(20, size); i++) {
ss << tensor.data<T>()[i] << " ";
}
ss << "\n";
return ss.str();
}
static bool CompareLoD(const framework::LoD &a, const framework::LoD &b) {
if (a.size() != b.size()) {
LOG(ERROR) << string::Sprintf("lod size not match %d != %d", a.size(),
b.size());
return false;
}
for (size_t i = 0; i < a.size(); i++) {
auto &al = a[i];
auto &bl = b[i];
if (al.size() != bl.size()) {
LOG(ERROR) << string::Sprintf("level size %d != %d", al.size(),
bl.size());
return false;
}
}
return true;
}
static bool CompareShape(const std::vector<int64_t> &a,
const std::vector<int64_t> &b) {
if (a.size() != b.size()) {
LOG(ERROR) << string::Sprintf("shape size not match %d != %d", a.size(),
b.size());
return false;
}
for (size_t i = 0; i < a.size(); i++) {
if (a[i] != b[i]) {
LOG(ERROR) << string::Sprintf("shape %d-th element not match %d != %d", i,
a[i], b[i]);
return false;
}
}
return true;
}
static bool CompareTensorData(const framework::LoDTensor &a,
const framework::LoDTensor &b) {
auto a_shape = framework::vectorize(a.dims());
auto b_shape = framework::vectorize(b.dims());
size_t a_size = std::accumulate(a_shape.begin(), a_shape.end(), 1,
[](int a, int b) { return a * b; });
size_t b_size = std::accumulate(b_shape.begin(), b_shape.end(), 1,
[](int a, int b) { return a * b; });
if (a_size != b_size) {
LOG(ERROR) << string::Sprintf("tensor data size not match, %d != %d",
a_size, b_size);
}
for (size_t i = 0; i < a_size; i++) {
if (a.type() == typeid(float)) {
const auto *a_data = a.data<float>();
const auto *b_data = b.data<float>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
LOG(ERROR) << string::Sprintf(
"tensor data %d-th element not match, %f != %f", i, a_data[i],
b_data[i]);
return false;
}
} else if (a.type() == typeid(int64_t)) {
const auto *a_data = a.data<int64_t>();
const auto *b_data = b.data<int64_t>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
LOG(ERROR) << string::Sprintf(
"tensor data %d-th element not match, %f != %f", i, a_data[i],
b_data[i]);
return false;
}
}
}
return true;
}
static bool CompareTensor(const framework::LoDTensor &a,
const framework::LoDTensor &b) {
if (!CompareLoD(a.lod(), b.lod())) {
return false;
}
if (!CompareShape(framework::vectorize(a.dims()),
framework::vectorize(b.dims()))) {
return false;
}
if (!CompareTensorData(a, b)) {
return false;
}
return true;
}
} // namespace inference
} // namespace paddle
......@@ -101,6 +101,40 @@ struct PaddleTensor {
std::vector<std::vector<size_t>> lod; // Tensor+LoD equals LoDTensor
};
enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
// Tensor without copy, currently only supports AnalysisPredictor.
class ZeroCopyTensor {
public:
void Reshape(const std::vector<int>& shape);
// Get the memory in CPU or GPU with specific data type, should Reshape first
// to tell the data size.
// Once can directly call this data to feed the data.
// This is for write the input tensor.
template <typename T>
T* mutable_data(PaddlePlace place);
// Get the memory directly, will return the place and memory size by pointer.
// This is for reading the output tensor.
template <typename T>
T* data(PaddlePlace* place, int* size);
std::vector<int64_t> shape();
void SetLoD(const std::vector<std::vector<size_t>>& x);
std::vector<std::vector<size_t>> lod() const;
protected:
ZeroCopyTensor(void* scope) : scope_{scope} {}
void SetName(const std::string& name) { name_ = name; }
void* FindTensor() const;
private:
std::string name_;
bool input_or_output_;
friend class AnalysisPredictor;
void* scope_{nullptr};
};
/*
* A simple Inference API for Paddle.
*/
......@@ -120,6 +154,19 @@ class PaddlePredictor {
std::vector<PaddleTensor>* output_data,
int batch_size = -1) = 0;
// Zero copy input and output optimization.
// Get the input or output tensors, and operate on their memory directly,
// without copy.
virtual std::unique_ptr<ZeroCopyTensor> GetInputTensor(
const std::string& name) {
return nullptr;
}
virtual std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
const std::string& name) {
return nullptr;
}
virtual bool ZeroCopyRun() { return false; }
// Clone a predictor that share the model weights, the Cloned predictor should
// be thread-safe.
virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
......@@ -216,9 +263,14 @@ struct AnalysisConfig : public NativeConfig {
bool enable_ir_optim = true;
// Manually determine the IR passes to run.
IrPassMode ir_mode{IrPassMode::kExclude};
std::vector<std::string> ir_passes;
std::vector<std::string> ir_passes{"embedding_fc_lstm_fuse_pass"};
// NOT stable yet.
bool use_feed_fetch_ops{true};
// NOTE this is just for internal development, please not use it.
// NOTE this is just for internal development, please not use it. NOT
// stable
// yet.
bool _use_mkldnn{false};
};
......
......@@ -18,6 +18,8 @@ namespace paddle {
namespace inference {
namespace analysis {
using contrib::AnalysisConfig;
struct DataRecord {
std::vector<int64_t> data;
std::vector<size_t> lod;
......@@ -78,6 +80,7 @@ struct DataRecord {
}
}
}
DataRecord NextBatch() {
DataRecord data;
data.data = batched_datas[batch_iter];
......@@ -155,7 +158,9 @@ TEST(Analyzer_LAC, fuse_statis) {
SetConfig(&cfg);
int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_statis = GetFuseStatis(
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
......
......@@ -16,6 +16,7 @@
namespace paddle {
namespace inference {
using contrib::AnalysisConfig;
struct DataRecord {
std::vector<std::vector<int64_t>> word_data_all, mention_data_all;
......@@ -145,7 +146,9 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
SetConfig(&cfg);
int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_statis = GetFuseStatis(
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
......
......@@ -12,12 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/analysis_predictor.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_bool(with_precision_check, true, "turn on test");
namespace paddle {
namespace inference {
using namespace framework; // NOLINT
using namespace contrib; // NOLINT
struct DataRecord {
std::vector<std::vector<std::vector<float>>> link_step_data_all;
......@@ -29,10 +33,12 @@ struct DataRecord {
size_t batch_iter{0};
size_t batch_size{1};
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
Load(path);
}
DataRecord NextBatch() {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
......@@ -101,6 +107,7 @@ struct DataRecord {
num_samples = num_lines;
}
};
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
int batch_size) {
PaddleTensor lod_attention_tensor, init_zero_tensor, lod_tensor_tensor,
......@@ -149,7 +156,55 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void SetConfig(contrib::AnalysisConfig *cfg) {
void PrepareZeroCopyInputs(ZeroCopyTensor *lod_attention_tensor,
ZeroCopyTensor *cell_init_tensor,
ZeroCopyTensor *data_tensor,
ZeroCopyTensor *hidden_init_tensor,
ZeroCopyTensor *week_tensor,
ZeroCopyTensor *minute_tensor,
DataRecord *data_record, int batch_size) {
auto one_batch = data_record->NextBatch();
std::vector<int> rnn_link_data_shape(
{static_cast<int>(one_batch.rnn_link_data.size()),
static_cast<int>(one_batch.rnn_link_data.front().size())});
lod_attention_tensor->Reshape({1, 2});
lod_attention_tensor->SetLoD({one_batch.lod1, one_batch.lod2});
cell_init_tensor->Reshape({batch_size, 15});
cell_init_tensor->SetLoD({one_batch.lod3});
hidden_init_tensor->Reshape({batch_size, 15});
hidden_init_tensor->SetLoD({one_batch.lod3});
data_tensor->Reshape(rnn_link_data_shape);
data_tensor->SetLoD({one_batch.lod1});
week_tensor->Reshape(
{static_cast<int>(one_batch.rnn_week_datas.size()),
static_cast<int>(one_batch.rnn_week_datas.front().size())});
week_tensor->SetLoD({one_batch.lod3});
minute_tensor->Reshape(
{static_cast<int>(one_batch.rnn_minute_datas.size()),
static_cast<int>(one_batch.rnn_minute_datas.front().size())});
minute_tensor->SetLoD({one_batch.lod3});
// assign data
float arr0[] = {0, 0};
std::vector<float> zeros(batch_size * 15, 0);
std::copy_n(arr0, 2,
lod_attention_tensor->mutable_data<float>(PaddlePlace::kCPU));
std::copy_n(arr0, 2, data_tensor->mutable_data<float>(PaddlePlace::kCPU));
std::copy_n(zeros.begin(), zeros.size(),
cell_init_tensor->mutable_data<float>(PaddlePlace::kCPU));
std::copy_n(zeros.begin(), zeros.size(),
hidden_init_tensor->mutable_data<float>(PaddlePlace::kCPU));
ZeroCopyTensorAssignData(data_tensor, one_batch.rnn_link_data);
ZeroCopyTensorAssignData(week_tensor, one_batch.rnn_week_datas);
ZeroCopyTensorAssignData(minute_tensor, one_batch.rnn_minute_datas);
}
void SetConfig(AnalysisConfig *cfg) {
cfg->prog_file = FLAGS_infer_model + "/__model__";
cfg->param_file = FLAGS_infer_model + "/param";
cfg->use_gpu = false;
......@@ -187,7 +242,9 @@ TEST(Analyzer_rnn1, fuse_statis) {
SetConfig(&cfg);
int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_statis = GetFuseStatis(
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM
......@@ -214,7 +271,229 @@ TEST(Analyzer_rnn1, multi_thread) {
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
TestPrediction(cfg, input_slots_all, &outputs, 4 /* num_threads */);
TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
}
bool CompareTensors(framework::Scope &a_scope, framework::Scope &b_scope,
const std::vector<std::string> &tensors) {
for (auto &x : tensors) {
auto *a_var = a_scope.FindVar(x);
auto *b_var = b_scope.FindVar(x);
if (a_var && b_var) {
if (a_var->Type() == typeid(framework::LoDTensor) ||
a_var->Type() == typeid(framework::Tensor)) {
LOG(INFO) << "comparing tensor " << x;
auto &a_t = a_var->Get<framework::LoDTensor>();
auto &b_t = b_var->Get<framework::LoDTensor>();
if (!inference::CompareTensor(a_t, b_t)) {
LOG(ERROR) << string::Sprintf("tensor %s not match in two scopes", x);
}
} else {
LOG(INFO) << "skip no tensor " << x;
}
} else {
LOG(INFO) << "skip tensor " << x;
}
}
return true;
}
// Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
// on the complex RNN1 model.
TEST(Analyzer_rnn1, ZeroCopy) {
AnalysisConfig config;
SetConfig(&config);
config.use_feed_fetch_ops = false;
PaddlePlace place;
int output_size{0};
auto predictor =
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
config.use_feed_fetch_ops = true;
auto native_predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
config.use_feed_fetch_ops = true; // the analysis predictor needs feed/fetch.
auto analysis_predictor =
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
#define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__);
NEW_TENSOR(data_lod_attention);
NEW_TENSOR(cell_init);
NEW_TENSOR(data);
NEW_TENSOR(week);
NEW_TENSOR(minute);
NEW_TENSOR(hidden_init);
// Prepare data for AnalysisPredictor
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
PrepareZeroCopyInputs(data_lod_attention_tensor.get(), cell_init_tensor.get(),
data_tensor.get(), hidden_init_tensor.get(),
week_tensor.get(), minute_tensor.get(), &data,
FLAGS_batch_size);
// Prepare data for NativePredictor
std::vector<std::vector<PaddleTensor>> native_inputs;
SetInput(&native_inputs);
std::vector<PaddleTensor> native_outputs;
std::vector<PaddleTensor> analysis_outputs;
auto output_tensor = predictor->GetOutputTensor("final_output.tmp_1");
// Run analysis predictor
int num_ops;
auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_EQ(fuse_statis.at("fc_fuse"), 1);
ASSERT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM
ASSERT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
ASSERT_EQ(num_ops,
13); // After graph optimization, only 13 operators exists.
Timer timer;
double total_time{0};
double native_total_time{0};
double analysis_total_time{0.};
for (int i = 0; i < FLAGS_repeat; i++) {
timer.tic();
predictor->ZeroCopyRun();
total_time += timer.toc();
}
auto *output_data = output_tensor->data<float>(&place, &output_size);
ASSERT_GT(output_size, 0); // more than one output!
for (int i = 0; i < FLAGS_repeat; i++) {
// Run native predictor.
timer.tic();
ASSERT_TRUE(native_predictor->Run(native_inputs.front(), &native_outputs));
native_total_time += timer.toc();
}
for (int i = 0; i < FLAGS_repeat; i++) {
timer.tic();
ASSERT_TRUE(
analysis_predictor->Run(native_inputs.front(), &analysis_outputs));
analysis_total_time += timer.toc();
}
if (!FLAGS_with_precision_check) {
return;
}
int native_output_size = VecReduceToInt(native_outputs.front().shape);
EXPECT_EQ(native_output_size, output_size);
// Compare tensors between analysis and zerocopy
auto *p0 = static_cast<AnalysisPredictor *>(predictor.get());
auto *p1 = static_cast<AnalysisPredictor *>(analysis_predictor.get());
auto *p2 = static_cast<NativePaddlePredictor *>(native_predictor.get());
std::vector<std::string> tensor_names;
for (auto &var_desc : p0->program().Block(0).AllVars()) {
tensor_names.push_back(var_desc->Name());
}
LOG(INFO) << "Comparing tensors";
ASSERT_TRUE(
CompareTensors(*p0->scope(), *p1->scope(), {"final_output.tmp_1"}));
ASSERT_TRUE(
CompareTensors(*p0->scope(), *p2->scope(), {"final_output.tmp_1"}));
LOG(INFO) << "output1 " << inference::LoDTensorSummary<float>(
p0->scope()
->FindVar("final_output.tmp_1")
->Get<framework::LoDTensor>());
LOG(INFO) << "output2 " << inference::LoDTensorSummary<float>(
p1->scope()
->FindVar("final_output.tmp_1")
->Get<framework::LoDTensor>());
LOG(INFO) << "output3 " << inference::LoDTensorSummary<float>(
p2->scope()
->FindVar("final_output.tmp_1")
->Get<framework::LoDTensor>());
for (int i = 0; i < output_size; i++) {
LOG(INFO) << output_data[i] << " "
<< static_cast<float *>(native_outputs.front().data.data())[i]
<< " "
<< static_cast<float *>(analysis_outputs.front().data.data())[i];
EXPECT_NEAR(output_data[i],
static_cast<float *>(native_outputs.front().data.data())[i],
1e-3);
}
LOG(INFO) << "batch_size: " << FLAGS_batch_size;
LOG(INFO) << "zero average time: "
<< total_time / (FLAGS_repeat * FLAGS_batch_size);
LOG(INFO) << "analysis average time: "
<< analysis_total_time / (FLAGS_repeat * FLAGS_batch_size);
LOG(INFO) << "native average time: "
<< native_total_time / (FLAGS_repeat * FLAGS_batch_size);
}
TEST(Analyzer_rnn1, ZeroCopyMultiThread) {
AnalysisConfig config;
SetConfig(&config);
config.use_feed_fetch_ops = false;
#define NEW_TENSOR(name__) \
auto name__##_tensor = predictor->GetInputTensor(#name__);
auto base_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
double total_time_of_threads{0};
std::vector<std::thread> threads;
std::vector<std::unique_ptr<PaddlePredictor>> predictors;
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
predictors.emplace_back(CreatePaddlePredictor<AnalysisConfig>(config));
}
for (int tid = 0; tid < FLAGS_num_threads; tid++) {
threads.emplace_back([config, &total_time_of_threads, &predictors, tid] {
// auto predictor = base_predictor->Clone();
auto &predictor = predictors[tid];
NEW_TENSOR(data_lod_attention);
NEW_TENSOR(cell_init);
NEW_TENSOR(data);
NEW_TENSOR(week);
NEW_TENSOR(minute);
NEW_TENSOR(hidden_init);
// Prepare data for AnalysisPredictor
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
Timer timer;
double total_time{0};
for (int i = 0; i < FLAGS_repeat; i++) {
PrepareZeroCopyInputs(data_lod_attention_tensor.get(),
cell_init_tensor.get(), data_tensor.get(),
hidden_init_tensor.get(), week_tensor.get(),
minute_tensor.get(), &data, FLAGS_batch_size);
timer.tic();
predictor->ZeroCopyRun();
total_time += timer.toc();
}
total_time_of_threads += total_time;
LOG(INFO) << "thread time: " << total_time / FLAGS_repeat;
});
}
for (auto &t : threads) {
t.join();
}
LOG(INFO) << "average time: "
<< total_time_of_threads / FLAGS_num_threads / FLAGS_repeat;
}
} // namespace inference
......
......@@ -182,7 +182,8 @@ TEST(Analyzer_seq_conv1, fuse_statis) {
AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
GetFuseStatis(predictor.get(), &num_ops);
}
// Compare result of NativeConfig and AnalysisConfig
......
......@@ -104,5 +104,18 @@ TEST(Analyzer_Text_Classification, compare) {
CompareNativeAndAnalysis(cfg, input_slots_all);
}
TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
AnalysisConfig cfg;
SetConfig(&cfg);
// Enable embedding_fc_lstm_fuse_pass (disabled by default)
auto it = std::find(cfg.ir_passes.begin(), cfg.ir_passes.end(),
"embedding_fc_lstm_fuse_pass");
if (it != cfg.ir_passes.end()) cfg.ir_passes.erase(it);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
}
} // namespace inference
} // namespace paddle
......@@ -19,6 +19,7 @@ limitations under the License. */
namespace paddle {
namespace inference {
namespace analysis {
using contrib::AnalysisConfig;
struct Record {
std::vector<float> data;
......@@ -114,7 +115,8 @@ TEST(Analyzer_vis, fuse_statis) {
AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
GetFuseStatis(cfg, &num_ops);
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
GetFuseStatis(predictor.get(), &num_ops);
}
// Compare result of NativeConfig and AnalysisConfig
......
......@@ -86,11 +86,9 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
int *num_ops) {
auto predictor = CreateTestPredictor(config);
AnalysisPredictor *analysis_predictor =
dynamic_cast<AnalysisPredictor *>(predictor.get());
auto *analysis_predictor = static_cast<AnalysisPredictor *>(predictor);
auto &fuse_statis = analysis_predictor->analysis_argument()
.Get<std::unordered_map<std::string, int>>(
framework::ir::kFuseStatisAttr);
......
......@@ -36,6 +36,8 @@ namespace memory {
using BuddyAllocator = detail::BuddyAllocator;
BuddyAllocator* GetCPUBuddyAllocator() {
// We tried thread_local for inference::RNN1 model, but that not works much
// for multi-thread test.
static std::once_flag init_flag;
static detail::BuddyAllocator* a = nullptr;
......@@ -48,6 +50,25 @@ BuddyAllocator* GetCPUBuddyAllocator() {
return a;
}
// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct NaiveAllocator {
void* Alloc(size_t size) { return malloc(size); }
void Free(void* p) {
PADDLE_ENFORCE(p);
free(p);
}
static NaiveAllocator* Instance() {
static NaiveAllocator x;
return &x;
}
private:
std::mutex lock_;
};
template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
......
......@@ -301,6 +301,7 @@ op_library(fusion_lstm_op DEPS cpu_lstm_compute)
if (WITH_GPU)
op_library(conv_op DEPS vol2col depthwise_conv im2col)
op_library(layer_norm_op DEPS cub)
op_library(reduce_mean_op DEPS cub)
else()
op_library(conv_op DEPS vol2col im2col)
endif()
......@@ -313,11 +314,6 @@ op_library(save_combine_op DEPS lod_tensor)
op_library(load_combine_op DEPS lod_tensor)
op_library(concat_op DEPS concat)
# FIXME(thuan): Move CSP operators to paddle/fluid/framework/operators/concurrency
add_subdirectory(concurrency)
op_library(channel_send_op DEPS concurrency)
op_library(channel_recv_op DEPS concurrency)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS})
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/op_registry.h"
namespace pf = paddle::framework;
static constexpr char kChannel[] = "Channel";
namespace paddle {
namespace operators {
class ChannelCloseOp : public framework::OperatorBase {
public:
ChannelCloseOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
auto &inp = *scope.FindVar(Input(kChannel));
// Get the mutable version of the channel variable and closes it.
pf::ChannelHolder *ch = inp.GetMutable<framework::ChannelHolder>();
ch->close();
}
};
class ChannelCloseOpOpInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
PADDLE_ENFORCE(context->HasInput("Channel"),
"The input of ChannelClose op must be set");
}
};
class ChannelCloseOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput(kChannel,
"The Channel Variable that should be closed by"
" the ChannelClose Op.");
AddComment(R"DOC(
Channel Close Operator.
This operator closes an open channel.
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(channel_close, paddle::operators::ChannelCloseOp,
paddle::framework::EmptyGradOpMaker,
paddle::operators::ChannelCloseOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
namespace pf = paddle::framework;
static constexpr char kOutput[] = "Out";
namespace paddle {
namespace operators {
class ChannelCreateOp : public framework::OperatorBase {
public:
ChannelCreateOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
auto &out = *scope.FindVar(Output(kOutput));
// Determine the datatype and capacity of the channel to be created
// from the attributes provided.
auto dtype =
static_cast<framework::proto::VarType::Type>(Attr<int>("data_type"));
auto capacity = Attr<int>("capacity");
// Based on the datatype, create a new channel holder initialized with
// the given capacity. When capacity is 0, an unbuffered channel is
// created.
pf::ChannelHolder *ch = out.GetMutable<framework::ChannelHolder>();
if (dtype == framework::proto::VarType::LOD_TENSOR) {
ch->Reset<pf::LoDTensor>(capacity);
} else if (dtype == framework::proto::VarType::SELECTED_ROWS) {
ch->Reset<pf::SelectedRows>(capacity);
} else if (dtype == framework::proto::VarType::LOD_RANK_TABLE) {
ch->Reset<pf::LoDRankTable>(capacity);
} else if (dtype == framework::proto::VarType::LOD_TENSOR_ARRAY) {
ch->Reset<pf::LoDTensorArray>(capacity);
} else if (dtype == framework::proto::VarType::READER) {
ch->Reset<pf::ReaderHolder>(capacity);
} else if (dtype == framework::proto::VarType::CHANNEL) {
ch->Reset<pf::ChannelHolder>(capacity);
} else if (dtype == framework::proto::VarType::BOOL) {
ch->Reset<bool>(capacity);
} else if (dtype == framework::proto::VarType::INT32) {
ch->Reset<int>(capacity);
} else if (dtype == framework::proto::VarType::INT64) {
ch->Reset<int64_t>(capacity);
} else if (dtype == framework::proto::VarType::FP32) {
ch->Reset<float>(capacity);
} else if (dtype == framework::proto::VarType::FP64) {
ch->Reset<double>(capacity);
} else {
PADDLE_THROW(
"Data type %d is not in "
"[LOD_TENSOR, SELECTED_ROWS, LOD_RANK_TABLE, LOD_TENSOR_ARRAY, "
"READER, CHANNEL, BOOL, INT32, INT64, FP32, FP64]",
dtype);
}
}
};
class ChannelCreateOpOpInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
PADDLE_ENFORCE(context->HasOutput(kOutput),
"The output of ChannelCreate op must be set");
context->SetOutputDim(kOutput, {1});
}
};
class ChannelCreateOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddOutput(kOutput,
"The object of a Channel type created by ChannelCreate Op.");
AddAttr<int>("capacity", "The size of the buffer of Channel.")
.SetDefault(0);
AddAttr<int>("data_type", "The data type of elements inside the Channel.");
AddComment(R"DOC(
Channel Create Operator.
This operator creates an object of the VarType Channel and returns it.
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(channel_create, paddle::operators::ChannelCreateOp,
paddle::framework::EmptyGradOpMaker,
paddle::operators::ChannelCreateOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include <paddle/fluid/framework/lod_rank_table.h>
#include <paddle/fluid/framework/lod_tensor_array.h>
#include <paddle/fluid/framework/reader.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/operators/math/math_function.h"
static constexpr char Channel[] = "Channel";
static constexpr char Status[] = "Status";
static constexpr char Out[] = "Out";
namespace paddle {
namespace operators {
void SetReceiveStatus(const platform::Place &dev_place,
framework::Variable *status_var, bool status) {
auto cpu = platform::CPUPlace();
auto status_tensor =
status_var->GetMutable<framework::LoDTensor>()->mutable_data<bool>({1},
cpu);
status_tensor[0] = status;
}
class ChannelRecvOp : public framework::OperatorBase {
public:
ChannelRecvOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const {
PADDLE_ENFORCE(ctx->HasInput(Channel),
"Input(Channel) of ChannelRecvOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(Out),
"Input(Channel) of ChannelRecvOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(Status),
"Output(Status) of ChannelRecvOp should not be null.");
ctx->SetOutputDim("Status", {1});
}
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
// Get the channel holder created by channel_create op, passed as input.
framework::ChannelHolder *ch =
scope.FindVar(Input(Channel))->GetMutable<framework::ChannelHolder>();
auto output_var = scope.FindVar(Output(Out));
// Receive the data from the channel.
bool ok = concurrency::ChannelReceive(ch, output_var);
// Set the status output of the `ChannelReceive` call.
SetReceiveStatus(dev_place, scope.FindVar(Output(Status)), ok);
}
};
class ChannelRecvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput(Channel,
"(Channel) A variable which \"receives\" the a value sent"
"to it by a channel_send op.")
.AsDuplicable();
AddOutput(Out,
"(Variable) Output Variable that will hold the data received"
" from the Channel")
.AsDuplicable();
AddOutput(Status,
"(Tensor) An LoD Tensor that returns a boolean status of the"
"result of the receive operation.")
.AsDuplicable();
AddComment(R"DOC(
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(channel_recv, paddle::operators::ChannelRecvOp,
paddle::framework::EmptyGradOpMaker,
paddle::operators::ChannelRecvOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include <paddle/fluid/framework/lod_rank_table.h>
#include <paddle/fluid/framework/lod_tensor_array.h>
#include <paddle/fluid/framework/reader.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/operators/math/math_function.h"
static constexpr char Channel[] = "Channel";
static constexpr char X[] = "X";
namespace paddle {
namespace operators {
class ChannelSendOp : public framework::OperatorBase {
public:
ChannelSendOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const {
PADDLE_ENFORCE(ctx->HasInput(Channel),
"Input(Channel) of ChannelSendOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput(X),
"Input(X) of ChannelSendOp should not be null.");
}
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
// Get the channel holder created by channel_create op, passed as input.
framework::ChannelHolder *ch =
scope.FindVar(Input(Channel))->GetMutable<framework::ChannelHolder>();
auto input_var = scope.FindVar(Input(X));
// Send the input data through the channel.
concurrency::ChannelSend(ch, input_var);
}
};
class ChannelSendOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput(Channel,
"(Channel) A variable which \"sends\" the passed in value to "
"a listening receiver.")
.AsDuplicable();
AddInput(X, "(Variable) The value which gets sent by the channel.")
.AsDuplicable();
AddComment(R"DOC(
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(channel_send, paddle::operators::ChannelSendOp,
paddle::framework::EmptyGradOpMaker,
paddle::operators::ChannelSendOpMaker);
cc_library(concurrency SRCS channel_util.cc DEPS device_context framework_proto boost eigen3)
......@@ -380,7 +380,8 @@ class DepthwiseConvKernel : public framework::OpKernel<T> {
math::DepthwiseConvFunctor<DeviceContext, T> depthwiseConv;
auto& dev_ctx = context.template device_context<DeviceContext>();
depthwiseConv(dev_ctx, *input, filter, strides, paddings, output);
depthwiseConv(dev_ctx, *input, filter, strides, paddings, dilations,
output);
}
};
......@@ -415,14 +416,14 @@ class DepthwiseConvGradKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(context.GetPlace());
set_zero(dev_ctx, input_grad, static_cast<T>(0));
depthwiseConvInputGrad(dev_ctx, *input, filter, *output_grad, strides,
paddings, input_grad);
paddings, dilations, input_grad);
}
if (filter_grad) {
filter_grad->mutable_data<T>(context.GetPlace());
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
depthwiseConvFilterGrad(dev_ctx, *input, *output_grad, strides, paddings,
filter_grad);
dilations, filter_grad);
}
}
};
......
......@@ -345,7 +345,7 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
math::DepthwiseConvInputGradFunctor<DeviceContext, T>
depthwiseConvInputGrad;
depthwiseConvInputGrad(dev_ctx, *output, filter, *input, strides, paddings,
output);
dilations, output);
}
};
......@@ -367,10 +367,11 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
if (input_grad) {
math::DepthwiseConvFunctor<DeviceContext, T> depthwiseConv;
depthwiseConv(dev_ctx, *output_grad, filter, strides, paddings,
depthwiseConv(dev_ctx, *output_grad, filter, strides, paddings, dilations,
input_grad);
}
......@@ -382,7 +383,7 @@ class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
math::DepthwiseConvFilterGradFunctor<DeviceContext, T>
depthwiseConvFilterGrad;
depthwiseConvFilterGrad(dev_ctx, *output_grad, *input, strides, paddings,
filter_grad);
dilations, filter_grad);
}
}
};
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册