提交 193aeaea 编写于 作者: L Luo Tao

Merge branch 'develop' into demo

...@@ -95,7 +95,7 @@ ParallelExecutor::ParallelExecutor( ...@@ -95,7 +95,7 @@ ParallelExecutor::ParallelExecutor(
} }
if (member_->local_scopes_.size() != 1 && local_scopes.empty()) { if (member_->local_scopes_.size() != 1 && local_scopes.empty()) {
BCastParamsToGPUs(bcast_vars); BCastParamsToDevs(bcast_vars);
} }
// Startup Program has been run. All local scopes has correct parameters. // Startup Program has been run. All local scopes has correct parameters.
...@@ -131,7 +131,7 @@ ParallelExecutor::ParallelExecutor( ...@@ -131,7 +131,7 @@ ParallelExecutor::ParallelExecutor(
member_->places_, std::move(member_->executor_))); member_->places_, std::move(member_->executor_)));
} }
void ParallelExecutor::BCastParamsToGPUs( void ParallelExecutor::BCastParamsToDevs(
const std::unordered_set<std::string> &vars) const { const std::unordered_set<std::string> &vars) const {
// the the initializing bcast, all vars would be bcast from device(0), // the the initializing bcast, all vars would be bcast from device(0),
// otherwise // otherwise
...@@ -202,7 +202,11 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -202,7 +202,11 @@ void ParallelExecutor::BCastParamsToGPUs(
#endif #endif
} else { } else {
platform::CPUPlace cpu; platform::CPUPlace cpu;
for (size_t i = 1; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
if ((initializing && i == 0) ||
(!initializing && static_cast<int>(i) == var_dev_id))
continue;
auto local_scope = member_->local_scopes_[i]; auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>(); auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
t->Resize(dims); t->Resize(dims);
......
...@@ -66,7 +66,7 @@ class ParallelExecutor { ...@@ -66,7 +66,7 @@ class ParallelExecutor {
void Run(const std::vector<std::string> &fetch_tensors, void Run(const std::vector<std::string> &fetch_tensors,
const std::string &fetched_var_name); const std::string &fetched_var_name);
void BCastParamsToGPUs(const std::unordered_set<std::string> &vars) const; void BCastParamsToDevs(const std::unordered_set<std::string> &vars) const;
private: private:
ParallelExecutorPrivate *member_; ParallelExecutorPrivate *member_;
......
...@@ -29,11 +29,11 @@ enum ReaderStatus { kRunning, kStopped }; ...@@ -29,11 +29,11 @@ enum ReaderStatus { kRunning, kStopped };
class ReaderBase { class ReaderBase {
public: public:
void ReadNext(std::vector<LoDTensor>* out); virtual void ReadNext(std::vector<LoDTensor>* out);
void Shutdown(); virtual void Shutdown();
void Start(); virtual void Start();
// Return the readers which are the end of decorating chain. Basically // Return the readers which are the end of decorating chain. Basically
// they are readers just before read op. // they are readers just before read op.
...@@ -42,7 +42,7 @@ class ReaderBase { ...@@ -42,7 +42,7 @@ class ReaderBase {
virtual ~ReaderBase(); virtual ~ReaderBase();
protected: protected:
virtual void ReadNextImpl(std::vector<LoDTensor>* out) = 0; virtual void ReadNextImpl(std::vector<LoDTensor>* out) {}
virtual void ShutdownImpl() {} virtual void ShutdownImpl() {}
......
...@@ -81,6 +81,15 @@ class BlockingQueue { ...@@ -81,6 +81,15 @@ class BlockingQueue {
} }
} }
void ReOpen() {
std::lock_guard<std::mutex> lock(mutex_);
closed_ = false;
std::deque<T> new_deque;
queue_.swap(new_deque);
send_cv_.notify_all();
receive_cv_.notify_all();
}
void Close() { void Close() {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
closed_ = true; closed_ = true;
......
...@@ -27,19 +27,17 @@ class PyReader : public framework::FileReader { ...@@ -27,19 +27,17 @@ class PyReader : public framework::FileReader {
queue_ = queue; queue_ = queue;
} }
void ReadNextImpl(std::vector<framework::LoDTensor>* out) override { void ReadNext(std::vector<framework::LoDTensor>* out) override {
bool success; bool success;
*out = queue_->Pop(&success); *out = queue_->Pop(&success);
if (!success) out->clear(); if (!success) out->clear();
} }
private: void Shutdown() override { queue_->Close(); }
void ShutdownImpl() override { /* TODO */
}
void StartImpl() override { /* TODO */ void Start() override { queue_->ReOpen(); }
}
private:
std::shared_ptr<LoDTensorBlockingQueue> queue_; std::shared_ptr<LoDTensorBlockingQueue> queue_;
}; };
......
...@@ -58,12 +58,15 @@ class LoDTensorBlockingQueue { ...@@ -58,12 +58,15 @@ class LoDTensorBlockingQueue {
inline size_t Size() const { return queue_.Size(); } inline size_t Size() const { return queue_.Size(); }
inline void Close() { return queue_.Close(); } inline void ReOpen() { queue_.ReOpen(); }
inline void Close() { queue_.Close(); }
inline bool IsClosed() const { return queue_.IsClosed(); } inline bool IsClosed() const { return queue_.IsClosed(); }
private: private:
void CheckDims(const std::vector<framework::LoDTensor>& lod_tensor_vec) { void CheckDims(
const std::vector<framework::LoDTensor>& lod_tensor_vec) const {
PADDLE_ENFORCE(dims_.size() == lod_tensor_vec.size(), PADDLE_ENFORCE(dims_.size() == lod_tensor_vec.size(),
"Expect input size is %d but found %s", dims_.size(), "Expect input size is %d but found %s", dims_.size(),
lod_tensor_vec.size()); lod_tensor_vec.size());
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include <Python.h> #include <Python.h>
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <memory>
#include <mutex> // NOLINT // for call_once #include <mutex> // NOLINT // for call_once
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -86,37 +87,37 @@ PYBIND11_PLUGIN(core) { ...@@ -86,37 +87,37 @@ PYBIND11_PLUGIN(core) {
py::class_<Tensor>(m, "Tensor", py::buffer_protocol()) py::class_<Tensor>(m, "Tensor", py::buffer_protocol())
.def_buffer( .def_buffer(
[](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
.def("get_dims", .def("_get_dims",
[](const Tensor &self) { return vectorize(self.dims()); }) [](const Tensor &self) { return vectorize(self.dims()); })
.def("set_dims", .def("_set_dims",
[](Tensor &self, const std::vector<int64_t> &dim) { [](Tensor &self, const std::vector<int64_t> &dim) {
self.Resize(make_ddim(dim)); self.Resize(make_ddim(dim));
}) })
.def("set_layout", .def("_set_layout",
[](Tensor &self, const std::string &layout) { [](Tensor &self, const std::string &layout) {
self.set_layout(StringToDataLayout(layout)); self.set_layout(StringToDataLayout(layout));
}) })
.def("alloc_float", .def("_alloc_float",
[](Tensor &self, paddle::platform::CUDAPlace &place) { [](Tensor &self, paddle::platform::CUDAPlace &place) {
self.mutable_data<float>(place); self.mutable_data<float>(place);
}) })
.def("alloc_float", .def("_alloc_float",
[](Tensor &self, paddle::platform::CPUPlace &place) { [](Tensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<float>(place); self.mutable_data<float>(place);
}) })
.def("alloc_int", .def("_alloc_int",
[](Tensor &self, paddle::platform::CPUPlace &place) { [](Tensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<int>(place); self.mutable_data<int>(place);
}) })
.def("alloc_int", .def("_alloc_int",
[](Tensor &self, paddle::platform::CUDAPlace &place) { [](Tensor &self, paddle::platform::CUDAPlace &place) {
self.mutable_data<int>(place); self.mutable_data<int>(place);
}) })
.def("alloc_int", .def("_alloc_int",
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<int>(place); self.mutable_data<int>(place);
}) })
.def("alloc_float", .def("_alloc_float",
[](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) {
self.mutable_data<float>(place); self.mutable_data<float>(place);
}) })
...@@ -144,11 +145,11 @@ PYBIND11_PLUGIN(core) { ...@@ -144,11 +145,11 @@ PYBIND11_PLUGIN(core) {
.def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>) .def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>)
#endif #endif
.def("shape", [](Tensor &self) { return vectorize(self.dims()); }) .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
.def("set_float_element", TensorSetElement<float>) .def("_set_float_element", TensorSetElement<float>)
.def("get_float_element", TensorGetElement<float>) .def("_get_float_element", TensorGetElement<float>)
.def("set_double_element", TensorSetElement<double>) .def("_set_double_element", TensorSetElement<double>)
.def("get_double_element", TensorGetElement<double>) .def("_get_double_element", TensorGetElement<double>)
.def("dtype", [](Tensor &self) { return ToDataType(self.type()); }); .def("_dtype", [](Tensor &self) { return ToDataType(self.type()); });
py::class_<LoDTensor, Tensor>(m, "LoDTensor") py::class_<LoDTensor, Tensor>(m, "LoDTensor")
.def_buffer( .def_buffer(
...@@ -310,7 +311,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -310,7 +311,8 @@ All parameter, weight, gradient are variables in Paddle.
::paddle::operators::reader::LoDTensorBlockingQueue; ::paddle::operators::reader::LoDTensorBlockingQueue;
using LoDTensorBlockingQueueHolder = using LoDTensorBlockingQueueHolder =
::paddle::operators::reader::LoDTensorBlockingQueueHolder; ::paddle::operators::reader::LoDTensorBlockingQueueHolder;
py::class_<LoDTensorBlockingQueue>(m, "LoDTensorBlockingQueue", "") py::class_<LoDTensorBlockingQueue, std::shared_ptr<LoDTensorBlockingQueue>>(
m, "LoDTensorBlockingQueue", "")
.def("push", .def("push",
[](LoDTensorBlockingQueue &self, [](LoDTensorBlockingQueue &self,
const std::vector<framework::LoDTensor> &lod_tensor_vec) { const std::vector<framework::LoDTensor> &lod_tensor_vec) {
...@@ -325,7 +327,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -325,7 +327,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("init_lod_tensor_blocking_queue", m.def("init_lod_tensor_blocking_queue",
[](Variable &var, size_t capacity, [](Variable &var, size_t capacity,
const std::vector<std::vector<int64_t>> &shapes) const std::vector<std::vector<int64_t>> &shapes)
-> LoDTensorBlockingQueue * { -> std::shared_ptr<LoDTensorBlockingQueue> {
std::vector<DDim> dims(shapes.size()); std::vector<DDim> dims(shapes.size());
std::transform(shapes.begin(), shapes.end(), dims.begin(), std::transform(shapes.begin(), shapes.end(), dims.begin(),
[](const std::vector<int64_t> &shape) { [](const std::vector<int64_t> &shape) {
...@@ -333,9 +335,9 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -333,9 +335,9 @@ All parameter, weight, gradient are variables in Paddle.
}); });
auto *holder = var.GetMutable<LoDTensorBlockingQueueHolder>(); auto *holder = var.GetMutable<LoDTensorBlockingQueueHolder>();
holder->InitOnce(capacity, dims); holder->InitOnce(capacity, dims);
return holder->GetQueue().get(); return holder->GetQueue();
}, },
py::return_value_policy::reference); py::return_value_policy::copy);
py::class_<Scope>(m, "Scope", "") py::class_<Scope>(m, "Scope", "")
.def("var", .def("var",
...@@ -543,6 +545,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -543,6 +545,8 @@ All parameter, weight, gradient are variables in Paddle.
}); });
py::class_<LoDTensorArray>(m, "LoDTensorArray") py::class_<LoDTensorArray>(m, "LoDTensorArray")
.def("__init__",
[](LoDTensorArray &instance) { new (&instance) LoDTensorArray(); })
.def("__getitem__", .def("__getitem__",
[](LoDTensorArray &self, size_t i) { return &self.at(i); }, [](LoDTensorArray &self, size_t i) { return &self.at(i); },
py::return_value_policy::reference) py::return_value_policy::reference)
...@@ -665,7 +669,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -665,7 +669,7 @@ All parameter, weight, gradient are variables in Paddle.
const std::string &, Scope *, std::vector<Scope *> &, const std::string &, Scope *, std::vector<Scope *> &,
const ExecutionStrategy &, const BuildStrategy &, size_t, const ExecutionStrategy &, const BuildStrategy &, size_t,
size_t>()) size_t>())
.def("bcast_params", &ParallelExecutor::BCastParamsToGPUs) .def("bcast_params", &ParallelExecutor::BCastParamsToDevs)
// NOTE: even we return a vec<Scope*>* to Python use reference policy. // NOTE: even we return a vec<Scope*>* to Python use reference policy.
// We still cannot get local_scope from this vector, since the element // We still cannot get local_scope from this vector, since the element
// of vec<Scope*> will be freed by Python GC. We can only return Scope* // of vec<Scope*> will be freed by Python GC. We can only return Scope*
......
...@@ -66,6 +66,17 @@ paddle_error paddle_arguments_get_value(paddle_arguments args, ...@@ -66,6 +66,17 @@ paddle_error paddle_arguments_get_value(paddle_arguments args,
return kPD_NO_ERROR; return kPD_NO_ERROR;
} }
PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args,
uint64_t ID,
paddle_matrix mat) {
if (args == nullptr || mat == nullptr) return kPD_NULLPTR;
auto m = paddle::capi::cast<paddle::capi::CMatrix>(mat);
auto a = castArg(args);
if (ID >= a->args.size()) return kPD_OUT_OF_RANGE;
m->mat = a->args[ID].in;
return kPD_NO_ERROR;
}
paddle_error paddle_arguments_get_ids(paddle_arguments args, paddle_error paddle_arguments_get_ids(paddle_arguments args,
uint64_t ID, uint64_t ID,
paddle_ivector ids) { paddle_ivector ids) {
......
...@@ -87,6 +87,18 @@ PD_API paddle_error paddle_arguments_get_value(paddle_arguments args, ...@@ -87,6 +87,18 @@ PD_API paddle_error paddle_arguments_get_value(paddle_arguments args,
uint64_t ID, uint64_t ID,
paddle_matrix mat); paddle_matrix mat);
/**
* @brief paddle_arguments_get_prob Get the prob matrix of beam search, which
* slot ID is `ID`
* @param [in] args arguments array
* @param [in] ID array index
* @param [out] mat matrix pointer
* @return paddle_error
*/
PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args,
uint64_t ID,
paddle_matrix mat);
/** /**
* @brief PDArgsGetIds Get the integer vector of one argument in array, which * @brief PDArgsGetIds Get the integer vector of one argument in array, which
* index is `ID`. * index is `ID`.
......
...@@ -44,7 +44,7 @@ import metrics ...@@ -44,7 +44,7 @@ import metrics
import transpiler import transpiler
from param_attr import ParamAttr, WeightNormParamAttr from param_attr import ParamAttr, WeightNormParamAttr
from data_feeder import DataFeeder from data_feeder import DataFeeder
from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope from core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
from transpiler import DistributeTranspiler, InferenceTranspiler, \ from transpiler import DistributeTranspiler, InferenceTranspiler, \
memory_optimize, release_memory memory_optimize, release_memory
from concurrency import (Go, make_channel, channel_send, channel_recv, from concurrency import (Go, make_channel, channel_send, channel_recv,
...@@ -72,6 +72,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \ ...@@ -72,6 +72,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
'backward', 'backward',
'regularizer', 'regularizer',
'LoDTensor', 'LoDTensor',
'LoDTensorArray',
'CPUPlace', 'CPUPlace',
'CUDAPlace', 'CUDAPlace',
'CUDAPinnedPlace', 'CUDAPinnedPlace',
......
...@@ -31,7 +31,7 @@ class BaseErrorClipAttr(object): ...@@ -31,7 +31,7 @@ class BaseErrorClipAttr(object):
def __str__(self): def __str__(self):
raise NotImplementedError() raise NotImplementedError()
def append_clip_op(self, block, grad_name): def _append_clip_op(self, block, grad_name):
raise NotImplementedError() raise NotImplementedError()
...@@ -67,7 +67,7 @@ class ErrorClipByValue(BaseErrorClipAttr): ...@@ -67,7 +67,7 @@ class ErrorClipByValue(BaseErrorClipAttr):
def __str__(self): def __str__(self):
return "ByValue, min=%f, max=%f" % (self.min, self.max) return "ByValue, min=%f, max=%f" % (self.min, self.max)
def append_clip_op(self, block, grad_name): def _append_clip_op(self, block, grad_name):
clip_op_desc = block.desc.append_op() clip_op_desc = block.desc.append_op()
clip_op_desc.set_type("clip") clip_op_desc.set_type("clip")
clip_op_desc.set_input("X", [grad_name]) clip_op_desc.set_input("X", [grad_name])
...@@ -90,17 +90,17 @@ def error_clip_callback(block, context): ...@@ -90,17 +90,17 @@ def error_clip_callback(block, context):
"Variable's error_clip should be an instance of BaseErrorClipAttr or None." "Variable's error_clip should be an instance of BaseErrorClipAttr or None."
) )
if error_clip is not None: if error_clip is not None:
error_clip.append_clip_op(block, grad_n) error_clip._append_clip_op(block, grad_n)
class BaseGradientClipAttr(object): class BaseGradientClipAttr(object):
def __str__(self): def __str__(self):
raise NotImplementedError() raise NotImplementedError()
def process_context(self, context, param, grad): def _process_context(self, context, param, grad):
raise NotImplementedError() raise NotImplementedError()
def create_operators(self, param, grad): def _create_operators(self, param, grad):
raise NotImplementedError() raise NotImplementedError()
...@@ -108,10 +108,10 @@ class NullGradientClipAttr(BaseGradientClipAttr): ...@@ -108,10 +108,10 @@ class NullGradientClipAttr(BaseGradientClipAttr):
def __str__(self): def __str__(self):
return "Null" return "Null"
def process_context(self, context, param, grad): def _process_context(self, context, param, grad):
pass pass
def create_operators(self, param, grad): def _create_operators(self, param, grad):
return param, grad return param, grad
...@@ -153,10 +153,10 @@ class GradientClipByValue(BaseGradientClipAttr): ...@@ -153,10 +153,10 @@ class GradientClipByValue(BaseGradientClipAttr):
def __str__(self): def __str__(self):
return "ByValue, min=%f, max=%f" % (self.min, self.max) return "ByValue, min=%f, max=%f" % (self.min, self.max)
def process_context(self, context, param, grad): def _process_context(self, context, param, grad):
pass pass
def create_operators(self, param, grad): def _create_operators(self, param, grad):
new_grad = layers.clip(x=grad, min=self.min, max=self.max) new_grad = layers.clip(x=grad, min=self.min, max=self.max)
return param, new_grad return param, new_grad
...@@ -199,10 +199,10 @@ class GradientClipByNorm(BaseGradientClipAttr): ...@@ -199,10 +199,10 @@ class GradientClipByNorm(BaseGradientClipAttr):
def __str__(self): def __str__(self):
return "ByNorm, clip_norm=%f" % self.clip_norm return "ByNorm, clip_norm=%f" % self.clip_norm
def process_context(self, context, param, grad): def _process_context(self, context, param, grad):
pass pass
def create_operators(self, param, grad): def _create_operators(self, param, grad):
new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm) new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm)
return param, new_grad return param, new_grad
...@@ -257,7 +257,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): ...@@ -257,7 +257,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
return "ByGlobalNorm, group_name=%s, clip_norm=%f" % (self.group_name, return "ByGlobalNorm, group_name=%s, clip_norm=%f" % (self.group_name,
self.clip_norm) self.clip_norm)
def process_context(self, context, param, grad): def _process_context(self, context, param, grad):
if self.group_name not in context: if self.group_name not in context:
context[self.group_name] = [] context[self.group_name] = []
context[self.group_name + "_clip_value"] = self.clip_norm context[self.group_name + "_clip_value"] = self.clip_norm
...@@ -274,7 +274,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): ...@@ -274,7 +274,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
self.context = context self.context = context
def create_operators(self, param, grad): def _create_operators(self, param, grad):
group_scale_name = self.group_name + "_scale" group_scale_name = self.group_name + "_scale"
if group_scale_name not in self.context: if group_scale_name not in self.context:
group_norm_var = layers.sums(input=self.context[self.group_name]) group_norm_var = layers.sums(input=self.context[self.group_name])
...@@ -336,12 +336,12 @@ def append_gradient_clip_ops(param_grad): ...@@ -336,12 +336,12 @@ def append_gradient_clip_ops(param_grad):
"clip attribute should be an instance of BaseGradientClipAttr" "clip attribute should be an instance of BaseGradientClipAttr"
) )
clip_attr.process_context(context=context, param=p, grad=g) clip_attr._process_context(context=context, param=p, grad=g)
res = [] res = []
for p, g in param_grad: for p, g in param_grad:
with p.block.program.optimized_guard(p): with p.block.program.optimized_guard(p):
res.append(clip_attr.create_operators(param=p, grad=g)) res.append(clip_attr._create_operators(param=p, grad=g))
return res return res
......
...@@ -68,11 +68,11 @@ class LayerHelper(object): ...@@ -68,11 +68,11 @@ class LayerHelper(object):
@property @property
def param_attr(self): def param_attr(self):
return ParamAttr.to_attr(self.kwargs.get('param_attr', None)) return ParamAttr._to_attr(self.kwargs.get('param_attr', None))
@property @property
def bias_attr(self): def bias_attr(self):
return ParamAttr.to_attr(self.kwargs.get('bias_attr', None)) return ParamAttr._to_attr(self.kwargs.get('bias_attr', None))
def multiple_param_attr(self, length): def multiple_param_attr(self, length):
param_attr = self.param_attr param_attr = self.param_attr
...@@ -262,11 +262,11 @@ class LayerHelper(object): ...@@ -262,11 +262,11 @@ class LayerHelper(object):
g_param = self.startup_program.global_block().create_parameter( g_param = self.startup_program.global_block().create_parameter(
dtype=dtype, dtype=dtype,
shape=g_param_shape, shape=g_param_shape,
**g_param_attr.to_kwargs(with_initializer=False)) **g_param_attr._to_kwargs(with_initializer=False))
v_param = self.startup_program.global_block().create_parameter( v_param = self.startup_program.global_block().create_parameter(
dtype=dtype, dtype=dtype,
shape=v_param_shape, shape=v_param_shape,
**v_param_attr.to_kwargs(with_initializer=True)) **v_param_attr._to_kwargs(with_initializer=True))
__norm_except_dim( __norm_except_dim(
x=v_param, x=v_param,
out=g_param, out=g_param,
...@@ -275,9 +275,9 @@ class LayerHelper(object): ...@@ -275,9 +275,9 @@ class LayerHelper(object):
# Add weight normalization to main_program # Add weight normalization to main_program
g_param = self.main_program.global_block().create_parameter( g_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=g_param_shape, **g_param_attr.to_kwargs()) dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs())
v_param = self.main_program.global_block().create_parameter( v_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=v_param_shape, **v_param_attr.to_kwargs()) dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs())
w_param = __weight_normalize(g_param, v_param, dim=attr.dim) w_param = __weight_normalize(g_param, v_param, dim=attr.dim)
return w_param return w_param
...@@ -296,11 +296,11 @@ class LayerHelper(object): ...@@ -296,11 +296,11 @@ class LayerHelper(object):
if default_initializer is None and attr.initializer is None: if default_initializer is None and attr.initializer is None:
if is_bias: if is_bias:
attr.set_default_bias_initializer() attr._set_default_bias_initializer()
else: else:
attr.set_default_param_initializer() attr._set_default_param_initializer()
else: else:
attr.set_default_initializer(default_initializer) attr._set_default_initializer(default_initializer)
# If weight normalization is set, insert extra parameters and ops. # If weight normalization is set, insert extra parameters and ops.
# Refer to https://arxiv.org/pdf/1602.07868.pdf # Refer to https://arxiv.org/pdf/1602.07868.pdf
...@@ -310,9 +310,9 @@ class LayerHelper(object): ...@@ -310,9 +310,9 @@ class LayerHelper(object):
return param return param
self.startup_program.global_block().create_parameter( self.startup_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True)) dtype=dtype, shape=shape, **attr._to_kwargs(with_initializer=True))
return self.main_program.global_block().create_parameter( return self.main_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr.to_kwargs()) dtype=dtype, shape=shape, **attr._to_kwargs())
def get_parameter(self, name): def get_parameter(self, name):
param = self.main_program.global_block().var(name) param = self.main_program.global_block().var(name)
......
...@@ -24,7 +24,8 @@ from layer_function_generator import generate_layer_fn, templatedoc ...@@ -24,7 +24,8 @@ from layer_function_generator import generate_layer_fn, templatedoc
__all__ = [ __all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv', 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv',
'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch', 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch',
'double_buffer', 'random_data_generator', 'Preprocessor', 'load' 'double_buffer', 'random_data_generator', 'py_reader', 'Preprocessor',
'load'
] ]
...@@ -445,6 +446,88 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): ...@@ -445,6 +446,88 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
return monkey_patch_reader_methods(main_prog_var) return monkey_patch_reader_methods(main_prog_var)
def py_reader(capacity, shapes, dtypes, lod_levels=None):
"""
Create a reader and blocking queue for data feeding in Python
This layer returns a Reader Variable and a BlockingQueue.
The BlockingQueue provides `push()` method to push a `LoDTensorArray`
object into the queue in Python side. In C++ side, the Reader
Variable would invoke `pop()` method of the queue to retrieve the
feeding data. The process of feeding data in Python side and fetching
data in C++ side can run in parallel. The BlockingQueue should be closed
using `close()` method when unused.
Args:
capacity(int): The maximum capacity of the BlockingQueue.
shapes(list): List of tuples which declaring data shapes.
dtypes(list): List of strs which declaring data type.
lod_levels(list): List of ints which declaring data lod_level.
Returns:
tuple(Variable, BlockingQueue):
A Reader Variable from which we can get feeding data.
A BlockingQueue object for data feeding.
Examples:
.. code-block:: python
reader, queue = fluid.layers.py_reader(
capacity=10,
shapes=[[-1,3,224,224], [-1,1]],
dtypes=['float32', 'int64'])
# Via the reader, we can use 'read_file' layer to get data:
image, label = fluid.layers.read_file(reader)
# Via the blocking queue, we can feed data using threads
def feed_data(queue, feed_images, feed_labels):
for feed_image, feed_label in zip(feed_images, feed_labels):
data = core.LoDTensorArray()
data.append(feed_image)
data.append(feed_label)
queue.push(data)
thread = threading.Thread(target=feed_data, args=(queue, feed_images, feed_labels))
thread.start()
"""
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
for shape in shapes:
shape_concat.extend(shape)
ranks.append(len(shape))
if lod_levels is None:
lod_levels = [0] * len(shapes)
queue_name = unique_name('lod_tensor_blocking_queue')
var = global_scope().var(queue_name)
feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=unique_name('create_py_reader'))
startup_blk.append_op(
type='create_py_reader',
inputs={'blocking_queue': queue_name},
outputs={'Out': [startup_var]},
attrs={
'shape_concat': shape_concat,
'lod_levels': lod_levels,
'ranks': ranks
})
startup_var.desc.set_dtypes(dtypes)
startup_var.persistable = True
main_prog_var = _copy_reader_var_(default_main_program().current_block(),
startup_var)
return monkey_patch_reader_methods(main_prog_var), feed_queue
def open_files(filenames, def open_files(filenames,
shapes, shapes,
lod_levels, lod_levels,
......
...@@ -123,7 +123,7 @@ class Optimizer(object): ...@@ -123,7 +123,7 @@ class Optimizer(object):
""" """
pass pass
def _finish_update(self, block): def _finish_update(self, block, parameters):
"""Finish any custom updates needed """Finish any custom updates needed
before completing an optimization step before completing an optimization step
...@@ -132,7 +132,7 @@ class Optimizer(object): ...@@ -132,7 +132,7 @@ class Optimizer(object):
parameters: list of parameter variables for the optimizer parameters: list of parameter variables for the optimizer
Returns: Returns:
list of finish ops or None None
""" """
pass pass
...@@ -236,7 +236,8 @@ class Optimizer(object): ...@@ -236,7 +236,8 @@ class Optimizer(object):
# Get custom finish ops for subclasses # Get custom finish ops for subclasses
# FIXME: Need to fix this once we figure out how to handle dependencies # FIXME: Need to fix this once we figure out how to handle dependencies
self._finish_update(loss.block) self._finish_update(loss.block,
[p[0] for p in parameters_and_grads])
end = len(global_block.ops) end = len(global_block.ops)
return global_block.slice_ops(start, end) return global_block.slice_ops(start, end)
...@@ -486,6 +487,8 @@ class AdamOptimizer(Optimizer): ...@@ -486,6 +487,8 @@ class AdamOptimizer(Optimizer):
""" """
_moment1_acc_str = "moment1" _moment1_acc_str = "moment1"
_moment2_acc_str = "moment2" _moment2_acc_str = "moment2"
_beta1_pow_acc_str = "beta1_pow_acc"
_beta2_pow_acc_str = "beta2_pow_acc"
def __init__(self, def __init__(self,
learning_rate=0.001, learning_rate=0.001,
...@@ -507,32 +510,22 @@ class AdamOptimizer(Optimizer): ...@@ -507,32 +510,22 @@ class AdamOptimizer(Optimizer):
def _create_accumulators(self, block, parameters): def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
main_block = block.program.global_block()
# Create beta1 and beta2 power tensors
beta_shape = [1]
self._beta1_pow_acc = self.helper.create_global_variable(
name=unique_name.generate('beta1_pow_acc'),
dtype='float32' if self._dtype == None else self._dtype,
shape=beta_shape,
lod_level=0,
persistable=True)
self.helper.set_variable_initializer(
self._beta1_pow_acc, initializer=Constant(self._beta1))
self._beta2_pow_acc = self.helper.create_global_variable(
name=unique_name.generate('beta2_pow_acc'),
dtype='float32' if self._dtype == None else self._dtype,
shape=beta_shape,
lod_level=0,
persistable=True)
self.helper.set_variable_initializer(
self._beta2_pow_acc, initializer=Constant(self._beta2))
# Create accumulator tensors for first and second moments # Create accumulator tensors for first and second moments
for p in parameters: for p in parameters:
self._add_accumulator(self._moment1_acc_str, p) self._add_accumulator(self._moment1_acc_str, p)
self._add_accumulator(self._moment2_acc_str, p) self._add_accumulator(self._moment2_acc_str, p)
self._add_accumulator(
name=self._beta1_pow_acc_str,
param=p,
dtype='float32',
fill_value=self._beta1,
shape=[1])
self._add_accumulator(
name=self._beta2_pow_acc_str,
param=p,
dtype='float32',
fill_value=self._beta2,
shape=[1])
def _append_optimize_op(self, block, param_and_grad): def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
...@@ -541,6 +534,11 @@ class AdamOptimizer(Optimizer): ...@@ -541,6 +534,11 @@ class AdamOptimizer(Optimizer):
param_and_grad[0]) param_and_grad[0])
moment2 = self._get_accumulator(self._moment2_acc_str, moment2 = self._get_accumulator(self._moment2_acc_str,
param_and_grad[0]) param_and_grad[0])
beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
param_and_grad[0])
beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
param_and_grad[0])
# create the adam optimize op # create the adam optimize op
adam_op = block.append_op( adam_op = block.append_op(
type=self.type, type=self.type,
...@@ -550,8 +548,8 @@ class AdamOptimizer(Optimizer): ...@@ -550,8 +548,8 @@ class AdamOptimizer(Optimizer):
"LearningRate": self._create_param_lr(param_and_grad), "LearningRate": self._create_param_lr(param_and_grad),
"Moment1": moment1, "Moment1": moment1,
"Moment2": moment2, "Moment2": moment2,
"Beta1Pow": self._beta1_pow_acc, "Beta1Pow": beta1_pow_acc,
"Beta2Pow": self._beta2_pow_acc "Beta2Pow": beta2_pow_acc
}, },
outputs={ outputs={
"ParamOut": param_and_grad[0], "ParamOut": param_and_grad[0],
...@@ -566,24 +564,28 @@ class AdamOptimizer(Optimizer): ...@@ -566,24 +564,28 @@ class AdamOptimizer(Optimizer):
return adam_op return adam_op
def _finish_update(self, block): def _finish_update(self, block, parameters):
"""Update Beta1 and Beta2 Power accumulators """Update Beta1 and Beta2 Power accumulators
""" """
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
main_block = block.program.global_block() main_block = block.program.global_block()
scale_beta1 = main_block.append_op( for param in parameters:
type="scale", with param.block.program.optimized_guard(param):
inputs={"X": self._beta1_pow_acc}, beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
outputs={"Out": self._beta1_pow_acc}, param)
attrs={"scale": self._beta1}) beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
param)
scale_beta2 = main_block.append_op( main_block.append_op(
type="scale", type="scale",
inputs={"X": self._beta2_pow_acc}, inputs={"X": beta1_pow_acc},
outputs={"Out": self._beta2_pow_acc}, outputs={"Out": beta1_pow_acc},
attrs={"scale": self._beta2}) attrs={"scale": self._beta1})
return [scale_beta1, scale_beta2] main_block.append_op(
type="scale",
inputs={"X": beta2_pow_acc},
outputs={"Out": beta2_pow_acc},
attrs={"scale": self._beta2})
class AdamaxOptimizer(Optimizer): class AdamaxOptimizer(Optimizer):
...@@ -626,6 +628,7 @@ class AdamaxOptimizer(Optimizer): ...@@ -626,6 +628,7 @@ class AdamaxOptimizer(Optimizer):
""" """
_moment_acc_str = "moment" _moment_acc_str = "moment"
_inf_norm_acc_str = "inf_norm" _inf_norm_acc_str = "inf_norm"
_beta1_pow_acc_str = "beta1_pow_acc"
def __init__(self, def __init__(self,
learning_rate=0.001, learning_rate=0.001,
...@@ -645,21 +648,16 @@ class AdamaxOptimizer(Optimizer): ...@@ -645,21 +648,16 @@ class AdamaxOptimizer(Optimizer):
self._epsilon = epsilon self._epsilon = epsilon
def _create_accumulators(self, block, parameters): def _create_accumulators(self, block, parameters):
# Create beta1 power accumulator tensor
beta_shape = [1]
self._beta1_pow_acc = self.helper.create_global_variable(
name=unique_name.generate('beta1_pow_acc'),
dtype='float32' if self._dtype == None else self._dtype,
shape=beta_shape,
lod_level=0,
persistable=True)
self.helper.set_variable_initializer(
self._beta1_pow_acc, initializer=Constant(self._beta1))
# Create accumulator tensors for first moment and infinity norm # Create accumulator tensors for first moment and infinity norm
for p in parameters: for p in parameters:
self._add_accumulator(self._moment_acc_str, p) self._add_accumulator(self._moment_acc_str, p)
self._add_accumulator(self._inf_norm_acc_str, p) self._add_accumulator(self._inf_norm_acc_str, p)
self._add_accumulator(
name=self._beta1_pow_acc_str,
param=p,
dtype='float32',
fill_value=self._beta1,
shape=[1])
def _append_optimize_op(self, block, param_and_grad): def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
...@@ -667,6 +665,8 @@ class AdamaxOptimizer(Optimizer): ...@@ -667,6 +665,8 @@ class AdamaxOptimizer(Optimizer):
moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0])
inf_norm = self._get_accumulator(self._inf_norm_acc_str, inf_norm = self._get_accumulator(self._inf_norm_acc_str,
param_and_grad[0]) param_and_grad[0])
beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
param_and_grad[0])
# create the adamax optimize op # create the adamax optimize op
adamax_op = block.append_op( adamax_op = block.append_op(
type=self.type, type=self.type,
...@@ -676,7 +676,7 @@ class AdamaxOptimizer(Optimizer): ...@@ -676,7 +676,7 @@ class AdamaxOptimizer(Optimizer):
"LearningRate": self._create_param_lr(param_and_grad), "LearningRate": self._create_param_lr(param_and_grad),
"Moment": moment, "Moment": moment,
"InfNorm": inf_norm, "InfNorm": inf_norm,
"Beta1Pow": self._beta1_pow_acc "Beta1Pow": beta1_pow_acc
}, },
outputs={ outputs={
"ParamOut": param_and_grad[0], "ParamOut": param_and_grad[0],
...@@ -691,18 +691,20 @@ class AdamaxOptimizer(Optimizer): ...@@ -691,18 +691,20 @@ class AdamaxOptimizer(Optimizer):
return adamax_op return adamax_op
def _finish_update(self, block): def _finish_update(self, block, parameters):
"""Update Beta1 Power accumulator """Update Beta1 Power accumulator
""" """
assert isinstance(block, framework.Block) assert isinstance(block, framework.Block)
main_block = block.program.global_block() main_block = block.program.global_block()
scale_beta1 = main_block.append_op( for param in parameters:
type="scale", with param.block.program.optimized_guard(param):
inputs={"X": self._beta1_pow_acc}, beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
outputs={"Out": self._beta1_pow_acc}, param)
attrs={"scale": self._beta1}) main_block.append_op(
type="scale",
return [scale_beta1] inputs={"X": beta1_pow_acc},
outputs={"Out": beta1_pow_acc},
attrs={"scale": self._beta1})
class DecayedAdagradOptimizer(Optimizer): class DecayedAdagradOptimizer(Optimizer):
...@@ -1156,7 +1158,8 @@ class ModelAverage(Optimizer): ...@@ -1156,7 +1158,8 @@ class ModelAverage(Optimizer):
self.params_grads.append((param, grad)) self.params_grads.append((param, grad))
for param, grad in self.params_grads: for param, grad in self.params_grads:
self._append_average_accumulate_op(param) with param.block.program.optimized_guard(param):
self._append_average_accumulate_op(param)
self.apply_program = Program() self.apply_program = Program()
block = self.apply_program.global_block() block = self.apply_program.global_block()
......
...@@ -67,7 +67,7 @@ class ParamAttr(object): ...@@ -67,7 +67,7 @@ class ParamAttr(object):
self.gradient_clip = gradient_clip self.gradient_clip = gradient_clip
self.model_average = do_model_average self.model_average = do_model_average
def set_default_initializer(self, initializer): def _set_default_initializer(self, initializer):
""" """
Set the default initializer, the initializer should be Constant, Set the default initializer, the initializer should be Constant,
Uniform, Normal, Xavier, MSRA. Uniform, Normal, Xavier, MSRA.
...@@ -88,7 +88,7 @@ class ParamAttr(object): ...@@ -88,7 +88,7 @@ class ParamAttr(object):
self.initializer = initializer self.initializer = initializer
def set_default_param_initializer(self): def _set_default_param_initializer(self):
""" """
Set the default initializer for the parameter with Xavier. Set the default initializer for the parameter with Xavier.
...@@ -98,9 +98,9 @@ class ParamAttr(object): ...@@ -98,9 +98,9 @@ class ParamAttr(object):
Returns: Returns:
None. None.
""" """
self.set_default_initializer(Xavier()) self._set_default_initializer(Xavier())
def set_default_bias_initializer(self): def _set_default_bias_initializer(self):
""" """
Set the default initializer for the bias with Constant(0.0). Set the default initializer for the bias with Constant(0.0).
...@@ -110,10 +110,10 @@ class ParamAttr(object): ...@@ -110,10 +110,10 @@ class ParamAttr(object):
Returns: Returns:
None. None.
""" """
self.set_default_initializer(Constant(0.0)) self._set_default_initializer(Constant(0.0))
@staticmethod @staticmethod
def to_attr(arg): def _to_attr(arg):
""" """
Create ParamAttr[s]. Create ParamAttr[s].
...@@ -131,7 +131,7 @@ class ParamAttr(object): ...@@ -131,7 +131,7 @@ class ParamAttr(object):
if arg is None: if arg is None:
return ParamAttr() return ParamAttr()
elif isinstance(arg, list) or isinstance(arg, tuple): elif isinstance(arg, list) or isinstance(arg, tuple):
return [ParamAttr.to_attr(a) for a in arg] return [ParamAttr._to_attr(a) for a in arg]
elif isinstance(arg, ParamAttr): elif isinstance(arg, ParamAttr):
return arg return arg
elif isinstance(arg, str) or isinstance(arg, unicode): elif isinstance(arg, str) or isinstance(arg, unicode):
...@@ -141,11 +141,11 @@ class ParamAttr(object): ...@@ -141,11 +141,11 @@ class ParamAttr(object):
elif isinstance(arg, WeightDecayRegularizer): elif isinstance(arg, WeightDecayRegularizer):
return ParamAttr(regularizer=arg) return ParamAttr(regularizer=arg)
elif isinstance(arg, bool): elif isinstance(arg, bool):
return ParamAttr.to_attr(None) if arg else False return ParamAttr._to_attr(None) if arg else False
else: else:
raise TypeError("{0} cast to ParamAttr".format(type(arg))) raise TypeError("{0} cast to ParamAttr".format(type(arg)))
def to_kwargs(self, with_initializer=False): def _to_kwargs(self, with_initializer=False):
""" """
Returns the attributes of this parameter. Returns the attributes of this parameter.
......
...@@ -15,10 +15,7 @@ ...@@ -15,10 +15,7 @@
import framework import framework
from . import core from . import core
__all__ = [ __all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer']
'append_regularization_ops', 'L1Decay', 'L2Decay', 'L1DecayRegularizer',
'L2DecayRegularizer'
]
def append_regularization_ops(parameters_and_grads, regularization=None): def append_regularization_ops(parameters_and_grads, regularization=None):
......
...@@ -60,8 +60,8 @@ def get_numeric_gradient(place, ...@@ -60,8 +60,8 @@ def get_numeric_gradient(place,
return np.array(sum).mean() return np.array(sum).mean()
tensor_to_check = scope.find_var(input_to_check).get_tensor() tensor_to_check = scope.find_var(input_to_check).get_tensor()
tensor_size = product(tensor_to_check.get_dims()) tensor_size = product(tensor_to_check.shape())
tensor_to_check_dtype = tensor_to_check.dtype() tensor_to_check_dtype = tensor_to_check._dtype()
if tensor_to_check_dtype == core.VarDesc.VarType.FP32: if tensor_to_check_dtype == core.VarDesc.VarType.FP32:
tensor_to_check_dtype = np.float32 tensor_to_check_dtype = np.float32
elif tensor_to_check_dtype == core.VarDesc.VarType.FP64: elif tensor_to_check_dtype == core.VarDesc.VarType.FP64:
...@@ -74,15 +74,15 @@ def get_numeric_gradient(place, ...@@ -74,15 +74,15 @@ def get_numeric_gradient(place,
def __get_elem__(tensor, i): def __get_elem__(tensor, i):
if tensor_to_check_dtype == np.float32: if tensor_to_check_dtype == np.float32:
return tensor.get_float_element(i) return tensor._get_float_element(i)
else: else:
return tensor.get_double_element(i) return tensor._get_double_element(i)
def __set_elem__(tensor, i, e): def __set_elem__(tensor, i, e):
if tensor_to_check_dtype == np.float32: if tensor_to_check_dtype == np.float32:
tensor.set_float_element(i, e) tensor._set_float_element(i, e)
else: else:
tensor.set_double_element(i, e) tensor._set_double_element(i, e)
# we only compute gradient of one element each time. # we only compute gradient of one element each time.
# we use a for loop to compute the gradient of every element. # we use a for loop to compute the gradient of every element.
...@@ -107,7 +107,7 @@ def get_numeric_gradient(place, ...@@ -107,7 +107,7 @@ def get_numeric_gradient(place,
__set_elem__(tensor_to_check, i, origin) __set_elem__(tensor_to_check, i, origin)
gradient_flat[i] = (y_pos - y_neg) / delta / 2 gradient_flat[i] = (y_pos - y_neg) / delta / 2
return gradient_flat.reshape(tensor_to_check.get_dims()) return gradient_flat.reshape(tensor_to_check.shape())
class OpTest(unittest.TestCase): class OpTest(unittest.TestCase):
...@@ -125,7 +125,7 @@ class OpTest(unittest.TestCase): ...@@ -125,7 +125,7 @@ class OpTest(unittest.TestCase):
@classmethod @classmethod
def tearDownClass(cls): def tearDownClass(cls):
'''Restore random seeds''' """Restore random seeds"""
np.random.set_state(cls._np_rand_state) np.random.set_state(cls._np_rand_state)
random.setstate(cls._py_rand_state) random.setstate(cls._py_rand_state)
......
...@@ -129,7 +129,6 @@ def create_or_get_tensor(scope, var_name, var, place): ...@@ -129,7 +129,6 @@ def create_or_get_tensor(scope, var_name, var, place):
if var is not None: if var is not None:
assert isinstance(var, np.ndarray) assert isinstance(var, np.ndarray)
tensor.set_recursive_sequence_lengths([]) tensor.set_recursive_sequence_lengths([])
tensor.set_dims(var.shape)
tensor.set(var, place) tensor.set(var, place)
return tensor return tensor
......
...@@ -65,10 +65,10 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -65,10 +65,10 @@ class TestDyRnnStaticInput(unittest.TestCase):
return self._lodtensor_to_ndarray(fetch_outs[0]) return self._lodtensor_to_ndarray(fetch_outs[0])
def _lodtensor_to_ndarray(self, lod_tensor): def _lodtensor_to_ndarray(self, lod_tensor):
dims = lod_tensor.get_dims() dims = lod_tensor.shape()
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)): for i in xrange(np.product(dims)):
ndarray.ravel()[i] = lod_tensor.get_float_element(i) ndarray.ravel()[i] = lod_tensor._get_float_element(i)
return ndarray, lod_tensor.recursive_sequence_lengths() return ndarray, lod_tensor.recursive_sequence_lengths()
def build_graph(self, only_forward=False): def build_graph(self, only_forward=False):
...@@ -185,19 +185,19 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -185,19 +185,19 @@ class TestDyRnnStaticInput(unittest.TestCase):
actual_gradients, actual_lod = self.fetch_value(static_input_grad) actual_gradients, actual_lod = self.fetch_value(static_input_grad)
static_input_shape = self.static_input_tensor.get_dims() static_input_shape = self.static_input_tensor.shape()
numeric_gradients = np.zeros(shape=static_input_shape).astype('float32') numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
# calculate numeric gradients # calculate numeric gradients
tensor_size = np.product(static_input_shape) tensor_size = np.product(static_input_shape)
for i in xrange(tensor_size): for i in xrange(tensor_size):
origin = self.static_input_tensor.get_float_element(i) origin = self.static_input_tensor._get_float_element(i)
x_pos = origin + self._delta x_pos = origin + self._delta
self.static_input_tensor.set_float_element(i, x_pos) self.static_input_tensor._set_float_element(i, x_pos)
y_pos = self.fetch_value(loss)[0][0] y_pos = self.fetch_value(loss)[0][0]
x_neg = origin - self._delta x_neg = origin - self._delta
self.static_input_tensor.set_float_element(i, x_neg) self.static_input_tensor._set_float_element(i, x_neg)
y_neg = self.fetch_value(loss)[0][0] y_neg = self.fetch_value(loss)[0][0]
self.static_input_tensor.set_float_element(i, origin) self.static_input_tensor._set_float_element(i, origin)
numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001)) self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
self.assertTrue( self.assertTrue(
......
...@@ -287,7 +287,7 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -287,7 +287,7 @@ class TestAdamOptimizer(unittest.TestCase):
# Check accumulators # Check accumulators
accumulators = adam_optimizer.get_accumulators() accumulators = adam_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 2) self.assertEqual(len(accumulators), 4)
self.assertTrue(adam_optimizer.get_moment1_str() in accumulators) self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
self.assertTrue(adam_optimizer.get_moment2_str() in accumulators) self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
moment1_acc = accumulators[adam_optimizer.get_moment1_str()] moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
...@@ -354,7 +354,7 @@ class TestAdamaxOptimizer(unittest.TestCase): ...@@ -354,7 +354,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
# Check accumulators # Check accumulators
accumulators = adamax_optimizer.get_accumulators() accumulators = adamax_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 2) self.assertEqual(len(accumulators), 3)
self.assertTrue(adamax_optimizer.get_moment_str() in accumulators) self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators) self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
moment_acc = accumulators[adamax_optimizer.get_moment_str()] moment_acc = accumulators[adamax_optimizer.get_moment_str()]
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import numpy as np
from threading import Thread
def feed_data(feed_queue, inputs):
for in_data in inputs:
feed_queue.push(in_data)
class TestPyReader(unittest.TestCase):
def setUp(self):
self.capacity = 10
self.batch_size_min = 10
self.batch_size_max = 20
self.shapes = [(-1, 3, 2, 1), (-1, 1)]
self.lod_levels = [0, 0]
self.dtypes = ['float32', 'int64']
self.iterations = 20
def test_single_thread_main(self):
self.main(use_thread=False)
def test_multiple_thread_main(self):
self.main(use_thread=True)
def main(self, use_thread=False):
with fluid.program_guard(fluid.Program(), fluid.Program()):
place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
) else fluid.CPUPlace()
executor = fluid.Executor(place)
data_file, feed_queue = fluid.layers.py_reader(
capacity=self.capacity,
dtypes=self.dtypes,
lod_levels=self.lod_levels,
shapes=self.shapes)
read_out_data = fluid.layers.read_file(data_file)
self.inputs = []
for i in range(self.iterations):
in_data = fluid.LoDTensorArray()
batch_size = np.random.random_integers(self.batch_size_min,
self.batch_size_max)
for shape, dtype in zip(self.shapes, self.dtypes):
next_data = np.random.uniform(
low=0, high=1000,
size=(batch_size, ) + shape[1:]).astype(dtype)
in_data.append(executor.as_lodtensor(next_data))
self.inputs.append(in_data)
executor.run(fluid.default_startup_program())
self.outputs = []
if use_thread:
thread = Thread(
target=feed_data, args=(feed_queue, self.inputs))
thread.start()
for in_data in self.inputs:
self.outputs.append(
executor.run(fetch_list=list(read_out_data)))
else:
for in_data in self.inputs:
feed_queue.push(in_data)
self.outputs.append(
executor.run(fetch_list=list(read_out_data)))
feed_queue.close()
self.validate()
def validate(self):
self.assertEqual(len(self.inputs), len(self.outputs))
for in_data_list, out_data_list in zip(self.inputs, self.outputs):
self.assertEqual(len(in_data_list), len(out_data_list))
in_data_list_np = [
np.array(in_lod_tensor) for in_lod_tensor in in_data_list
]
for in_data, out_data in zip(in_data_list_np, out_data_list):
self.assertTrue((in_data == out_data).all())
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
import threading
import multiprocessing
import os
def as_tensor(np_array_or_tensor, place=None):
if isinstance(np_array_or_tensor, fluid.LoDTensor):
return np_array_or_tensor
if place is None:
place = fluid.CPUPlace()
tensor = fluid.LoDTensor()
tensor.set(np_array_or_tensor, place)
return tensor
def as_numpy(tensor_or_numpy):
return tensor_or_numpy if isinstance(
tensor_or_numpy, np.ndarray) else np.array(tensor_or_numpy)
def feed_data(feed_queue, reader):
data_generator = reader()
while True:
data = next(data_generator, None)
if data is None or not feed_queue.push(data):
break
def simple_fc_net(in_size,
class_num,
hidden_sizes,
batch_size,
queue_capacity,
use_double_buffer=False):
reader, feed_queue = fluid.layers.py_reader(
capacity=queue_capacity,
shapes=[[-1, in_size], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
reader = fluid.layers.batch(reader, batch_size=batch_size)
if use_double_buffer:
reader = fluid.layers.double_buffer(reader)
    in_data, label = fluid.layers.read_file(reader)
    hidden = in_data
    for hidden_size in hidden_sizes:
        hidden = fluid.layers.fc(
            hidden,
            size=hidden_size,
            act='tanh',
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=1.0)))

    predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax')
    loss = fluid.layers.mean(
        fluid.layers.cross_entropy(
            input=predict_label, label=label))

    optimizer = fluid.optimizer.Adam()
    optimizer.minimize(loss)
    return in_data, label, loss, optimizer, feed_queue


class TestPyReaderUsingExecutor(unittest.TestCase):
    def setUp(self):
        self.in_size = 1000
        self.hidden_sizes = [50, 30, 20]
        self.class_num = 10
        self.batch_size = 32
        self.iterations = 10
        self.queue_capacity = 50

    def test(self):
        for use_cuda in [False, True]:
            for use_parallel_executor in [False, True]:
                for use_double_buffer in [False, True]:
                    print('Test Parameters:'),
                    print({
                        'use_cuda': use_cuda,
                        'use_parallel_executor': use_parallel_executor,
                        'use_double_buffer': use_double_buffer
                    })
                    self.main(use_cuda, use_parallel_executor,
                              use_double_buffer)

    def random_reader(self):
        def reader():
            self.inputs = []
            cnt = 0
            while True:
                tensors = fluid.LoDTensorArray()
                in_data = np.random.uniform(
                    low=0, high=1, size=(1, self.in_size)).astype('float32')
                tensors.append(as_tensor(in_data))
                label = np.random.random_integers(
                    low=0, high=self.class_num - 1, size=(1, 1)).astype('int64')
                tensors.append(as_tensor(label))

                if cnt < self.iterations * self.batch_size * self.batch_size_times:
                    if cnt % (self.batch_size * self.batch_size_times) == 0:
                        self.inputs.append([in_data, label])
                    else:
                        self.inputs[-1][0] = np.concatenate(
                            (self.inputs[-1][0], in_data), axis=0)
                        self.inputs[-1][1] = np.concatenate(
                            (self.inputs[-1][1], label), axis=0)
                elif not self.use_double_buffer:
                    break

                yield tensors
                cnt += 1

            yield None

        return reader

    def main(self,
             use_cuda=True,
             use_parallel_executor=False,
             use_double_buffer=False):
        assert not use_cuda or use_cuda and core.is_compiled_with_cuda()
        self.use_cuda = use_cuda
        self.use_parallel_executor = use_parallel_executor
        self.use_double_buffer = use_double_buffer

        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            in_data, label, loss, optimizer, feed_queue = simple_fc_net(
                in_size=self.in_size,
                class_num=self.class_num,
                hidden_sizes=self.hidden_sizes,
                batch_size=self.batch_size,
                queue_capacity=self.queue_capacity,
                use_double_buffer=self.use_double_buffer)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        startup_exe = fluid.Executor(place)
        startup_exe.run(startup_program)

        if use_parallel_executor:
            main_exe = fluid.ParallelExecutor(use_cuda, loss_name=loss.name)
            if use_cuda:
                self.batch_size_times = core.get_cuda_device_count()
            else:
                self.batch_size_times = int(
                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
        else:
            main_exe = startup_exe
            self.batch_size_times = 1

        reader = self.random_reader()
        thread = threading.Thread(
            target=feed_data, args=(feed_queue, reader))
        thread.start()

        self.outputs = []
        for _ in range(self.iterations):
            fetches = main_exe.run(fetch_list=[in_data.name, label.name])
            fetches = [as_numpy(fetch) for fetch in fetches]
            self.outputs.append(fetches)

        feed_queue.close()
        self.validate()

    def validate(self):
        self.assertEqual(len(self.inputs), len(self.outputs))
        for batch_in, batch_out in zip(self.inputs, self.outputs):
            self.assertEqual(len(batch_in), len(batch_out))
            if self.use_parallel_executor and not self.use_double_buffer:
                self.validate_unordered_batch(batch_in, batch_out)
            else:
                for in_data, out_data in zip(batch_in, batch_out):
                    self.assertEqual(in_data.shape, out_data.shape)
                    if not self.use_parallel_executor:
                        self.assertTrue((in_data == out_data).all())

    def validate_unordered_batch(self, batch_in, batch_out):
        out_index_left_set = set(range(self.batch_size * self.batch_size_times))
        mapping_num = 0
        for i in range(self.batch_size * self.batch_size_times):
            for j in out_index_left_set:
                flag = True
                for k in range(len(batch_in)):
                    in_data = batch_in[k][i]
                    out_data = batch_out[k][j]
                    if (in_data != out_data).any():
                        flag = False
                        break

                if flag:
                    out_index_left_set.remove(j)
                    mapping_num += 1
                    break

        self.assertEqual(mapping_num, self.batch_size * self.batch_size_times)


if __name__ == '__main__':
    unittest.main()
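Note: the test above drives the graph from a background feeder thread — threading.Thread(target=feed_data, args=(feed_queue, reader)) — which keeps pushing batches into the reader's bounded queue while the executor drains one batch per iteration, then closes the queue. A minimal, framework-free sketch of that producer/consumer pattern using only the standard library (the names below are illustrative and are not Paddle APIs):

import threading
try:
    import queue            # Python 3
except ImportError:
    import Queue as queue   # Python 2 fallback

def feeder(q, batches):
    # Producer: blocks whenever the bounded queue is full.
    for batch in batches:
        q.put(batch)
    q.put(None)  # sentinel, playing the role of feed_queue.close()

def drain(q):
    # Consumer: mirrors the executor loop pulling one batch per iteration.
    results = []
    while True:
        batch = q.get()
        if batch is None:
            break
        results.append(batch)
    return results

q = queue.Queue(maxsize=50)  # bounded, like queue_capacity above
t = threading.Thread(target=feeder, args=(q, [[i, i + 1] for i in range(10)]))
t.start()
print(len(drain(q)))  # all 10 toy "batches" arrive on the consumer side
t.join()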
...@@ -40,12 +40,12 @@ class TestSelectedRows(unittest.TestCase):
         # compare tensor
         self.assertAlmostEqual(2.0,
-                               selected_rows.get_tensor().get_float_element(0))
+                               selected_rows.get_tensor()._get_float_element(0))
         self.assertAlmostEqual(1.0,
-                               selected_rows.get_tensor().get_float_element(1))
+                               selected_rows.get_tensor()._get_float_element(1))
         self.assertAlmostEqual(
             4.0,
-            selected_rows.get_tensor().get_float_element(2 * row_numel + 8))
+            selected_rows.get_tensor()._get_float_element(2 * row_numel + 8))

 if __name__ == "__main__":
...
...@@ -45,8 +45,8 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
     def sum_lodtensor(self, tensor):
         sum_res = 0.0
-        for i in xrange(np.product(tensor.get_dims())):
-            sum_res += tensor.get_float_element(i)
+        for i in xrange(np.product(tensor.shape())):
+            sum_res += tensor._get_float_element(i)
         return sum_res
...
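Aside: since a LoDTensor converts to a numpy array elsewhere in these tests (numpy.array(tensor)), the element-by-element sum above could also be written in one line; a sketch, assuming the same numpy conversion applies here:

    sum_res = float(np.array(tensor).sum())  # sums all elements at once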
...@@ -25,8 +25,8 @@ class TestTensor(unittest.TestCase):
         tensor = var.get_tensor()
-        tensor.set_dims([1000, 784])
-        tensor.alloc_int(place)
+        tensor._set_dims([1000, 784])
+        tensor._alloc_int(place)
         tensor_array = numpy.array(tensor)
         self.assertEqual((1000, 784), tensor_array.shape)
         tensor_array[3, 9] = 1
...@@ -44,8 +44,8 @@ class TestTensor(unittest.TestCase):
         tensor = var.get_tensor()
-        tensor.set_dims([1000, 784])
-        tensor.alloc_float(place)
+        tensor._set_dims([1000, 784])
+        tensor._alloc_float(place)
         tensor_array = numpy.array(tensor)
         self.assertEqual((1000, 784), tensor_array.shape)
...@@ -63,8 +63,8 @@ class TestTensor(unittest.TestCase):
         var_lod = scope.var("test_lod_tensor")
         lod_tensor = var_lod.get_tensor()
-        lod_tensor.set_dims([4, 4, 6])
-        lod_tensor.alloc_int(place)
+        lod_tensor._set_dims([4, 4, 6])
+        lod_tensor._alloc_int(place)
         array = numpy.array(lod_tensor)
         array[0, 0, 0] = 3
         array[3, 3, 5] = 10
...@@ -84,8 +84,8 @@ class TestTensor(unittest.TestCase):
         var_lod = scope.var("test_lod_tensor")
         lod_tensor = var_lod.get_tensor()
-        lod_tensor.set_dims([5, 2, 3, 4])
-        lod_tensor.alloc_float(place)
+        lod_tensor._set_dims([5, 2, 3, 4])
+        lod_tensor._alloc_float(place)
         tensor_array = numpy.array(lod_tensor)
         self.assertEqual((5, 2, 3, 4), tensor_array.shape)
...@@ -104,14 +104,13 @@ class TestTensor(unittest.TestCase):
         self.assertListEqual(lod_py, lod)

     def test_lod_tensor_init(self):
-        scope = core.Scope()
         place = core.CPUPlace()
         lod_py = [[2, 1], [1, 2, 2]]
         lod_tensor = core.LoDTensor()

-        lod_tensor.set_dims([5, 2, 3, 4])
+        lod_tensor._set_dims([5, 2, 3, 4])
         lod_tensor.set_recursive_sequence_lengths(lod_py)
-        lod_tensor.alloc_float(place)
+        lod_tensor._alloc_float(place)
         tensor_array = numpy.array(lod_tensor)
         tensor_array[0, 0, 0, 0] = 1.0
         tensor_array[0, 0, 0, 1] = 2.0
...@@ -129,9 +128,9 @@ class TestTensor(unittest.TestCase):
         lod_py = [[2, 1], [1, 2, 2]]
         lod_tensor = core.LoDTensor()

-        lod_tensor.set_dims([5, 2, 3, 4])
+        lod_tensor._set_dims([5, 2, 3, 4])
         lod_tensor.set_recursive_sequence_lengths(lod_py)
-        lod_tensor.alloc_float(place)
+        lod_tensor._alloc_float(place)
         tensor_array = numpy.array(lod_tensor)
         tensor_array[0, 0, 0, 0] = 1.0
         tensor_array[0, 0, 0, 1] = 2.0
...@@ -149,15 +148,15 @@ class TestTensor(unittest.TestCase):
         tensor = var.get_tensor()
-        tensor.set_dims([0, 1])
-        tensor.alloc_float(place)
+        tensor._set_dims([0, 1])
+        tensor._alloc_float(place)
         tensor_array = numpy.array(tensor)
         self.assertEqual((0, 1), tensor_array.shape)

         if core.is_compiled_with_cuda():
             gpu_place = core.CUDAPlace(0)
-            tensor.alloc_float(gpu_place)
+            tensor._alloc_float(gpu_place)
             tensor_array = numpy.array(tensor)
             self.assertEqual((0, 1), tensor_array.shape)
...
...@@ -75,7 +75,7 @@ def set_input(scope, op, inputs, place):
             if isinstance(var, tuple):
                 tensor.set_recursive_sequence_lengths(var[1])
                 var = var[0]
-            tensor.set_dims(var.shape)
+            tensor._set_dims(var.shape)
             tensor.set(var, place)
         elif isinstance(var, float):
             scope.find_var(var_name).set_float(var)
...
...@@ -377,11 +377,6 @@ class DistributeTranspiler(object):
         # append it into the sub program.
         global_ops = []
-        # HACK: optimization global ops only used to scale beta1 and beta2
-        # replace it with dependency engine.
-        for op in self.optimize_ops:
-            if self._is_adam_connected_op(op):
-                global_ops.append(op)

         def __append_optimize_op__(op, block, grad_to_block_id, merged_var,
                                    lr_ops):
...@@ -1289,26 +1284,8 @@ class DistributeTranspiler(object):
         # If one op's input is another op's output or
         # one op's output is another op's input, we say
         # the two operator is connected.
-        def _append_inname_remove_beta(varname_list):
-            op_input_names = []
-            for in_name in varname_list:
-                # HACK: remove beta1 and beta2 to avoid let all
-                # ops connected.
-                if in_name.startswith("beta2_pow_acc") or \
-                        in_name.startswith("beta1_pow_acc"):
-                    continue
-                else:
-                    op_input_names.append(in_name)
-            return op_input_names
-
-        op1_input_names = _append_inname_remove_beta(op1.desc.input_arg_names())
-        op1_output_names = op1.desc.output_arg_names()
-        op2_input_names = _append_inname_remove_beta(op2.desc.input_arg_names())
-        op2_output_names = op2.desc.output_arg_names()
-
-        if set(op1_output_names) & set(op2_input_names) or \
-           set(op1_input_names) & set(op2_output_names):
+        if set(op1.desc.output_arg_names()) & set(op2.desc.input_arg_names()) or \
+           set(op1.desc.input_arg_names()) & set(op2.desc.output_arg_names()):
             return True
         return False
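The simplified check above reduces to a set intersection over each operator's input and output argument names. A standalone sketch of the same idea, with a hypothetical FakeOp stub standing in for an operator desc (not a Paddle class):

class FakeOp(object):
    def __init__(self, inputs, outputs):
        self.inputs = inputs    # input argument names
        self.outputs = outputs  # output argument names

def is_connected(op1, op2):
    # Connected if one op consumes something the other produces.
    return bool(set(op1.outputs) & set(op2.inputs) or
                set(op1.inputs) & set(op2.outputs))

sgd = FakeOp(inputs=['Param', 'Grad'], outputs=['ParamOut'])
scale = FakeOp(inputs=['ParamOut'], outputs=['Scaled'])
print(is_connected(sgd, scale))  # True: 'ParamOut' links the two ops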
...@@ -1413,7 +1390,7 @@ class DistributeTranspiler(object):
     def _get_optimize_pass(self):
         """
-        Get optimizer operators, paramters and gradients from origin_program
+        Get optimizer operators, parameters and gradients from origin_program
         Returns:
             opt_ops (list): optimize operators.
             params_grads (dict): paramter->gradient.
...@@ -1436,20 +1413,6 @@ class DistributeTranspiler(object):
                             origin_var_dict[param_name],
                             origin_var_dict[input_name]
                         ])
-            elif self._is_adam_connected_op(op):
-                opt_ops.append(op)
             else:
                 pass
         return opt_ops, params_grads

-    def _is_adam_connected_op(self, op):
-        """
-        A hack function to determinate whether the input operator
-        is connected to optimize operator.
-        """
-        if op.type == "scale":
-            for in_name in op.input_arg_names:
-                if in_name.startswith("beta1_pow_acc") or \
-                   in_name.startswith("beta2_pow_acc"):
-                    return True
-        return False
...@@ -42,12 +42,12 @@ def get_patch():
 def is_taged():
     try:
-        cmd = ['git', 'describe', '--exact-match', '--tags']
+        cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null']
         git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip()
     except:
         return False

-    if git_tag.replace('v', '') == '@PADDLE_VERSION@':
+    if str(git_tag).replace('v', '') == '@PADDLE_VERSION@':
         return True
     else:
         return False
...
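For reference, the same tag check can be reproduced outside the build template; a sketch using subprocess.check_output with stderr suppressed (this is not the setup.py.in code, just an equivalent standalone form):

import subprocess

def head_is_exact_tag():
    # Returns the tag name if HEAD sits exactly on a git tag, else None.
    try:
        out = subprocess.check_output(
            ['git', 'describe', '--exact-match', '--tags', 'HEAD'],
            stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        return None
    return out.decode().strip()

print(head_is_exact_tag())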