diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b53a6f43fbd1f23e69d23ad0fcc54d5c25d352a3..3a9027713afb5287c7addf8be745acfd185104ee 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -95,7 +95,7 @@ ParallelExecutor::ParallelExecutor( } if (member_->local_scopes_.size() != 1 && local_scopes.empty()) { - BCastParamsToGPUs(bcast_vars); + BCastParamsToDevs(bcast_vars); } // Startup Program has been run. All local scopes has correct parameters. @@ -131,7 +131,7 @@ ParallelExecutor::ParallelExecutor( member_->places_, std::move(member_->executor_))); } -void ParallelExecutor::BCastParamsToGPUs( +void ParallelExecutor::BCastParamsToDevs( const std::unordered_set &vars) const { // the the initializing bcast, all vars would be bcast from device(0), // otherwise @@ -202,7 +202,11 @@ void ParallelExecutor::BCastParamsToGPUs( #endif } else { platform::CPUPlace cpu; - for (size_t i = 1; i < member_->places_.size(); ++i) { + for (size_t i = 0; i < member_->places_.size(); ++i) { + if ((initializing && i == 0) || + (!initializing && static_cast(i) == var_dev_id)) + continue; + auto local_scope = member_->local_scopes_[i]; auto *t = local_scope->Var(var)->GetMutable(); t->Resize(dims); diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 058f83f07c26224e3180d140630c08a24c40cd80..6985b6540690c6218bcee51ba0e69f3d34812bfc 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -66,7 +66,7 @@ class ParallelExecutor { void Run(const std::vector &fetch_tensors, const std::string &fetched_var_name); - void BCastParamsToGPUs(const std::unordered_set &vars) const; + void BCastParamsToDevs(const std::unordered_set &vars) const; private: ParallelExecutorPrivate *member_; diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h index 6c4432cb7a70853e19460b1980d621c02caed970..a8d04feb42456607159bcbede0574fe90dfe995c 100644 --- a/paddle/fluid/framework/reader.h +++ b/paddle/fluid/framework/reader.h @@ -29,11 +29,11 @@ enum ReaderStatus { kRunning, kStopped }; class ReaderBase { public: - void ReadNext(std::vector* out); + virtual void ReadNext(std::vector* out); - void Shutdown(); + virtual void Shutdown(); - void Start(); + virtual void Start(); // Return the readers which are the end of decorating chain. Basically // they are readers just before read op. 
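Making `ReadNext`, `Shutdown` and `Start` virtual lets the `PyReader` added further down in this patch override them directly, mapping `Shutdown`/`Start` onto `Close()`/`ReOpen()` of its blocking queue. From Python, the visible contract is the push/close protocol on the feed queue returned by `fluid.layers.py_reader` (also added later in this patch). A minimal producer sketch, assuming, as the new unit tests do, that `push()` reports failure once the queue has been closed; `batches` is a hypothetical iterable of `fluid.LoDTensorArray` objects prepared by the caller:

```python
def feed_worker(feed_queue, batches):
    """Push prepared LoDTensorArray batches until done or the queue closes."""
    for batch in batches:
        # push() blocks while the queue is full and fails once it is closed.
        if not feed_queue.push(batch):
            break
    # close() unblocks a PyReader waiting inside ReadNext on the C++ side.
    feed_queue.close()
```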
@@ -42,7 +42,7 @@ class ReaderBase { virtual ~ReaderBase(); protected: - virtual void ReadNextImpl(std::vector* out) = 0; + virtual void ReadNextImpl(std::vector* out) {} virtual void ShutdownImpl() {} diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h index db8cf3b605c9175eeda4548b1e7c8203f26c5d89..28cc91a5ed5d74994e5b960a0a4dd3c6a5e6cdcc 100644 --- a/paddle/fluid/operators/reader/blocking_queue.h +++ b/paddle/fluid/operators/reader/blocking_queue.h @@ -81,6 +81,15 @@ class BlockingQueue { } } + void ReOpen() { + std::lock_guard lock(mutex_); + closed_ = false; + std::deque new_deque; + queue_.swap(new_deque); + send_cv_.notify_all(); + receive_cv_.notify_all(); + } + void Close() { std::lock_guard lock(mutex_); closed_ = true; diff --git a/paddle/fluid/operators/reader/create_py_reader_op.cc b/paddle/fluid/operators/reader/create_py_reader_op.cc index d41124279930e92138e7e6a5ab045659a415eb6d..833776f56eef0ffb2ae5e963919f0482bcd511b8 100644 --- a/paddle/fluid/operators/reader/create_py_reader_op.cc +++ b/paddle/fluid/operators/reader/create_py_reader_op.cc @@ -27,19 +27,17 @@ class PyReader : public framework::FileReader { queue_ = queue; } - void ReadNextImpl(std::vector* out) override { + void ReadNext(std::vector* out) override { bool success; *out = queue_->Pop(&success); if (!success) out->clear(); } - private: - void ShutdownImpl() override { /* TODO */ - } + void Shutdown() override { queue_->Close(); } - void StartImpl() override { /* TODO */ - } + void Start() override { queue_->ReOpen(); } + private: std::shared_ptr queue_; }; diff --git a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h index 30d962ba10a954a837f9771d21cedf0feb643439..311a429f9c307f3913a1ffe5dfb7d84119c9711e 100644 --- a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h +++ b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h @@ -58,12 +58,15 @@ class LoDTensorBlockingQueue { inline size_t Size() const { return queue_.Size(); } - inline void Close() { return queue_.Close(); } + inline void ReOpen() { queue_.ReOpen(); } + + inline void Close() { queue_.Close(); } inline bool IsClosed() const { return queue_.IsClosed(); } private: - void CheckDims(const std::vector& lod_tensor_vec) { + void CheckDims( + const std::vector& lod_tensor_vec) const { PADDLE_ENFORCE(dims_.size() == lod_tensor_vec.size(), "Expect input size is %d but found %s", dims_.size(), lod_tensor_vec.size()); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 96ab5d457be2da5ed0a6b9d93b5510d7399bf20e..d8dc421bed711cfc1a149592c24b11c4ef115ec9 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include #include +#include #include // NOLINT // for call_once #include #include @@ -86,37 +87,37 @@ PYBIND11_PLUGIN(core) { py::class_(m, "Tensor", py::buffer_protocol()) .def_buffer( [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) - .def("get_dims", + .def("_get_dims", [](const Tensor &self) { return vectorize(self.dims()); }) - .def("set_dims", + .def("_set_dims", [](Tensor &self, const std::vector &dim) { self.Resize(make_ddim(dim)); }) - .def("set_layout", + .def("_set_layout", [](Tensor &self, const std::string &layout) { self.set_layout(StringToDataLayout(layout)); }) - .def("alloc_float", + .def("_alloc_float", [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) - .def("alloc_float", + .def("_alloc_float", [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) - .def("alloc_int", + .def("_alloc_int", [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) - .def("alloc_int", + .def("_alloc_int", [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) - .def("alloc_int", + .def("_alloc_int", [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { self.mutable_data(place); }) - .def("alloc_float", + .def("_alloc_float", [](Tensor &self, paddle::platform::CUDAPinnedPlace &place) { self.mutable_data(place); }) @@ -144,11 +145,11 @@ PYBIND11_PLUGIN(core) { .def("set", PyCUDAPinnedTensorSetFromArray) #endif .def("shape", [](Tensor &self) { return vectorize(self.dims()); }) - .def("set_float_element", TensorSetElement) - .def("get_float_element", TensorGetElement) - .def("set_double_element", TensorSetElement) - .def("get_double_element", TensorGetElement) - .def("dtype", [](Tensor &self) { return ToDataType(self.type()); }); + .def("_set_float_element", TensorSetElement) + .def("_get_float_element", TensorGetElement) + .def("_set_double_element", TensorSetElement) + .def("_get_double_element", TensorGetElement) + .def("_dtype", [](Tensor &self) { return ToDataType(self.type()); }); py::class_(m, "LoDTensor") .def_buffer( @@ -310,7 +311,8 @@ All parameter, weight, gradient are variables in Paddle. ::paddle::operators::reader::LoDTensorBlockingQueue; using LoDTensorBlockingQueueHolder = ::paddle::operators::reader::LoDTensorBlockingQueueHolder; - py::class_(m, "LoDTensorBlockingQueue", "") + py::class_>( + m, "LoDTensorBlockingQueue", "") .def("push", [](LoDTensorBlockingQueue &self, const std::vector &lod_tensor_vec) { @@ -325,7 +327,7 @@ All parameter, weight, gradient are variables in Paddle. m.def("init_lod_tensor_blocking_queue", [](Variable &var, size_t capacity, const std::vector> &shapes) - -> LoDTensorBlockingQueue * { + -> std::shared_ptr { std::vector dims(shapes.size()); std::transform(shapes.begin(), shapes.end(), dims.begin(), [](const std::vector &shape) { @@ -333,9 +335,9 @@ All parameter, weight, gradient are variables in Paddle. }); auto *holder = var.GetMutable(); holder->InitOnce(capacity, dims); - return holder->GetQueue().get(); + return holder->GetQueue(); }, - py::return_value_policy::reference); + py::return_value_policy::copy); py::class_(m, "Scope", "") .def("var", @@ -543,6 +545,8 @@ All parameter, weight, gradient are variables in Paddle. 
}); py::class_(m, "LoDTensorArray") + .def("__init__", + [](LoDTensorArray &instance) { new (&instance) LoDTensorArray(); }) .def("__getitem__", [](LoDTensorArray &self, size_t i) { return &self.at(i); }, py::return_value_policy::reference) @@ -665,7 +669,7 @@ All parameter, weight, gradient are variables in Paddle. const std::string &, Scope *, std::vector &, const ExecutionStrategy &, const BuildStrategy &, size_t, size_t>()) - .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs) + .def("bcast_params", &ParallelExecutor::BCastParamsToDevs) // NOTE: even we return a vec* to Python use reference policy. // We still cannot get local_scope from this vector, since the element // of vec will be freed by Python GC. We can only return Scope* diff --git a/paddle/legacy/capi/Arguments.cpp b/paddle/legacy/capi/Arguments.cpp index 87fac3d6c6abe37b128213d4ffd66f8c1573a910..0ce1770c76c2e145d0b2bf71332cc4593517f195 100644 --- a/paddle/legacy/capi/Arguments.cpp +++ b/paddle/legacy/capi/Arguments.cpp @@ -66,6 +66,17 @@ paddle_error paddle_arguments_get_value(paddle_arguments args, return kPD_NO_ERROR; } +PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args, + uint64_t ID, + paddle_matrix mat) { + if (args == nullptr || mat == nullptr) return kPD_NULLPTR; + auto m = paddle::capi::cast(mat); + auto a = castArg(args); + if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; + m->mat = a->args[ID].in; + return kPD_NO_ERROR; +} + paddle_error paddle_arguments_get_ids(paddle_arguments args, uint64_t ID, paddle_ivector ids) { diff --git a/paddle/legacy/capi/arguments.h b/paddle/legacy/capi/arguments.h index 69a66bb012c318bc8317c246d690a7f4baffd248..ceb64ee6aa74a8ba4b5cb9045b366dcda8f8cc90 100644 --- a/paddle/legacy/capi/arguments.h +++ b/paddle/legacy/capi/arguments.h @@ -87,6 +87,18 @@ PD_API paddle_error paddle_arguments_get_value(paddle_arguments args, uint64_t ID, paddle_matrix mat); +/** + * @brief paddle_arguments_get_prob Get the prob matrix of beam search, which + * slot ID is `ID` + * @param [in] args arguments array + * @param [in] ID array index + * @param [out] mat matrix pointer + * @return paddle_error + */ +PD_API paddle_error paddle_arguments_get_prob(paddle_arguments args, + uint64_t ID, + paddle_matrix mat); + /** * @brief PDArgsGetIds Get the integer vector of one argument in array, which * index is `ID`. 
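The `__init__` binding above makes `LoDTensorArray` default-constructible from Python, and the `python/paddle/fluid/__init__.py` change just below re-exports it as `fluid.LoDTensorArray`, which is the container the feed queue expects to be pushed. A small illustrative round trip, assuming a CPU place; the numpy shapes and data are arbitrary:

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
arr = fluid.LoDTensorArray()            # enabled by the new __init__ binding

for np_data in (np.random.rand(4, 3).astype('float32'),
                np.random.randint(0, 10, size=(4, 1)).astype('int64')):
    t = fluid.LoDTensor()
    t.set(np_data, place)               # copy the numpy array into the tensor
    arr.append(t)                       # same usage as in the new unit tests

first = np.array(arr[0])                # __getitem__ returns the stored tensor
print(first.shape)                      # (4, 3)
```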
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index b364fbcc0f5d614ff93b28d810409288d4fc2336..a06e041c1e8aaa8897ac77f2ec1275824849e7ef 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -44,7 +44,7 @@ import metrics import transpiler from param_attr import ParamAttr, WeightNormParamAttr from data_feeder import DataFeeder -from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope +from core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope from transpiler import DistributeTranspiler, InferenceTranspiler, \ memory_optimize, release_memory from concurrency import (Go, make_channel, channel_send, channel_recv, @@ -72,6 +72,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \ 'backward', 'regularizer', 'LoDTensor', + 'LoDTensorArray', 'CPUPlace', 'CUDAPlace', 'CUDAPinnedPlace', diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 18e2f3045e272fb4712391f87bffd3f367c1c744..2a8e3d410add466436524d8cc7714fce955af2b5 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -31,7 +31,7 @@ class BaseErrorClipAttr(object): def __str__(self): raise NotImplementedError() - def append_clip_op(self, block, grad_name): + def _append_clip_op(self, block, grad_name): raise NotImplementedError() @@ -67,7 +67,7 @@ class ErrorClipByValue(BaseErrorClipAttr): def __str__(self): return "ByValue, min=%f, max=%f" % (self.min, self.max) - def append_clip_op(self, block, grad_name): + def _append_clip_op(self, block, grad_name): clip_op_desc = block.desc.append_op() clip_op_desc.set_type("clip") clip_op_desc.set_input("X", [grad_name]) @@ -90,17 +90,17 @@ def error_clip_callback(block, context): "Variable's error_clip should be an instance of BaseErrorClipAttr or None." 
) if error_clip is not None: - error_clip.append_clip_op(block, grad_n) + error_clip._append_clip_op(block, grad_n) class BaseGradientClipAttr(object): def __str__(self): raise NotImplementedError() - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): raise NotImplementedError() - def create_operators(self, param, grad): + def _create_operators(self, param, grad): raise NotImplementedError() @@ -108,10 +108,10 @@ class NullGradientClipAttr(BaseGradientClipAttr): def __str__(self): return "Null" - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): pass - def create_operators(self, param, grad): + def _create_operators(self, param, grad): return param, grad @@ -153,10 +153,10 @@ class GradientClipByValue(BaseGradientClipAttr): def __str__(self): return "ByValue, min=%f, max=%f" % (self.min, self.max) - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): pass - def create_operators(self, param, grad): + def _create_operators(self, param, grad): new_grad = layers.clip(x=grad, min=self.min, max=self.max) return param, new_grad @@ -199,10 +199,10 @@ class GradientClipByNorm(BaseGradientClipAttr): def __str__(self): return "ByNorm, clip_norm=%f" % self.clip_norm - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): pass - def create_operators(self, param, grad): + def _create_operators(self, param, grad): new_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm) return param, new_grad @@ -257,7 +257,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): return "ByGlobalNorm, group_name=%s, clip_norm=%f" % (self.group_name, self.clip_norm) - def process_context(self, context, param, grad): + def _process_context(self, context, param, grad): if self.group_name not in context: context[self.group_name] = [] context[self.group_name + "_clip_value"] = self.clip_norm @@ -274,7 +274,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): self.context = context - def create_operators(self, param, grad): + def _create_operators(self, param, grad): group_scale_name = self.group_name + "_scale" if group_scale_name not in self.context: group_norm_var = layers.sums(input=self.context[self.group_name]) @@ -336,12 +336,12 @@ def append_gradient_clip_ops(param_grad): "clip attribute should be an instance of BaseGradientClipAttr" ) - clip_attr.process_context(context=context, param=p, grad=g) + clip_attr._process_context(context=context, param=p, grad=g) res = [] for p, g in param_grad: with p.block.program.optimized_guard(p): - res.append(clip_attr.create_operators(param=p, grad=g)) + res.append(clip_attr._create_operators(param=p, grad=g)) return res diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 86efd1ff51cf29485ee28b4d60ffb1439af1aad9..de752d1daeb6bc725cf6eff1bb74a786e2ad6b95 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -68,11 +68,11 @@ class LayerHelper(object): @property def param_attr(self): - return ParamAttr.to_attr(self.kwargs.get('param_attr', None)) + return ParamAttr._to_attr(self.kwargs.get('param_attr', None)) @property def bias_attr(self): - return ParamAttr.to_attr(self.kwargs.get('bias_attr', None)) + return ParamAttr._to_attr(self.kwargs.get('bias_attr', None)) def multiple_param_attr(self, length): param_attr = self.param_attr @@ -262,11 +262,11 @@ class LayerHelper(object): g_param = 
self.startup_program.global_block().create_parameter( dtype=dtype, shape=g_param_shape, - **g_param_attr.to_kwargs(with_initializer=False)) + **g_param_attr._to_kwargs(with_initializer=False)) v_param = self.startup_program.global_block().create_parameter( dtype=dtype, shape=v_param_shape, - **v_param_attr.to_kwargs(with_initializer=True)) + **v_param_attr._to_kwargs(with_initializer=True)) __norm_except_dim( x=v_param, out=g_param, @@ -275,9 +275,9 @@ class LayerHelper(object): # Add weight normalization to main_program g_param = self.main_program.global_block().create_parameter( - dtype=dtype, shape=g_param_shape, **g_param_attr.to_kwargs()) + dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs()) v_param = self.main_program.global_block().create_parameter( - dtype=dtype, shape=v_param_shape, **v_param_attr.to_kwargs()) + dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs()) w_param = __weight_normalize(g_param, v_param, dim=attr.dim) return w_param @@ -296,11 +296,11 @@ class LayerHelper(object): if default_initializer is None and attr.initializer is None: if is_bias: - attr.set_default_bias_initializer() + attr._set_default_bias_initializer() else: - attr.set_default_param_initializer() + attr._set_default_param_initializer() else: - attr.set_default_initializer(default_initializer) + attr._set_default_initializer(default_initializer) # If weight normalization is set, insert extra parameters and ops. # Refer to https://arxiv.org/pdf/1602.07868.pdf @@ -310,9 +310,9 @@ class LayerHelper(object): return param self.startup_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True)) + dtype=dtype, shape=shape, **attr._to_kwargs(with_initializer=True)) return self.main_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr.to_kwargs()) + dtype=dtype, shape=shape, **attr._to_kwargs()) def get_parameter(self, name): param = self.main_program.global_block().var(name) diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 977abde21f38a0d25a90bc14426fd817df2c8508..34cdac52d33000cfb87a97a1486abe7a4a583bbd 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -24,7 +24,8 @@ from layer_function_generator import generate_layer_fn, templatedoc __all__ = [ 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv', 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch', - 'double_buffer', 'random_data_generator', 'Preprocessor', 'load' + 'double_buffer', 'random_data_generator', 'py_reader', 'Preprocessor', + 'load' ] @@ -445,6 +446,88 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): return monkey_patch_reader_methods(main_prog_var) +def py_reader(capacity, shapes, dtypes, lod_levels=None): + """ + Create a reader and blocking queue for data feeding in Python + + This layer returns a Reader Variable and a BlockingQueue. + The BlockingQueue provides a `push()` method to push a `LoDTensorArray` + object into the queue on the Python side. On the C++ side, the Reader + Variable invokes the `pop()` method of the queue to retrieve the + feeding data. Feeding data on the Python side and fetching data on the + C++ side can run in parallel. The BlockingQueue should be closed via its + `close()` method once it is no longer used. + + Args: + capacity(int): The maximum capacity of the BlockingQueue. + shapes(list): List of tuples which declare the data shapes. + dtypes(list): List of strs which declare the data types. 
+ lod_levels(list): List of ints which declare the data lod_levels. + + Returns: + tuple(Variable, BlockingQueue): + A Reader Variable from which we can get feeding data. + + A BlockingQueue object for data feeding. + + Examples: + + .. code-block:: python + + reader, queue = fluid.layers.py_reader( + capacity=10, + shapes=[[-1,3,224,224], [-1,1]], + dtypes=['float32', 'int64']) + # Via the reader, we can use 'read_file' layer to get data: + image, label = fluid.layers.read_file(reader) + + # Via the blocking queue, we can feed data using threads + def feed_data(queue, feed_images, feed_labels): + for feed_image, feed_label in zip(feed_images, feed_labels): + data = core.LoDTensorArray() + data.append(feed_image) + data.append(feed_label) + queue.push(data) + + thread = threading.Thread(target=feed_data, args=(queue, feed_images, feed_labels)) + thread.start() + """ + dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] + shape_concat = [] + ranks = [] + + for shape in shapes: + shape_concat.extend(shape) + ranks.append(len(shape)) + + if lod_levels is None: + lod_levels = [0] * len(shapes) + + queue_name = unique_name('lod_tensor_blocking_queue') + var = global_scope().var(queue_name) + feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes) + + startup_blk = default_startup_program().current_block() + startup_var = startup_blk.create_var(name=unique_name('create_py_reader')) + startup_blk.append_op( + type='create_py_reader', + inputs={'blocking_queue': queue_name}, + outputs={'Out': [startup_var]}, + attrs={ + 'shape_concat': shape_concat, + 'lod_levels': lod_levels, + 'ranks': ranks + }) + + startup_var.desc.set_dtypes(dtypes) + startup_var.persistable = True + + main_prog_var = _copy_reader_var_(default_main_program().current_block(), + startup_var) + + return monkey_patch_reader_methods(main_prog_var), feed_queue + + def open_files(filenames, shapes, lod_levels, diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index e2acf6d41a0085e6f741e46063b47d2ff1e769cb..214f47afa1e4e29d53b11ccc035b4d840f963591 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -123,7 +123,7 @@ class Optimizer(object): """ pass - def _finish_update(self, block): + def _finish_update(self, block, parameters): """Finish any custom updates needed before completing an optimization step @@ -132,7 +132,7 @@ class Optimizer(object): parameters: list of parameter variables for the optimizer Returns: - list of finish ops or None + None """ pass @@ -236,7 +236,8 @@ class Optimizer(object): # Get custom finish ops for subclasses # FIXME: Need to fix this once we figure out how to handle dependencies - self._finish_update(loss.block) + self._finish_update(loss.block, + [p[0] for p in parameters_and_grads]) end = len(global_block.ops) return global_block.slice_ops(start, end) @@ -486,6 +487,8 @@ class AdamOptimizer(Optimizer): """ _moment1_acc_str = "moment1" _moment2_acc_str = "moment2" + _beta1_pow_acc_str = "beta1_pow_acc" + _beta2_pow_acc_str = "beta2_pow_acc" def __init__(self, learning_rate=0.001, @@ -507,32 +510,22 @@ class AdamOptimizer(Optimizer): def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) - main_block = block.program.global_block() - # Create beta1 and beta2 power tensors - beta_shape = [1] - self._beta1_pow_acc = self.helper.create_global_variable( - name=unique_name.generate('beta1_pow_acc'), - dtype='float32' if self._dtype == None else self._dtype, - shape=beta_shape, - 
lod_level=0, - persistable=True) - self.helper.set_variable_initializer( - self._beta1_pow_acc, initializer=Constant(self._beta1)) - - self._beta2_pow_acc = self.helper.create_global_variable( - name=unique_name.generate('beta2_pow_acc'), - dtype='float32' if self._dtype == None else self._dtype, - shape=beta_shape, - lod_level=0, - persistable=True) - - self.helper.set_variable_initializer( - self._beta2_pow_acc, initializer=Constant(self._beta2)) - # Create accumulator tensors for first and second moments for p in parameters: self._add_accumulator(self._moment1_acc_str, p) self._add_accumulator(self._moment2_acc_str, p) + self._add_accumulator( + name=self._beta1_pow_acc_str, + param=p, + dtype='float32', + fill_value=self._beta1, + shape=[1]) + self._add_accumulator( + name=self._beta2_pow_acc_str, + param=p, + dtype='float32', + fill_value=self._beta2, + shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -541,6 +534,11 @@ class AdamOptimizer(Optimizer): param_and_grad[0]) moment2 = self._get_accumulator(self._moment2_acc_str, param_and_grad[0]) + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param_and_grad[0]) + beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, + param_and_grad[0]) + # create the adam optimize op adam_op = block.append_op( type=self.type, @@ -550,8 +548,8 @@ class AdamOptimizer(Optimizer): "LearningRate": self._create_param_lr(param_and_grad), "Moment1": moment1, "Moment2": moment2, - "Beta1Pow": self._beta1_pow_acc, - "Beta2Pow": self._beta2_pow_acc + "Beta1Pow": beta1_pow_acc, + "Beta2Pow": beta2_pow_acc }, outputs={ "ParamOut": param_and_grad[0], @@ -566,24 +564,28 @@ class AdamOptimizer(Optimizer): return adam_op - def _finish_update(self, block): + def _finish_update(self, block, parameters): """Update Beta1 and Beta2 Power accumulators """ assert isinstance(block, framework.Block) main_block = block.program.global_block() - scale_beta1 = main_block.append_op( - type="scale", - inputs={"X": self._beta1_pow_acc}, - outputs={"Out": self._beta1_pow_acc}, - attrs={"scale": self._beta1}) - - scale_beta2 = main_block.append_op( - type="scale", - inputs={"X": self._beta2_pow_acc}, - outputs={"Out": self._beta2_pow_acc}, - attrs={"scale": self._beta2}) - - return [scale_beta1, scale_beta2] + for param in parameters: + with param.block.program.optimized_guard(param): + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str, + param) + main_block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}) + + main_block.append_op( + type="scale", + inputs={"X": beta2_pow_acc}, + outputs={"Out": beta2_pow_acc}, + attrs={"scale": self._beta2}) class AdamaxOptimizer(Optimizer): @@ -626,6 +628,7 @@ class AdamaxOptimizer(Optimizer): """ _moment_acc_str = "moment" _inf_norm_acc_str = "inf_norm" + _beta1_pow_acc_str = "beta1_pow_acc" def __init__(self, learning_rate=0.001, @@ -645,21 +648,16 @@ class AdamaxOptimizer(Optimizer): self._epsilon = epsilon def _create_accumulators(self, block, parameters): - # Create beta1 power accumulator tensor - beta_shape = [1] - self._beta1_pow_acc = self.helper.create_global_variable( - name=unique_name.generate('beta1_pow_acc'), - dtype='float32' if self._dtype == None else self._dtype, - shape=beta_shape, - lod_level=0, - persistable=True) - self.helper.set_variable_initializer( - self._beta1_pow_acc, 
initializer=Constant(self._beta1)) - # Create accumulator tensors for first moment and infinity norm for p in parameters: self._add_accumulator(self._moment_acc_str, p) self._add_accumulator(self._inf_norm_acc_str, p) + self._add_accumulator( + name=self._beta1_pow_acc_str, + param=p, + dtype='float32', + fill_value=self._beta1, + shape=[1]) def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -667,6 +665,8 @@ class AdamaxOptimizer(Optimizer): moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) inf_norm = self._get_accumulator(self._inf_norm_acc_str, param_and_grad[0]) + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param_and_grad[0]) # create the adamax optimize op adamax_op = block.append_op( type=self.type, @@ -676,7 +676,7 @@ class AdamaxOptimizer(Optimizer): "LearningRate": self._create_param_lr(param_and_grad), "Moment": moment, "InfNorm": inf_norm, - "Beta1Pow": self._beta1_pow_acc + "Beta1Pow": beta1_pow_acc }, outputs={ "ParamOut": param_and_grad[0], @@ -691,18 +691,20 @@ class AdamaxOptimizer(Optimizer): return adamax_op - def _finish_update(self, block): + def _finish_update(self, block, parameters): """Update Beta1 Power accumulator """ assert isinstance(block, framework.Block) main_block = block.program.global_block() - scale_beta1 = main_block.append_op( - type="scale", - inputs={"X": self._beta1_pow_acc}, - outputs={"Out": self._beta1_pow_acc}, - attrs={"scale": self._beta1}) - - return [scale_beta1] + for param in parameters: + with param.block.program.optimized_guard(param): + beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str, + param) + main_block.append_op( + type="scale", + inputs={"X": beta1_pow_acc}, + outputs={"Out": beta1_pow_acc}, + attrs={"scale": self._beta1}) class DecayedAdagradOptimizer(Optimizer): @@ -1156,7 +1158,8 @@ class ModelAverage(Optimizer): self.params_grads.append((param, grad)) for param, grad in self.params_grads: - self._append_average_accumulate_op(param) + with param.block.program.optimized_guard(param): + self._append_average_accumulate_op(param) self.apply_program = Program() block = self.apply_program.global_block() diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 0a42b9fca8dba7a11b414990be6c04c93158864f..4a61f85ec4b5c5108ded31632af75dbbdaaaba71 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -67,7 +67,7 @@ class ParamAttr(object): self.gradient_clip = gradient_clip self.model_average = do_model_average - def set_default_initializer(self, initializer): + def _set_default_initializer(self, initializer): """ Set the default initializer, the initializer should be Constant, Uniform, Normal, Xavier, MSRA. @@ -88,7 +88,7 @@ class ParamAttr(object): self.initializer = initializer - def set_default_param_initializer(self): + def _set_default_param_initializer(self): """ Set the default initializer for the parameter with Xavier. @@ -98,9 +98,9 @@ class ParamAttr(object): Returns: None. """ - self.set_default_initializer(Xavier()) + self._set_default_initializer(Xavier()) - def set_default_bias_initializer(self): + def _set_default_bias_initializer(self): """ Set the default initializer for the bias with Constant(0.0). @@ -110,10 +110,10 @@ class ParamAttr(object): Returns: None. """ - self.set_default_initializer(Constant(0.0)) + self._set_default_initializer(Constant(0.0)) @staticmethod - def to_attr(arg): + def _to_attr(arg): """ Create ParamAttr[s]. 
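Returning to the optimizer changes above: the single global `beta*_pow_acc` variables are replaced by one accumulator per parameter, registered through the base-class `_add_accumulator` helper, looked up with `_get_accumulator`, and updated in `_finish_update(block, parameters)`, which now receives the parameter list so the scale ops can be emitted under each parameter's `optimized_guard`. A condensed sketch of that pattern in a hypothetical, deliberately incomplete `Optimizer` subclass, using the helpers exactly as Adam and Adamax do in this patch:

```python
import paddle.fluid as fluid


class PowAccumDemoOptimizer(fluid.optimizer.Optimizer):
    """Partial sketch: only the accumulator-related hooks are shown."""

    _pow_acc_str = "pow_acc"   # illustrative accumulator name
    _beta = 0.9                # illustrative decay factor

    def _create_accumulators(self, block, parameters):
        # One [1]-shaped accumulator per parameter, initialized to beta,
        # mirroring what Adam/Adamax now do for their beta-pow tensors.
        for p in parameters:
            self._add_accumulator(
                name=self._pow_acc_str,
                param=p,
                dtype='float32',
                fill_value=self._beta,
                shape=[1])

    def _finish_update(self, block, parameters):
        # New signature: the parameter list is passed in so the update can
        # be emitted per parameter, inside that parameter's optimized_guard.
        main_block = block.program.global_block()
        for param in parameters:
            with param.block.program.optimized_guard(param):
                pow_acc = self._get_accumulator(self._pow_acc_str, param)
                main_block.append_op(
                    type="scale",
                    inputs={"X": pow_acc},
                    outputs={"Out": pow_acc},
                    attrs={"scale": self._beta})
```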
@@ -131,7 +131,7 @@ class ParamAttr(object): if arg is None: return ParamAttr() elif isinstance(arg, list) or isinstance(arg, tuple): - return [ParamAttr.to_attr(a) for a in arg] + return [ParamAttr._to_attr(a) for a in arg] elif isinstance(arg, ParamAttr): return arg elif isinstance(arg, str) or isinstance(arg, unicode): @@ -141,11 +141,11 @@ class ParamAttr(object): elif isinstance(arg, WeightDecayRegularizer): return ParamAttr(regularizer=arg) elif isinstance(arg, bool): - return ParamAttr.to_attr(None) if arg else False + return ParamAttr._to_attr(None) if arg else False else: raise TypeError("{0} cast to ParamAttr".format(type(arg))) - def to_kwargs(self, with_initializer=False): + def _to_kwargs(self, with_initializer=False): """ Returns the attributes of this parameter. diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index dac474d5ee76590a75311d6bf2c4cb2fe85b6c40..53f35f5cc062b4da431be19e4484f316bb37be9f 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -15,10 +15,7 @@ import framework from . import core -__all__ = [ - 'append_regularization_ops', 'L1Decay', 'L2Decay', 'L1DecayRegularizer', - 'L2DecayRegularizer' -] +__all__ = ['L1Decay', 'L2Decay', 'L1DecayRegularizer', 'L2DecayRegularizer'] def append_regularization_ops(parameters_and_grads, regularization=None): diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index e056ef9952a519d6c4d580b27f1118a3a91f13af..6824ede82b74c4e9783682149db870a471c35079 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -60,8 +60,8 @@ def get_numeric_gradient(place, return np.array(sum).mean() tensor_to_check = scope.find_var(input_to_check).get_tensor() - tensor_size = product(tensor_to_check.get_dims()) - tensor_to_check_dtype = tensor_to_check.dtype() + tensor_size = product(tensor_to_check.shape()) + tensor_to_check_dtype = tensor_to_check._dtype() if tensor_to_check_dtype == core.VarDesc.VarType.FP32: tensor_to_check_dtype = np.float32 elif tensor_to_check_dtype == core.VarDesc.VarType.FP64: @@ -74,15 +74,15 @@ def get_numeric_gradient(place, def __get_elem__(tensor, i): if tensor_to_check_dtype == np.float32: - return tensor.get_float_element(i) + return tensor._get_float_element(i) else: - return tensor.get_double_element(i) + return tensor._get_double_element(i) def __set_elem__(tensor, i, e): if tensor_to_check_dtype == np.float32: - tensor.set_float_element(i, e) + tensor._set_float_element(i, e) else: - tensor.set_double_element(i, e) + tensor._set_double_element(i, e) # we only compute gradient of one element each time. # we use a for loop to compute the gradient of every element. 
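After the pybind renames above, the Tensor element accessors and `dtype()`/`get_dims()` are private, underscore-prefixed bindings, with the existing `shape()` call taking over for `get_dims()`. A small sketch of the central-difference probing these test helpers perform, using the renamed methods on a scratch scope; the variable name and shape are illustrative, and the forward evaluations are elided:

```python
import numpy as np
import paddle.fluid.core as core

place = core.CPUPlace()
scope = core.Scope()

t = scope.var("x").get_tensor()
t.set(np.random.rand(2, 3).astype('float32'), place)

size = int(np.prod(t.shape()))                  # shape() replaces get_dims()
assert t._dtype() == core.VarDesc.VarType.FP32  # _dtype() replaces dtype()

delta = 0.005
for i in range(size):
    origin = t._get_float_element(i)            # was get_float_element()
    t._set_float_element(i, origin + delta)     # perturb one element upward
    # ... run the program and record the positive-side output here ...
    t._set_float_element(i, origin - delta)     # perturb the element downward
    # ... run the program and record the negative-side output here ...
    t._set_float_element(i, origin)             # restore the original value
```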
@@ -107,7 +107,7 @@ def get_numeric_gradient(place, __set_elem__(tensor_to_check, i, origin) gradient_flat[i] = (y_pos - y_neg) / delta / 2 - return gradient_flat.reshape(tensor_to_check.get_dims()) + return gradient_flat.reshape(tensor_to_check.shape()) class OpTest(unittest.TestCase): @@ -125,7 +125,7 @@ class OpTest(unittest.TestCase): @classmethod def tearDownClass(cls): - '''Restore random seeds''' + """Restore random seeds""" np.random.set_state(cls._np_rand_state) random.setstate(cls._py_rand_state) diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index a62ee9596d0f6c58135b4a13249b638e84e63c3c..fcb2612326e74cf6417aa93f2691154c79b5e44c 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -129,7 +129,6 @@ def create_or_get_tensor(scope, var_name, var, place): if var is not None: assert isinstance(var, np.ndarray) tensor.set_recursive_sequence_lengths([]) - tensor.set_dims(var.shape) tensor.set(var, place) return tensor diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py index 92e718662dfd7998be3ede2994f160059679fa8a..31af1245720405ee067a0acf3575e3ae86372c13 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py @@ -65,10 +65,10 @@ class TestDyRnnStaticInput(unittest.TestCase): return self._lodtensor_to_ndarray(fetch_outs[0]) def _lodtensor_to_ndarray(self, lod_tensor): - dims = lod_tensor.get_dims() + dims = lod_tensor.shape() ndarray = np.zeros(shape=dims).astype('float32') for i in xrange(np.product(dims)): - ndarray.ravel()[i] = lod_tensor.get_float_element(i) + ndarray.ravel()[i] = lod_tensor._get_float_element(i) return ndarray, lod_tensor.recursive_sequence_lengths() def build_graph(self, only_forward=False): @@ -185,19 +185,19 @@ class TestDyRnnStaticInput(unittest.TestCase): actual_gradients, actual_lod = self.fetch_value(static_input_grad) - static_input_shape = self.static_input_tensor.get_dims() + static_input_shape = self.static_input_tensor.shape() numeric_gradients = np.zeros(shape=static_input_shape).astype('float32') # calculate numeric gradients tensor_size = np.product(static_input_shape) for i in xrange(tensor_size): - origin = self.static_input_tensor.get_float_element(i) + origin = self.static_input_tensor._get_float_element(i) x_pos = origin + self._delta - self.static_input_tensor.set_float_element(i, x_pos) + self.static_input_tensor._set_float_element(i, x_pos) y_pos = self.fetch_value(loss)[0][0] x_neg = origin - self._delta - self.static_input_tensor.set_float_element(i, x_neg) + self.static_input_tensor._set_float_element(i, x_neg) y_neg = self.fetch_value(loss)[0][0] - self.static_input_tensor.set_float_element(i, origin) + self.static_input_tensor._set_float_element(i, origin) numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001)) self.assertTrue( diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 43385691bb3960004b5b69a1c55e41dd4252fa71..18921d727f94a85b69259c07273f09c3e19390c6 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -287,7 +287,7 @@ class 
TestAdamOptimizer(unittest.TestCase): # Check accumulators accumulators = adam_optimizer.get_accumulators() - self.assertEqual(len(accumulators), 2) + self.assertEqual(len(accumulators), 4) self.assertTrue(adam_optimizer.get_moment1_str() in accumulators) self.assertTrue(adam_optimizer.get_moment2_str() in accumulators) moment1_acc = accumulators[adam_optimizer.get_moment1_str()] @@ -354,7 +354,7 @@ class TestAdamaxOptimizer(unittest.TestCase): # Check accumulators accumulators = adamax_optimizer.get_accumulators() - self.assertEqual(len(accumulators), 2) + self.assertEqual(len(accumulators), 3) self.assertTrue(adamax_optimizer.get_moment_str() in accumulators) self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators) moment_acc = accumulators[adamax_optimizer.get_moment_str()] diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py new file mode 100644 index 0000000000000000000000000000000000000000..05715464848d835684dd3cf0e99e5d4dd482e0b6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_py_reader_push_pop.py @@ -0,0 +1,99 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid +import numpy as np +from threading import Thread + + +def feed_data(feed_queue, inputs): + for in_data in inputs: + feed_queue.push(in_data) + + +class TestPyReader(unittest.TestCase): + def setUp(self): + self.capacity = 10 + self.batch_size_min = 10 + self.batch_size_max = 20 + self.shapes = [(-1, 3, 2, 1), (-1, 1)] + self.lod_levels = [0, 0] + self.dtypes = ['float32', 'int64'] + self.iterations = 20 + + def test_single_thread_main(self): + self.main(use_thread=False) + + def test_multiple_thread_main(self): + self.main(use_thread=True) + + def main(self, use_thread=False): + with fluid.program_guard(fluid.Program(), fluid.Program()): + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + executor = fluid.Executor(place) + + data_file, feed_queue = fluid.layers.py_reader( + capacity=self.capacity, + dtypes=self.dtypes, + lod_levels=self.lod_levels, + shapes=self.shapes) + + read_out_data = fluid.layers.read_file(data_file) + self.inputs = [] + + for i in range(self.iterations): + in_data = fluid.LoDTensorArray() + batch_size = np.random.random_integers(self.batch_size_min, + self.batch_size_max) + for shape, dtype in zip(self.shapes, self.dtypes): + next_data = np.random.uniform( + low=0, high=1000, + size=(batch_size, ) + shape[1:]).astype(dtype) + in_data.append(executor.as_lodtensor(next_data)) + + self.inputs.append(in_data) + + executor.run(fluid.default_startup_program()) + self.outputs = [] + if use_thread: + thread = Thread( + target=feed_data, args=(feed_queue, self.inputs)) + thread.start() + for in_data in self.inputs: + self.outputs.append( + executor.run(fetch_list=list(read_out_data))) + else: + for in_data in self.inputs: + feed_queue.push(in_data) 
+ self.outputs.append( + executor.run(fetch_list=list(read_out_data))) + + feed_queue.close() + self.validate() + + def validate(self): + self.assertEqual(len(self.inputs), len(self.outputs)) + for in_data_list, out_data_list in zip(self.inputs, self.outputs): + self.assertEqual(len(in_data_list), len(out_data_list)) + in_data_list_np = [ + np.array(in_lod_tensor) for in_lod_tensor in in_data_list + ] + for in_data, out_data in zip(in_data_list_np, out_data_list): + self.assertTrue((in_data == out_data).all()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py new file mode 100644 index 0000000000000000000000000000000000000000..9a5b69eea46e74deeba87aefae4afac84c7745f1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -0,0 +1,224 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid +import paddle.fluid.core as core +import numpy as np +import threading +import multiprocessing +import os + + +def as_tensor(np_array_or_tensor, place=None): + if isinstance(np_array_or_tensor, fluid.LoDTensor): + return np_array_or_tensor + + if place is None: + place = fluid.CPUPlace() + + tensor = fluid.LoDTensor() + tensor.set(np_array_or_tensor, place) + return tensor + + +def as_numpy(tensor_or_numpy): + return tensor_or_numpy if isinstance( + tensor_or_numpy, np.ndarray) else np.array(tensor_or_numpy) + + +def feed_data(feed_queue, reader): + data_generator = reader() + while True: + data = next(data_generator, None) + if data is None or not feed_queue.push(data): + break + + +def simple_fc_net(in_size, + class_num, + hidden_sizes, + batch_size, + queue_capacity, + use_double_buffer=False): + reader, feed_queue = fluid.layers.py_reader( + capacity=queue_capacity, + shapes=[[-1, in_size], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + reader = fluid.layers.batch(reader, batch_size=batch_size) + if use_double_buffer: + reader = fluid.layers.double_buffer(reader) + + in_data, label = fluid.layers.read_file(reader) + + hidden = in_data + for hidden_size in hidden_sizes: + hidden = fluid.layers.fc( + hidden, + size=hidden_size, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + + predict_label = fluid.layers.fc(hidden, size=class_num, act='softmax') + loss = fluid.layers.mean( + fluid.layers.cross_entropy( + input=predict_label, label=label)) + + optimizer = fluid.optimizer.Adam() + optimizer.minimize(loss) + return in_data, label, loss, optimizer, feed_queue + + +class TestPyReaderUsingExecutor(unittest.TestCase): + def setUp(self): + self.in_size = 1000 + self.hidden_sizes = [50, 30, 20] + self.class_num = 10 + self.batch_size = 32 + self.iterations = 10 + self.queue_capacity = 50 + + def test(self): + for use_cuda in [False, True]: + for 
use_parallel_executor in [False, True]: + for use_double_buffer in [False, True]: + print('Test Parameters:'), + print({ + 'use_cuda': use_cuda, + 'use_parallel_executor': use_parallel_executor, + 'use_double_buffer': use_double_buffer + }) + self.main(use_cuda, use_parallel_executor, + use_double_buffer) + + def random_reader(self): + def reader(): + self.inputs = [] + cnt = 0 + while True: + tensors = fluid.LoDTensorArray() + in_data = np.random.uniform( + low=0, high=1, size=(1, self.in_size)).astype('float32') + tensors.append(as_tensor(in_data)) + label = np.random.random_integers( + low=0, high=self.class_num - 1, size=(1, 1)).astype('int64') + tensors.append(as_tensor(label)) + + if cnt < self.iterations * self.batch_size * self.batch_size_times: + if cnt % (self.batch_size * self.batch_size_times) == 0: + self.inputs.append([in_data, label]) + else: + self.inputs[-1][0] = np.concatenate( + (self.inputs[-1][0], in_data), axis=0) + self.inputs[-1][1] = np.concatenate( + (self.inputs[-1][1], label), axis=0) + elif not self.use_double_buffer: + break + + yield tensors + cnt += 1 + + yield None + + return reader + + def main(self, + use_cuda=True, + use_parallel_executor=False, + use_double_buffer=False): + assert not use_cuda or use_cuda and core.is_compiled_with_cuda() + + self.use_cuda = use_cuda + self.use_parallel_executor = use_parallel_executor + self.use_double_buffer = use_double_buffer + + startup_program = fluid.Program() + main_program = fluid.Program() + + with fluid.program_guard(main_program, startup_program): + in_data, label, loss, optimizer, feed_queue = simple_fc_net( + in_size=self.in_size, + class_num=self.class_num, + hidden_sizes=self.hidden_sizes, + batch_size=self.batch_size, + queue_capacity=self.queue_capacity, + use_double_buffer=self.use_double_buffer) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + startup_exe = fluid.Executor(place) + startup_exe.run(startup_program) + + if use_parallel_executor: + main_exe = fluid.ParallelExecutor(use_cuda, loss_name=loss.name) + if use_cuda: + self.batch_size_times = core.get_cuda_device_count() + else: + self.batch_size_times = int( + os.environ.get('CPU_NUM', multiprocessing.cpu_count())) + else: + main_exe = startup_exe + self.batch_size_times = 1 + + reader = self.random_reader() + thread = threading.Thread( + target=feed_data, args=(feed_queue, reader)) + thread.start() + + self.outputs = [] + for _ in range(self.iterations): + fetches = main_exe.run(fetch_list=[in_data.name, label.name]) + fetches = [as_numpy(fetch) for fetch in fetches] + self.outputs.append(fetches) + + feed_queue.close() + self.validate() + + def validate(self): + self.assertEqual(len(self.inputs), len(self.outputs)) + for batch_in, batch_out in zip(self.inputs, self.outputs): + self.assertEqual(len(batch_in), len(batch_out)) + if self.use_parallel_executor and not self.use_double_buffer: + self.validate_unordered_batch(batch_in, batch_out) + else: + for in_data, out_data in zip(batch_in, batch_out): + self.assertEqual(in_data.shape, out_data.shape) + if not self.use_parallel_executor: + self.assertTrue((in_data == out_data).all()) + + def validate_unordered_batch(self, batch_in, batch_out): + out_index_left_set = set(range(self.batch_size * self.batch_size_times)) + mapping_num = 0 + for i in range(self.batch_size * self.batch_size_times): + for j in out_index_left_set: + flag = True + for k in range(len(batch_in)): + in_data = batch_in[k][i] + out_data = batch_out[k][j] + if (in_data != out_data).any(): + flag = False + 
break + + if flag: + out_index_left_set.remove(j) + mapping_num += 1 + break + + self.assertEqual(mapping_num, self.batch_size * self.batch_size_times) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_selected_rows.py b/python/paddle/fluid/tests/unittests/test_selected_rows.py index 3d7b86787fbf0a855bcd86b8a873c9134cb1d5cc..f504a06ffff8cb636498652554fca05e22bb905d 100644 --- a/python/paddle/fluid/tests/unittests/test_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_selected_rows.py @@ -40,12 +40,12 @@ class TestSelectedRows(unittest.TestCase): # compare tensor self.assertAlmostEqual(2.0, - selected_rows.get_tensor().get_float_element(0)) + selected_rows.get_tensor()._get_float_element(0)) self.assertAlmostEqual(1.0, - selected_rows.get_tensor().get_float_element(1)) + selected_rows.get_tensor()._get_float_element(1)) self.assertAlmostEqual( 4.0, - selected_rows.get_tensor().get_float_element(2 * row_numel + 8)) + selected_rows.get_tensor()._get_float_element(2 * row_numel + 8)) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py index b779f0fb014bbba62927754ea6f36828a32e6c0a..24bc2cbaf86e8ed2c6a359c4c4d9a1e1507df746 100644 --- a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py +++ b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py @@ -45,8 +45,8 @@ class TestShrinkRNNMemoryBase(unittest.TestCase): def sum_lodtensor(self, tensor): sum_res = 0.0 - for i in xrange(np.product(tensor.get_dims())): - sum_res += tensor.get_float_element(i) + for i in xrange(np.product(tensor.shape())): + sum_res += tensor._get_float_element(i) return sum_res diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index f17edd3025b17549892bbd47935a1d2452cefac3..5ccc876ae8e6e20f76c77c1892f4de59d72bffc8 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -25,8 +25,8 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() - tensor.set_dims([1000, 784]) - tensor.alloc_int(place) + tensor._set_dims([1000, 784]) + tensor._alloc_int(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) tensor_array[3, 9] = 1 @@ -44,8 +44,8 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() - tensor.set_dims([1000, 784]) - tensor.alloc_float(place) + tensor._set_dims([1000, 784]) + tensor._alloc_float(place) tensor_array = numpy.array(tensor) self.assertEqual((1000, 784), tensor_array.shape) @@ -63,8 +63,8 @@ class TestTensor(unittest.TestCase): var_lod = scope.var("test_lod_tensor") lod_tensor = var_lod.get_tensor() - lod_tensor.set_dims([4, 4, 6]) - lod_tensor.alloc_int(place) + lod_tensor._set_dims([4, 4, 6]) + lod_tensor._alloc_int(place) array = numpy.array(lod_tensor) array[0, 0, 0] = 3 array[3, 3, 5] = 10 @@ -84,8 +84,8 @@ class TestTensor(unittest.TestCase): var_lod = scope.var("test_lod_tensor") lod_tensor = var_lod.get_tensor() - lod_tensor.set_dims([5, 2, 3, 4]) - lod_tensor.alloc_float(place) + lod_tensor._set_dims([5, 2, 3, 4]) + lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) self.assertEqual((5, 2, 3, 4), tensor_array.shape) @@ -104,14 +104,13 @@ class TestTensor(unittest.TestCase): self.assertListEqual(lod_py, lod) def test_lod_tensor_init(self): - scope = core.Scope() place = core.CPUPlace() lod_py = 
[[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() - lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor._set_dims([5, 2, 3, 4]) lod_tensor.set_recursive_sequence_lengths(lod_py) - lod_tensor.alloc_float(place) + lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 1] = 2.0 @@ -129,9 +128,9 @@ class TestTensor(unittest.TestCase): lod_py = [[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() - lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor._set_dims([5, 2, 3, 4]) lod_tensor.set_recursive_sequence_lengths(lod_py) - lod_tensor.alloc_float(place) + lod_tensor._alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 1] = 2.0 @@ -149,15 +148,15 @@ class TestTensor(unittest.TestCase): tensor = var.get_tensor() - tensor.set_dims([0, 1]) - tensor.alloc_float(place) + tensor._set_dims([0, 1]) + tensor._alloc_float(place) tensor_array = numpy.array(tensor) self.assertEqual((0, 1), tensor_array.shape) if core.is_compiled_with_cuda(): gpu_place = core.CUDAPlace(0) - tensor.alloc_float(gpu_place) + tensor._alloc_float(gpu_place) tensor_array = numpy.array(tensor) self.assertEqual((0, 1), tensor_array.shape) diff --git a/python/paddle/fluid/tests/unittests/testsuite.py b/python/paddle/fluid/tests/unittests/testsuite.py index a995ee10f29a714b674fae4b31070e6ba2ca9953..55c6e54906e739ef0bc953fa5c9e9641ec575ccf 100644 --- a/python/paddle/fluid/tests/unittests/testsuite.py +++ b/python/paddle/fluid/tests/unittests/testsuite.py @@ -75,7 +75,7 @@ def set_input(scope, op, inputs, place): if isinstance(var, tuple): tensor.set_recursive_sequence_lengths(var[1]) var = var[0] - tensor.set_dims(var.shape) + tensor._set_dims(var.shape) tensor.set(var, place) elif isinstance(var, float): scope.find_var(var_name).set_float(var) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 53d6ca86a008f798af2854a154cce8b7242d2f35..121c36e477327d4d0e7b1cba1713e68ce4d06e03 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -377,11 +377,6 @@ class DistributeTranspiler(object): # append it into the sub program. global_ops = [] - # HACK: optimization global ops only used to scale beta1 and beta2 - # replace it with dependency engine. - for op in self.optimize_ops: - if self._is_adam_connected_op(op): - global_ops.append(op) def __append_optimize_op__(op, block, grad_to_block_id, merged_var, lr_ops): @@ -1289,26 +1284,8 @@ class DistributeTranspiler(object): # If one op's input is another op's output or # one op's output is another op's input, we say # the two operator is connected. - def _append_inname_remove_beta(varname_list): - op_input_names = [] - for in_name in varname_list: - # HACK: remove beta1 and beta2 to avoid let all - # ops connected. 
- if in_name.startswith("beta2_pow_acc") or \ - in_name.startswith("beta1_pow_acc"): - continue - else: - op_input_names.append(in_name) - return op_input_names - - op1_input_names = _append_inname_remove_beta(op1.desc.input_arg_names()) - op1_output_names = op1.desc.output_arg_names() - - op2_input_names = _append_inname_remove_beta(op2.desc.input_arg_names()) - op2_output_names = op2.desc.output_arg_names() - - if set(op1_output_names) & set(op2_input_names) or \ - set(op1_input_names) & set(op2_output_names): + if set(op1.desc.output_arg_names()) & set(op2.desc.input_arg_names()) or \ + set(op1.desc.input_arg_names()) & set(op2.desc.output_arg_names()): return True return False @@ -1413,7 +1390,7 @@ class DistributeTranspiler(object): def _get_optimize_pass(self): """ - Get optimizer operators, paramters and gradients from origin_program + Get optimizer operators, parameters and gradients from origin_program Returns: opt_ops (list): optimize operators. params_grads (dict): paramter->gradient. @@ -1436,20 +1413,6 @@ class DistributeTranspiler(object): origin_var_dict[param_name], origin_var_dict[input_name] ]) - elif self._is_adam_connected_op(op): - opt_ops.append(op) else: pass return opt_ops, params_grads - - def _is_adam_connected_op(self, op): - """ - A hack function to determinate whether the input operator - is connected to optimize operator. - """ - if op.type == "scale": - for in_name in op.input_arg_names: - if in_name.startswith("beta1_pow_acc") or \ - in_name.startswith("beta2_pow_acc"): - return True - return False diff --git a/python/setup.py.in b/python/setup.py.in index 38a3873430505936a1058359e61140dd302d3e3f..a064f36cc19dbc626dd85d76290280a01fa57215 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -42,12 +42,12 @@ def get_patch(): def is_taged(): try: - cmd = ['git', 'describe', '--exact-match', '--tags'] + cmd = ['git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null'] git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip() except: return False - if git_tag.replace('v', '') == '@PADDLE_VERSION@': + if str(git_tag).replace('v', '') == '@PADDLE_VERSION@': return True else: return False
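Taken together, the `create_py_reader` op, the `LoDTensorBlockingQueue` bindings and the new `fluid.layers.py_reader` layer give a Python-side feeding pipeline that runs concurrently with graph execution. A minimal end-to-end sketch in the spirit of the new unit tests; the batch count, shapes and random data are illustrative, and `close()` is called once feeding is done, as the layer's docstring requires:

```python
import threading

import numpy as np
import paddle.fluid as fluid

place = (fluid.CUDAPlace(0)
         if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace())
exe = fluid.Executor(place)

with fluid.program_guard(fluid.Program(), fluid.Program()):
    reader, queue = fluid.layers.py_reader(
        capacity=10,
        shapes=[[-1, 784], [-1, 1]],
        dtypes=['float32', 'int64'])
    image, label = fluid.layers.read_file(reader)

    exe.run(fluid.default_startup_program())   # creates the reader variable

    def feeder():
        for _ in range(8):                     # 8 illustrative batches
            batch = fluid.LoDTensorArray()
            for arr in (np.random.rand(32, 784).astype('float32'),
                        np.random.randint(0, 10, (32, 1)).astype('int64')):
                t = fluid.LoDTensor()
                t.set(arr, fluid.CPUPlace())
                batch.append(t)
            if not queue.push(batch):          # stop if the queue was closed
                break
        queue.close()

    thread = threading.Thread(target=feeder)
    thread.start()

    for _ in range(8):
        fetched_image, fetched_label = exe.run(
            fetch_list=[image.name, label.name])
```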