diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c7eb260aea8478f4833cb79253f4481e10b8685..e8ea828dd2a25f5f47b03e92ae86e083d4425dc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) -option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) +option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 585db019d521b1699baadfae31ef95b5059c71b4..33ef6860e1d38f4e87c4431addf43f9f8a655fc2 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -186,6 +186,11 @@ function(cc_library TARGET_NAME) add_library(${TARGET_NAME} STATIC ${cc_library_SRCS}) endif() if (cc_library_DEPS) + # Don't need link libwarpctc.so + if ("${cc_library_DEPS};" MATCHES "warpctc;") + list(REMOVE_ITEM cc_library_DEPS warpctc) + add_dependencies(${TARGET_NAME} warpctc) + endif() add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) endif() @@ -224,12 +229,18 @@ function(cc_test TARGET_NAME) if(WITH_TESTING) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) + set(multiValueArgs SRCS DEPS ARGS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + # Support linking flags: --whole-archive (Linux) / -force_load (MacOS) + target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + if("${cc_test_DEPS}" MATCHES "ARCHIVE_START") + list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END) + endif() add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) - add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + add_test(NAME ${TARGET_NAME} + COMMAND ${TARGET_NAME} ${cc_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endfunction(cc_test) @@ -457,12 +468,12 @@ endfunction() function(py_test TARGET_NAME) if(WITH_TESTING) - set(options STATIC static SHARED shared) + set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS ARGS) + set(multiValueArgs SRCS DEPS ARGS ENVS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python + COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python ${py_test_ENVS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/doc/api/v2/fluid/data_feeder.rst b/doc/api/v2/fluid/data_feeder.rst index 0fa78f7dfb04c13be7eb83b7fd35cb03f2f4a7fa..a591c7334fd31c98a94b50a4344f251560a0f2f9 100644 --- a/doc/api/v2/fluid/data_feeder.rst +++ b/doc/api/v2/fluid/data_feeder.rst @@ -1,9 +1,14 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + =========== -DataFeeder +data_feeder =========== DataFeeder ------------ -.. automodule:: paddle.v2.fluid.data_feeder - :members: DataFeeder +---------- + +.. autoclass:: paddle.v2.fluid.data_feeder.DataFeeder + :members: :noindex: + diff --git a/doc/api/v2/fluid/evaluator.rst b/doc/api/v2/fluid/evaluator.rst index a23f3301d0331e0ea3733f06444515eb4680cd31..00dcecfd628a35d83d1c596bf0aea819a1705862 100644 --- a/doc/api/v2/fluid/evaluator.rst +++ b/doc/api/v2/fluid/evaluator.rst @@ -1,9 +1,21 @@ -=========== -Evaluator -=========== - -Evaluator ------------ -.. automodule:: paddle.v2.fluid.evaluator - :members: Evaluator +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +========= +evaluator +========= + +Accuracy +-------- + +.. autoclass:: paddle.v2.fluid.evaluator.Accuracy + :members: :noindex: + +ChunkEvaluator +-------------- + +.. autoclass:: paddle.v2.fluid.evaluator.ChunkEvaluator + :members: + :noindex: + diff --git a/doc/api/v2/fluid/executor.rst b/doc/api/v2/fluid/executor.rst index 3a283538c120cfa1ef646c390bb71c6251c23675..a028f6283f2ca333bdf6c9857a98661c0222b41e 100644 --- a/doc/api/v2/fluid/executor.rst +++ b/doc/api/v2/fluid/executor.rst @@ -1,9 +1,32 @@ -=========== -Executor -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +======== +executor +======== Executor +-------- + +.. autoclass:: paddle.v2.fluid.executor.Executor + :members: + :noindex: + +global_scope +------------ + +.. autofunction:: paddle.v2.fluid.executor.global_scope + :noindex: + +scope_guard ----------- -.. automodule:: paddle.v2.fluid.executor - :members: Executor + +.. autofunction:: paddle.v2.fluid.executor.scope_guard + :noindex: + +switch_scope +------------ + +.. autofunction:: paddle.v2.fluid.executor.switch_scope :noindex: + diff --git a/doc/api/v2/fluid/gen_doc.py b/doc/api/v2/fluid/gen_doc.py new file mode 100644 index 0000000000000000000000000000000000000000..a2147fd3f7ea635d8f14210fbcd1a568ee2230ee --- /dev/null +++ b/doc/api/v2/fluid/gen_doc.py @@ -0,0 +1,109 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import argparse +import sys +import types + +import paddle.v2.fluid as fluid + + +def parse_arg(): + parser = argparse.ArgumentParser() + parser.add_argument('--submodules', nargs="*") + parser.add_argument( + 'module', type=str, help='Generate the documentation of which module') + return parser.parse_args() + + +class DocGenerator(object): + def __init__(self, module_name, stream=sys.stdout): + self.stream = stream + self.module_name = module_name + if not hasattr(fluid, module_name): + raise ValueError("Cannot find fluid.{0}".format(module_name)) + else: + self.module = getattr(fluid, module_name) + self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +''') + + self._print_header_(module_name, dot='=', is_title=True) + + def print_submodule(self, submodule_name): + submodule = getattr(self.module, submodule_name) + if submodule is None: + raise ValueError("Cannot find submodule {0}".format(submodule_name)) + self.print_section(submodule_name) + + for item in submodule.__all__: + self.print_item(item) + + def print_current_module(self): + for item in self.module.__all__: + self.print_item(item) + + def print_section(self, name): + self._print_header_(name, dot='=', is_title=False) + + def print_item(self, name): + item = getattr(self.module, name) + if isinstance(item, types.TypeType): + self.print_class(name) + elif isinstance(item, types.FunctionType): + self.print_method(name) + else: + raise RuntimeError("Unsupported item {0}".format(name)) + + def print_class(self, name): + self._print_header_(name, dot='-', is_title=False) + self.stream.write('''.. autoclass:: paddle.v2.fluid.{0}.{1} + :members: + :noindex: + +'''.format(self.module_name, name)) + + def print_method(self, name): + self._print_header_(name, dot='-', is_title=False) + self.stream.write('''.. autofunction:: paddle.v2.fluid.{0}.{1} + :noindex: + +'''.format(self.module_name, name)) + + def _print_header_(self, name, dot, is_title): + dot_line = dot * len(name) + if is_title: + self.stream.write(dot_line) + self.stream.write('\n') + self.stream.write(name) + self.stream.write('\n') + self.stream.write(dot_line) + self.stream.write('\n') + self.stream.write('\n') + + +def main(): + args = parse_arg() + gen = DocGenerator(args.module) + if args.submodules is None: + gen.print_current_module() + else: + for submodule_name in args.submodules: + gen.print_submodule(submodule_name) + + +if __name__ == '__main__': + main() diff --git a/doc/api/v2/fluid/gen_doc.sh b/doc/api/v2/fluid/gen_doc.sh new file mode 100755 index 0000000000000000000000000000000000000000..ba7b7ba8e51399deb852b0a7c8ddd3128f521e85 --- /dev/null +++ b/doc/api/v2/fluid/gen_doc.sh @@ -0,0 +1,7 @@ +#!/bin/bash +python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst + +for module in io data_feeder evaluator executor initializer io nets optimizer param_attr profiler regularizer +do + python gen_doc.py ${module} > ${module}.rst +done diff --git a/doc/api/v2/fluid/initializer.rst b/doc/api/v2/fluid/initializer.rst index 8f587837e9873370722062404f511654a9460587..c38be033fff2997930525f51c93995db09daa2b6 100644 --- a/doc/api/v2/fluid/initializer.rst +++ b/doc/api/v2/fluid/initializer.rst @@ -1,50 +1,35 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + =========== -Initializer +initializer =========== +Constant +-------- - -Initializer ------------ -.. automodule:: paddle.v2.fluid.initializer - :members: Initializer - :noindex: - - - -ConstantInitializer -------------------- -.. automodule:: paddle.v2.fluid.initializer - :members: ConstantInitializer +.. autoclass:: paddle.v2.fluid.initializer.Constant + :members: :noindex: +Uniform +------- - -UniformInitializer ------------------- -.. automodule:: paddle.v2.fluid.initializer - :members: UniformInitializer - :noindex: - - - -NormalInitializer ------------------ -.. automodule:: paddle.v2.fluid.initializer - :members: NormalInitializer +.. autoclass:: paddle.v2.fluid.initializer.Uniform + :members: :noindex: +Normal +------ -XavierInitializer ------------------ -.. automodule:: paddle.v2.fluid.initializer - :members: XavierInitializer +.. autoclass:: paddle.v2.fluid.initializer.Normal + :members: :noindex: +Xavier +------ -MSRAInitializer ---------------- -.. automodule:: paddle.v2.fluid.initializer - :members: MSRAInitializer +.. autoclass:: paddle.v2.fluid.initializer.Xavier + :members: :noindex: diff --git a/doc/api/v2/fluid/io.rst b/doc/api/v2/fluid/io.rst index 67f68c4e9e16b379207b8de114cdf769e056f78e..37c9c273e369532e8ff596e9649cb695a98a2505 100644 --- a/doc/api/v2/fluid/io.rst +++ b/doc/api/v2/fluid/io.rst @@ -1,10 +1,61 @@ -=========== -IO -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +== +io +== +save_vars +--------- -is_parameter +.. autofunction:: paddle.v2.fluid.io.save_vars + :noindex: + +save_params ----------- -.. autofunction:: paddle.v2.fluid.io.is_parameter + +.. autofunction:: paddle.v2.fluid.io.save_params + :noindex: + +save_persistables +----------------- + +.. autofunction:: paddle.v2.fluid.io.save_persistables + :noindex: + +load_vars +--------- + +.. autofunction:: paddle.v2.fluid.io.load_vars + :noindex: + +load_params +----------- + +.. autofunction:: paddle.v2.fluid.io.load_params :noindex: + +load_persistables +----------------- + +.. autofunction:: paddle.v2.fluid.io.load_persistables + :noindex: + +save_inference_model +-------------------- + +.. autofunction:: paddle.v2.fluid.io.save_inference_model + :noindex: + +load_inference_model +-------------------- + +.. autofunction:: paddle.v2.fluid.io.load_inference_model + :noindex: + +get_inference_program +--------------------- + +.. autofunction:: paddle.v2.fluid.io.get_inference_program + :noindex: + diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst index 231ec2d4ba102a5d31c47cbc7a5d484ef17a7f3a..e24613b94b422b7cdf9c6383c359fa92a4faf6ff 100644 --- a/doc/api/v2/fluid/layers.rst +++ b/doc/api/v2/fluid/layers.rst @@ -1,546 +1,799 @@ -========== -Layers -========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +====== +layers +====== -fc ---- -.. autofunction:: paddle.v2.fluid.layers.fc +control_flow +============ + +split_lod_tensor +---------------- + +.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor :noindex: -embedding ---------- -.. autofunction:: paddle.v2.fluid.layers.embedding +merge_lod_tensor +---------------- + +.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor :noindex: -dynamic_lstm ------------- -.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm +BlockGuard +---------- + +.. autoclass:: paddle.v2.fluid.layers.BlockGuard + :members: :noindex: -dynamic_lstmp -------------- -.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp +BlockGuardWithCompletion +------------------------ + +.. autoclass:: paddle.v2.fluid.layers.BlockGuardWithCompletion + :members: :noindex: -dynamic_gru ------------ -.. autofunction:: paddle.v2.fluid.layers.dynamic_gru +StaticRNNMemoryLink +------------------- + +.. autoclass:: paddle.v2.fluid.layers.StaticRNNMemoryLink + :members: :noindex: -data ----- -.. autofunction:: paddle.v2.fluid.layers.data +WhileGuard +---------- + +.. autoclass:: paddle.v2.fluid.layers.WhileGuard + :members: :noindex: -mean ----- -.. autofunction:: paddle.v2.fluid.layers.mean +While +----- + +.. autoclass:: paddle.v2.fluid.layers.While + :members: :noindex: -mul ---- -.. autofunction:: paddle.v2.fluid.layers.mul +lod_rank_table +-------------- + +.. autofunction:: paddle.v2.fluid.layers.lod_rank_table :noindex: -elementwise_add ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_add +max_sequence_len +---------------- + +.. autofunction:: paddle.v2.fluid.layers.max_sequence_len :noindex: -elementwise_sub ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_sub +topk +---- + +.. autofunction:: paddle.v2.fluid.layers.topk :noindex: -elementwise_mul ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_mul +lod_tensor_to_array +------------------- + +.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array :noindex: -elementwise_div ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_div +array_to_lod_tensor +------------------- + +.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor :noindex: +increment +--------- -dropout -------- -.. autofunction:: paddle.v2.fluid.layers.dropout +.. autofunction:: paddle.v2.fluid.layers.increment :noindex: +array_write +----------- -reshape --------- -.. autofunction:: paddle.v2.fluid.layers.reshape +.. autofunction:: paddle.v2.fluid.layers.array_write :noindex: +create_array +------------ -sigmoid +.. autofunction:: paddle.v2.fluid.layers.create_array + :noindex: + +less_than --------- -.. autofunction:: paddle.v2.fluid.layers.sigmoid + +.. autofunction:: paddle.v2.fluid.layers.less_than :noindex: +array_read +---------- -scale ---------- -.. autofunction:: paddle.v2.fluid.layers.scale +.. autofunction:: paddle.v2.fluid.layers.array_read + :noindex: + +shrink_memory +------------- + +.. autofunction:: paddle.v2.fluid.layers.shrink_memory :noindex: +array_length +------------ -transpose +.. autofunction:: paddle.v2.fluid.layers.array_length + :noindex: + +IfElse +------ + +.. autoclass:: paddle.v2.fluid.layers.IfElse + :members: + :noindex: + +DynamicRNN +---------- + +.. autoclass:: paddle.v2.fluid.layers.DynamicRNN + :members: + :noindex: + +ConditionalBlock +---------------- + +.. autoclass:: paddle.v2.fluid.layers.ConditionalBlock + :members: + :noindex: + +StaticRNN --------- -.. autofunction:: paddle.v2.fluid.layers.transpose + +.. autoclass:: paddle.v2.fluid.layers.StaticRNN + :members: :noindex: +reorder_lod_tensor_by_rank +-------------------------- -sigmoid_cross_entropy_with_logits ---------------------------------- -.. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits +.. autofunction:: paddle.v2.fluid.layers.reorder_lod_tensor_by_rank :noindex: +ParallelDo +---------- -cast +.. autoclass:: paddle.v2.fluid.layers.ParallelDo + :members: + :noindex: + +Print +----- + +.. autofunction:: paddle.v2.fluid.layers.Print + :noindex: + +device +====== + +get_places +---------- + +.. autofunction:: paddle.v2.fluid.layers.get_places + :noindex: + +io +== + +data ---- -.. autofunction:: paddle.v2.fluid.layers.cast + +.. autofunction:: paddle.v2.fluid.layers.data :noindex: +BlockGuardServ +-------------- -concat -------- -.. autofunction:: paddle.v2.fluid.layers.concat +.. autoclass:: paddle.v2.fluid.layers.BlockGuardServ + :members: :noindex: +ListenAndServ +------------- -sums +.. autoclass:: paddle.v2.fluid.layers.ListenAndServ + :members: + :noindex: + +Send ---- -.. autofunction:: paddle.v2.fluid.layers.sums + +.. autofunction:: paddle.v2.fluid.layers.Send :noindex: +nn +== -linear_chain_crf ----------------- -.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf +fc +-- + +.. autofunction:: paddle.v2.fluid.layers.fc :noindex: +embedding +--------- -assign -------- .. autofunction:: paddle.v2.fluid.layers.embedding :noindex: +dynamic_lstm +------------ -split_lod_tensor ----------------- -.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor +.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm :noindex: +dynamic_lstmp +------------- -merge_lod_tensor +.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp + :noindex: + +dynamic_gru +----------- + +.. autofunction:: paddle.v2.fluid.layers.dynamic_gru + :noindex: + +gru_unit +-------- + +.. autofunction:: paddle.v2.fluid.layers.gru_unit + :noindex: + +linear_chain_crf ---------------- -.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor + +.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf + :noindex: + +crf_decoding +------------ + +.. autofunction:: paddle.v2.fluid.layers.crf_decoding :noindex: cos_sim --------- +------- + .. autofunction:: paddle.v2.fluid.layers.cos_sim :noindex: - cross_entropy ------------- + .. autofunction:: paddle.v2.fluid.layers.cross_entropy :noindex: - - square_error_cost ----------------- + .. autofunction:: paddle.v2.fluid.layers.square_error_cost :noindex: - accuracy ---------- +-------- + .. autofunction:: paddle.v2.fluid.layers.accuracy :noindex: +chunk_eval +---------- + +.. autofunction:: paddle.v2.fluid.layers.chunk_eval + :noindex: sequence_conv ------------- + .. autofunction:: paddle.v2.fluid.layers.sequence_conv :noindex: - conv2d ------ + .. autofunction:: paddle.v2.fluid.layers.conv2d :noindex: - sequence_pool ------------- + .. autofunction:: paddle.v2.fluid.layers.sequence_pool :noindex: +pool2d +------ -sequence_first_step -------------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_first_step +.. autofunction:: paddle.v2.fluid.layers.pool2d :noindex: +batch_norm +---------- + +.. autofunction:: paddle.v2.fluid.layers.batch_norm + :noindex: -sequence_last_step +beam_search_decode ------------------ -.. autofunction:: paddle.v2.fluid.layers.sequence_last_step + +.. autofunction:: paddle.v2.fluid.layers.beam_search_decode :noindex: +conv2d_transpose +---------------- -pool2d ------- -.. autofunction:: paddle.v2.fluid.layers.pool2d +.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose :noindex: +sequence_expand +--------------- -batch_norm +.. autofunction:: paddle.v2.fluid.layers.sequence_expand + :noindex: + +lstm_unit +--------- + +.. autofunction:: paddle.v2.fluid.layers.lstm_unit + :noindex: + +reduce_sum ---------- -.. autofunction:: paddle.v2.fluid.layers.batch_norm + +.. autofunction:: paddle.v2.fluid.layers.reduce_sum + :noindex: + +reduce_mean +----------- + +.. autofunction:: paddle.v2.fluid.layers.reduce_mean :noindex: +reduce_max +---------- + +.. autofunction:: paddle.v2.fluid.layers.reduce_max + :noindex: -beam_search_decode +reduce_min +---------- + +.. autofunction:: paddle.v2.fluid.layers.reduce_min + :noindex: + +sequence_first_step +------------------- + +.. autofunction:: paddle.v2.fluid.layers.sequence_first_step + :noindex: + +sequence_last_step ------------------ -.. autofunction:: paddle.v2.fluid.layers.beam_search_decode + +.. autofunction:: paddle.v2.fluid.layers.sequence_last_step + :noindex: + +dropout +------- + +.. autofunction:: paddle.v2.fluid.layers.dropout :noindex: +split +----- -lod_rank_table --------------- -.. autofunction:: paddle.v2.fluid.layers.lod_rank_table +.. autofunction:: paddle.v2.fluid.layers.split :noindex: +ctc_greedy_decoder +------------------ -max_sequence_len ----------------- -.. autofunction:: paddle.v2.fluid.layers.max_sequence_len +.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder :noindex: +edit_distance +------------- -topk ------ -.. autofunction:: paddle.v2.fluid.layers.topk +.. autofunction:: paddle.v2.fluid.layers.edit_distance :noindex: +l2_normalize +------------ -lod_tensor_to_array -------------------- -.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array +.. autofunction:: paddle.v2.fluid.layers.l2_normalize :noindex: +matmul +------ - -array_to_lod_tensor -------------------- -.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor +.. autofunction:: paddle.v2.fluid.layers.matmul :noindex: +warpctc +------- +.. autofunction:: paddle.v2.fluid.layers.warpctc + :noindex: +sequence_reshape +---------------- -fill_constant -------------- -.. autofunction:: paddle.v2.fluid.layers.fill_constant +.. autofunction:: paddle.v2.fluid.layers.sequence_reshape :noindex: +transpose +--------- +.. autofunction:: paddle.v2.fluid.layers.transpose + :noindex: -fill_constant_batch_size_like ------------------------------ -.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like +im2sequence +----------- + +.. autofunction:: paddle.v2.fluid.layers.im2sequence :noindex: +nce +--- -ones ----- -.. autofunction:: paddle.v2.fluid.layers.ones +.. autofunction:: paddle.v2.fluid.layers.nce :noindex: +beam_search +----------- -zeros ------ -.. autofunction:: paddle.v2.fluid.layers.zeros +.. autofunction:: paddle.v2.fluid.layers.beam_search :noindex: +row_conv +-------- -increment ---------- -.. autofunction:: paddle.v2.fluid.layers.increment +.. autofunction:: paddle.v2.fluid.layers.row_conv :noindex: +multiplex +--------- -array_write ------------ -.. autofunction:: paddle.v2.fluid.layers.array_write +.. autofunction:: paddle.v2.fluid.layers.multiplex :noindex: +ops +=== +mean +---- -create_array ------------- -.. autofunction:: paddle.v2.fluid.layers.create_array +.. autofunction:: paddle.v2.fluid.layers.mean :noindex: +mul +--- -less_than ---------- -.. autofunction:: paddle.v2.fluid.layers.less_than +.. autofunction:: paddle.v2.fluid.layers.mul :noindex: +reshape +------- -array_read ----------- -.. autofunction:: paddle.v2.fluid.layers.array_read +.. autofunction:: paddle.v2.fluid.layers.reshape :noindex: +scale +----- -shrink_memory --------------- -.. autofunction:: paddle.v2.fluid.layers.shrink_memory +.. autofunction:: paddle.v2.fluid.layers.scale :noindex: +sigmoid_cross_entropy_with_logits +--------------------------------- -array_length -------------- -.. autofunction:: paddle.v2.fluid.layers.array_length +.. autofunction:: paddle.v2.fluid.layers.sigmoid_cross_entropy_with_logits :noindex: +elementwise_add +--------------- -conv2d_transpose ----------------- -.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose +.. autofunction:: paddle.v2.fluid.layers.elementwise_add :noindex: - -sequence_expand +elementwise_div --------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_expand + +.. autofunction:: paddle.v2.fluid.layers.elementwise_div :noindex: +elementwise_sub +--------------- -gru_unit --------- -.. autofunction:: paddle.v2.fluid.layers.gru_unit +.. autofunction:: paddle.v2.fluid.layers.elementwise_sub :noindex: +elementwise_mul +--------------- -lstm_unit ---------- -.. autofunction:: paddle.v2.fluid.layers.lstm_unit +.. autofunction:: paddle.v2.fluid.layers.elementwise_mul :noindex: +elementwise_max +--------------- -sequence_softmax ----------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_softmax +.. autofunction:: paddle.v2.fluid.layers.elementwise_max :noindex: +elementwise_min +--------------- -reduce_sum ----------- -.. autofunction:: paddle.v2.fluid.layers.reduce_sum +.. autofunction:: paddle.v2.fluid.layers.elementwise_min :noindex: +elementwise_pow +--------------- -reduce_mean ------------ -.. autofunction:: paddle.v2.fluid.layers.reduce_mean +.. autofunction:: paddle.v2.fluid.layers.elementwise_pow :noindex: +clip +---- -reduce_max ----------- -.. autofunction:: paddle.v2.fluid.layers.reduce_max +.. autofunction:: paddle.v2.fluid.layers.clip :noindex: +clip_by_norm +------------ -reduce_min ----------- -.. autofunction:: paddle.v2.fluid.layers.reduce_min +.. autofunction:: paddle.v2.fluid.layers.clip_by_norm :noindex: +sequence_softmax +---------------- -split ------ -.. autofunction:: paddle.v2.fluid.layers.split +.. autofunction:: paddle.v2.fluid.layers.sequence_softmax :noindex: +sigmoid +------- -matmul ------- -.. autofunction:: paddle.v2.fluid.layers.matmul +.. autofunction:: paddle.v2.fluid.layers.sigmoid :noindex: logsigmoid ---------- + .. autofunction:: paddle.v2.fluid.layers.logsigmoid :noindex: exp --- + .. autofunction:: paddle.v2.fluid.layers.exp :noindex: relu ---- + .. autofunction:: paddle.v2.fluid.layers.relu :noindex: tanh ---- + .. autofunction:: paddle.v2.fluid.layers.tanh :noindex: tanh_shrink ----------- + .. autofunction:: paddle.v2.fluid.layers.tanh_shrink :noindex: softshrink ---------- + .. autofunction:: paddle.v2.fluid.layers.softshrink :noindex: sqrt ---- + .. autofunction:: paddle.v2.fluid.layers.sqrt :noindex: abs ----- +--- + .. autofunction:: paddle.v2.fluid.layers.abs :noindex: ceil ---- + .. autofunction:: paddle.v2.fluid.layers.ceil :noindex: floor ----- + .. autofunction:: paddle.v2.fluid.layers.floor :noindex: round ----- + .. autofunction:: paddle.v2.fluid.layers.round :noindex: reciprocal ---------- + .. autofunction:: paddle.v2.fluid.layers.reciprocal :noindex: log --- + .. autofunction:: paddle.v2.fluid.layers.log :noindex: square ------ + .. autofunction:: paddle.v2.fluid.layers.square :noindex: softplus -------- + .. autofunction:: paddle.v2.fluid.layers.softplus :noindex: softsign ---------- +-------- + .. autofunction:: paddle.v2.fluid.layers.softsign :noindex: brelu ----- + .. autofunction:: paddle.v2.fluid.layers.brelu :noindex: leaky_relu ---------- + .. autofunction:: paddle.v2.fluid.layers.leaky_relu :noindex: soft_relu --------- + .. autofunction:: paddle.v2.fluid.layers.soft_relu :noindex: elu ----- +--- + .. autofunction:: paddle.v2.fluid.layers.elu :noindex: relu6 ----- + .. autofunction:: paddle.v2.fluid.layers.relu6 :noindex: pow ----- +--- + .. autofunction:: paddle.v2.fluid.layers.pow :noindex: +stanh +----- + +.. autofunction:: paddle.v2.fluid.layers.stanh + :noindex: + hard_shrink ----------- + .. autofunction:: paddle.v2.fluid.layers.hard_shrink :noindex: thresholded_relu ---------------- + .. autofunction:: paddle.v2.fluid.layers.thresholded_relu :noindex: hard_sigmoid -------------- +------------ + .. autofunction:: paddle.v2.fluid.layers.hard_sigmoid :noindex: swish ------- +----- + .. autofunction:: paddle.v2.fluid.layers.swish :noindex: -im2sequence +tensor +====== + +create_tensor +------------- + +.. autofunction:: paddle.v2.fluid.layers.create_tensor + :noindex: + +create_parameter +---------------- + +.. autofunction:: paddle.v2.fluid.layers.create_parameter + :noindex: + +create_global_var +----------------- + +.. autofunction:: paddle.v2.fluid.layers.create_global_var + :noindex: + +cast +---- + +.. autofunction:: paddle.v2.fluid.layers.cast + :noindex: + +concat ------ -.. autofunction:: paddle.v2.fluid.layers.im2sequence + +.. autofunction:: paddle.v2.fluid.layers.concat :noindex: -edit_distance ---------------- -.. autofunction:: paddle.v2.fluid.layers.edit_distance_error +sums +---- + +.. autofunction:: paddle.v2.fluid.layers.sums :noindex: -ctc_greedy_decoder ---------------- -.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder +assign +------ + +.. autofunction:: paddle.v2.fluid.layers.assign :noindex: -l2_normalize ------------- -.. autofunction:: paddle.v2.fluid.layers.l2_normalize +fill_constant_batch_size_like +----------------------------- + +.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like :noindex: -sequence_reshape ----------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_reshape +fill_constant +------------- + +.. autofunction:: paddle.v2.fluid.layers.fill_constant :noindex: -row_conv --------- -.. autofunction:: paddle.v2.fluid.layers.row_conv +ones +---- + +.. autofunction:: paddle.v2.fluid.layers.ones :noindex: -multiplex ---------- -.. autofunction:: paddle.v2.fluid.layers.multiplex +zeros +----- + +.. autofunction:: paddle.v2.fluid.layers.zeros :noindex: + diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst index 500019bc507f859c4c91de5d322a82eb1e78e2de..015581b7660848bdb0845fafe2d3fc05405e6ae6 100644 --- a/doc/api/v2/fluid/nets.rst +++ b/doc/api/v2/fluid/nets.rst @@ -1,33 +1,31 @@ -=========== -Nets -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +==== +nets +==== simple_img_conv_pool -------------------- -.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool - :noindex: - -img_conv_group ---------------- -.. autofunction:: paddle.v2.fluid.nets.img_conv_group +.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool :noindex: - sequence_conv_pool ------------------ + .. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool :noindex: - glu --- + .. autofunction:: paddle.v2.fluid.nets.glu :noindex: - scaled_dot_product_attention ---------------------------- + .. autofunction:: paddle.v2.fluid.nets.scaled_dot_product_attention :noindex: diff --git a/doc/api/v2/fluid/optimizer.rst b/doc/api/v2/fluid/optimizer.rst index 19b4940f08de3e2f7dc177f2961e538946d10a78..1691ebb9a7cb16da96e04147d0adea322374f529 100644 --- a/doc/api/v2/fluid/optimizer.rst +++ b/doc/api/v2/fluid/optimizer.rst @@ -1,54 +1,49 @@ -=========== -Optimizer -=========== - -Optimizer ------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: Optimizer - :noindex: +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +========= +optimizer +========= -SGDOptimizer ------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: SGDOptimizer - :noindex: +SGD +--- +.. autoclass:: paddle.v2.fluid.optimizer.SGD + :members: + :noindex: +Momentum +-------- -MomentumOptimizer ------------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: MomentumOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Momentum + :members: :noindex: +Adagrad +------- - -AdagradOptimizer ----------------- -.. automodule:: paddle.v2.fluid.optimizer - :members: AdagradOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Adagrad + :members: :noindex: +Adam +---- -AdamOptimizer -------------- -.. automodule:: paddle.v2.fluid.optimizer - :members: AdamOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Adam + :members: :noindex: +Adamax +------ -AdamaxOptimizer ------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: AdamaxOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Adamax + :members: :noindex: +DecayedAdagrad +-------------- -DecayedAdagradOptimizer ------------------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: DecayedAdagradOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.DecayedAdagrad + :members: :noindex: diff --git a/doc/api/v2/fluid/param_attr.rst b/doc/api/v2/fluid/param_attr.rst index ca0c8af9e8c4f2271de7a131ad0d27c0e8635f50..8083d0d858dafcd275eaddb9b475875ee42ef724 100644 --- a/doc/api/v2/fluid/param_attr.rst +++ b/doc/api/v2/fluid/param_attr.rst @@ -1,11 +1,21 @@ -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +========== +param_attr +========== + ParamAttr -=========== +--------- +.. autoclass:: paddle.v2.fluid.param_attr.ParamAttr + :members: + :noindex: +WeightNormParamAttr +------------------- -ParamAttr ------------ -.. automodule:: paddle.v2.fluid.param_attr - :members: ParamAttr +.. autoclass:: paddle.v2.fluid.param_attr.WeightNormParamAttr + :members: :noindex: + diff --git a/doc/api/v2/fluid/profiler.rst b/doc/api/v2/fluid/profiler.rst index 7d4042d1f41c12c4a551ba6576559d612116872a..4a1ff7cb6976e0054f77428b699ea679aa91394f 100644 --- a/doc/api/v2/fluid/profiler.rst +++ b/doc/api/v2/fluid/profiler.rst @@ -1,10 +1,25 @@ -=========== -Profiler -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +======== +profiler +======== +cuda_profiler +------------- -Profiler ------------ .. autofunction:: paddle.v2.fluid.profiler.cuda_profiler :noindex: + +reset_profiler +-------------- + +.. autofunction:: paddle.v2.fluid.profiler.reset_profiler + :noindex: + +profiler +-------- + +.. autofunction:: paddle.v2.fluid.profiler.profiler + :noindex: + diff --git a/doc/api/v2/fluid/regularizer.rst b/doc/api/v2/fluid/regularizer.rst index 868e225ed3d59e79aeb217fb88081ea25f80fa2c..2c17d15599baa1d02eb87c7b6c40034769ebb3a4 100644 --- a/doc/api/v2/fluid/regularizer.rst +++ b/doc/api/v2/fluid/regularizer.rst @@ -1,25 +1,27 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + =========== -Regularizer +regularizer =========== -WeightDecayRegularizer ----------------------- -.. automodule:: paddle.v2.fluid.regularizer - :members: WeightDecayRegularizer - :noindex: - +append_regularization_ops +------------------------- -L2DecayRegularizer ------------------- -.. automodule:: paddle.v2.fluid.regularizer - :members: L2DecayRegularizer +.. autofunction:: paddle.v2.fluid.regularizer.append_regularization_ops :noindex: +L1Decay +------- +.. autoclass:: paddle.v2.fluid.regularizer.L1Decay + :members: + :noindex: -L1DecayRegularizer -------------------- -.. automodule:: paddle.v2.fluid.regularizer - :members: L1DecayRegularizer +L2Decay +------- +.. autoclass:: paddle.v2.fluid.regularizer.L2Decay + :members: + :noindex: diff --git a/doc/design/speech/README.MD b/doc/design/speech/deep_speech_2.md similarity index 85% rename from doc/design/speech/README.MD rename to doc/design/speech/deep_speech_2.md index 7304650e628dba210488cd2dc4836318b5383b2a..cfdc4d6df04344c70d3334626bd38eca997c31ff 100644 --- a/doc/design/speech/README.MD +++ b/doc/design/speech/deep_speech_2.md @@ -140,7 +140,19 @@ TODO by Assignees ### Beam Search with CTC and LM -TODO by Assignees +
+
+Figure 2. Algorithm for CTC Beam Search Decoder. +
+ +- The **Beam Search Decoder** for DS2 CTC-trained network follows the similar approach in \[[3](#references)\] as shown in Figure 2, with two important modifications for the ambiguous parts: + - 1) in the iterative computation of probabilities, the assignment operation is changed to accumulation for one prefix may comes from different paths; + - 2) the if condition ```if l^+ not in A_prev then``` after probabilities' computation is deprecated for it is hard to understand and seems unnecessary. +- An **external scorer** would be passed into the decoder to evaluate a candidate prefix during decoding whenever a white space appended in English decoding and any character appended in Mandarin decoding. +- Such external scorer consists of language model, word count or any other custom scorers. +- The **language model** is built from Task 5, with parameters should be carefully tuned to achieve minimum WER/CER (c.f. Task 7) +- This decoder needs to perform with **high efficiency** for the convenience of parameters tuning and speech recognition in reality. + ## Future Work @@ -153,3 +165,4 @@ TODO by Assignees 1. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf). ICML 2016. 2. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595). arXiv:1512.02595. +3. Awni Y. Hannun, etc. [First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs](https://arxiv.org/abs/1408.2873). arXiv:1408.2873 diff --git a/doc/design/speech/image/beam_search.png b/doc/design/speech/image/beam_search.png new file mode 100644 index 0000000000000000000000000000000000000000..7f7e35f34223162d0f7f0ed97375909c43b830ae Binary files /dev/null and b/doc/design/speech/image/beam_search.png differ diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst index 71904dc41ed0d946867d890cc585e1b88450ca8c..ff904b1022a41612c9680dce92d3fc2c69ad7e93 100644 --- a/doc/getstarted/build_and_install/build_from_source_cn.rst +++ b/doc/getstarted/build_and_install/build_from_source_cn.rst @@ -115,7 +115,7 @@ PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种B "WITH_AVX", "是否编译含有AVX指令集的PaddlePaddle二进制文件", "ON" "WITH_PYTHON", "是否内嵌PYTHON解释器", "ON" "WITH_STYLE_CHECK", "是否编译时进行代码风格检查", "ON" - "WITH_TESTING", "是否开启单元测试", "ON" + "WITH_TESTING", "是否开启单元测试", "OFF" "WITH_DOC", "是否编译中英文文档", "OFF" "WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto" "WITH_GOLANG", "是否编译go语言的可容错parameter server", "ON" diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst index 27f73b2e2c029b41d514e1612912ed1c335605b6..718fb869c23a1f7be82c87c726282bded9dad516 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.rst +++ b/doc/getstarted/build_and_install/build_from_source_en.rst @@ -126,7 +126,7 @@ You can add :code:`-D` argument to pass such options, like: "WITH_AVX", "Build with AVX support", "ON" "WITH_PYTHON", "Build with integrated Python interpreter", "ON" "WITH_STYLE_CHECK", "Check code style when building", "ON" - "WITH_TESTING", "Build unit tests", "ON" + "WITH_TESTING", "Build unit tests", "OFF" "WITH_DOC", "Build documentations", "OFF" "WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto" "WITH_GOLANG", "Build fault-tolerant parameter server written in go", "ON" diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 98fada7bdb46f4dd2927d6f93bcbcebbe7d18604..79d214635a069a739060e0b79424729f6ff90387 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -95,6 +95,12 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note docker run -p 8888:8888 paddlepaddle/book +国内用户可以使用下面的镜像源来加速访问: + + .. code-block: bash + + docker run -p 8888:8888 docker.paddlepaddlehub.com/book + 然后在浏览器中输入以下网址: .. code-block:: text diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index b1d0890b4cdddb77114a80276130afd07c22d270..e0e0559fb858a093db96a9b4ec1c5a45d6c71a38 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -102,6 +102,12 @@ We provide a packaged book image, simply issue the command: docker run -p 8888:8888 paddlepaddle/book +For users in China, we provide a faster mirror: + + .. code-block: bash + + docker run -p 8888:8888 docker.paddlepaddlehub.com/book + Then, you would back and paste the address into the local browser: .. code-block:: text diff --git a/doc/howto/usage/cluster/cluster_train_cn.md b/doc/howto/usage/cluster/cluster_train_cn.md index c2fc86687d7106aac7c74d6dd16bc229353cb7c1..0f3db59607fb6b43da01f5fdb46949087517ed6c 100644 --- a/doc/howto/usage/cluster/cluster_train_cn.md +++ b/doc/howto/usage/cluster/cluster_train_cn.md @@ -92,11 +92,11 @@ paddle.init( 参数说明 - use_gpu: **可选,默认False**,是否启用GPU训练 -- trainer_count:**必选,默认1**,当前训练任务trainer总个数 +- trainer_count:**必选,默认1**,当前trainer的线程数目 - port:**必选,默认7164**,连接到pserver的端口 - ports_num:**必选,默认1**,连接到pserver的端口个数 - ports_num_for_sparse:**必选,默认0**,和pserver之间用于稀疏类型参数通信的端口个数 -- num_gradient_servers:**必选,默认1**,当前训练任务pserver总数 +- num_gradient_servers:**必选,默认1**,当前训练任务trainer总数 - trainer_id:**必选,默认0**,每个trainer的唯一ID,从0开始的整数 - pservers:**必选,默认127.0.0.1**,当前训练任务启动的pserver的IP列表,多个IP使用“,”隔开 diff --git a/doc/howto/usage/cluster/cluster_train_en.md b/doc/howto/usage/cluster/cluster_train_en.md index 28cd1fa7903e559e33a7fc2f00172fdfbe2fdc97..f9424f8f1a29fcf001c4e7976086512b22f6e858 100644 --- a/doc/howto/usage/cluster/cluster_train_en.md +++ b/doc/howto/usage/cluster/cluster_train_en.md @@ -95,11 +95,11 @@ paddle.init( Parameter Description - use_gpu: **optional, default False**, set to "True" to enable GPU training. -- trainer_count: **required, default 1**, total count of trainers in the training job. +- trainer_count: **required, default 1**, number of threads in current trainer. - port: **required, default 7164**, port to connect to parameter server. - ports_num: **required, default 1**, number of ports for communication. - ports_num_for_sparse: **required, default 0**, number of ports for sparse type caculation. -- num_gradient_servers: **required, default 1**, total number of gradient server. +- num_gradient_servers: **required, default 1**, number of trainers in current job. - trainer_id: **required, default 0**, ID for every trainer, start from 0. - pservers: **required, default 127.0.0.1**, list of IPs of parameter servers, separated by ",". diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 318661af8bd04880577222fdc82cc1b6e79a457f..8b71f73c36c33d882b34c833031c50cd14817e76 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -22,11 +22,11 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory) -nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) +nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init) cc_test(variable_test SRCS variable_test.cc) -cc_library(threadpool SRCS threadpool.cc) +cc_library(threadpool SRCS threadpool.cc DEPS enforce) cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) cc_library(scope SRCS scope.cc DEPS glog threadpool) diff --git a/paddle/framework/channel.h b/paddle/framework/channel.h index 70ecccc1a1078374f3190b3956103ed8000c4fc5..0570980c5a4d7fa45e672ae5baac65d2c65ddad9 100644 --- a/paddle/framework/channel.h +++ b/paddle/framework/channel.h @@ -26,9 +26,7 @@ class Channel { virtual void Send(T*) = 0; virtual void Receive(T*) = 0; virtual size_t Cap() = 0; - - // Don't delete channels; instead, call Channel::Close. - protected: + virtual void Close() = 0; virtual ~Channel() {} }; @@ -50,11 +48,7 @@ Channel* MakeChannel(size_t buffer_size) { template void CloseChannel(Channel* ch) { - if (ch->Cap() > 0) { - delete dynamic_cast*>(ch); - } else { - delete dynamic_cast*>(ch); - } + ch->Close(); } } // namespace framework diff --git a/paddle/framework/channel_test.cc b/paddle/framework/channel_test.cc index 9efc0172658c800d14102531332dbef68fa392f4..020f806380626d2f1efac683741ee84f1b573aeb 100644 --- a/paddle/framework/channel_test.cc +++ b/paddle/framework/channel_test.cc @@ -14,13 +14,114 @@ limitations under the License. */ #include "paddle/framework/channel.h" +#include +#include + #include "gtest/gtest.h" +using paddle::framework::Channel; +using paddle::framework::MakeChannel; +using paddle::framework::CloseChannel; + TEST(Channel, MakeAndClose) { - using paddle::framework::Channel; - using paddle::framework::MakeChannel; - using paddle::framework::CloseChannel; + using paddle::framework::details::Buffered; + using paddle::framework::details::UnBuffered; + { + // MakeChannel should return a buffered channel is buffer_size > 0. + auto ch = MakeChannel(10); + EXPECT_NE(dynamic_cast*>(ch), nullptr); + EXPECT_EQ(dynamic_cast*>(ch), nullptr); + CloseChannel(ch); + delete ch; + } + { + // MakeChannel should return an un-buffered channel is buffer_size = 0. + auto ch = MakeChannel(0); + EXPECT_EQ(dynamic_cast*>(ch), nullptr); + EXPECT_NE(dynamic_cast*>(ch), nullptr); + CloseChannel(ch); + delete ch; + } +} + +TEST(Channel, SufficientBufferSizeDoesntBlock) { + const size_t buffer_size = 10; + auto ch = MakeChannel(buffer_size); + for (size_t i = 0; i < buffer_size; ++i) { + ch->Send(&i); // should not block + } + + size_t out; + for (size_t i = 0; i < buffer_size; ++i) { + ch->Receive(&out); // should not block + EXPECT_EQ(out, i); + } + CloseChannel(ch); + delete ch; +} + +TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) { + const size_t buffer_size = 10; + auto ch = MakeChannel(buffer_size); + size_t sum = 0; + std::thread t([&]() { + // Try to write more than buffer size. + for (size_t i = 0; i < 2 * buffer_size; ++i) { + ch->Send(&i); // should not block + sum += i; + } + }); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); // wait 0.5 sec + EXPECT_EQ(sum, 45U); + + CloseChannel(ch); + t.join(); + delete ch; +} + +TEST(Channel, SimpleUnbufferedChannelTest) { + auto ch = MakeChannel(0); + unsigned sum_send = 0; + std::thread t([&]() { + for (int i = 0; i < 5; i++) { + ch->Send(&i); + sum_send += i; + } + }); + for (int i = 0; i < 5; i++) { + int recv; + ch->Receive(&recv); + EXPECT_EQ(recv, i); + } + + CloseChannel(ch); + t.join(); + EXPECT_EQ(sum_send, 10U); + delete ch; +} + +TEST(Channel, UnbufferedLessReceiveMoreSendTest) { + auto ch = MakeChannel(0); + unsigned sum_send = 0; + // Send should block after three iterations + // since we only have three receivers. + std::thread t([&]() { + // Try to send more number of times + // than receivers + for (int i = 0; i < 4; i++) { + ch->Send(&i); + sum_send += i; + } + }); + for (int i = 0; i < 3; i++) { + int recv; + ch->Receive(&recv); + EXPECT_EQ(recv, i); + } + std::this_thread::sleep_for(std::chrono::milliseconds(100)); // wait 0.5 sec + EXPECT_EQ(sum_send, 3U); - Channel* ch = MakeChannel(10); CloseChannel(ch); + t.join(); + delete ch; } diff --git a/paddle/framework/details/buffered_channel.h b/paddle/framework/details/buffered_channel.h index 572e29d44a3baec84a029d87f9b0874784aa761b..b093e1589293b030ef2bedb82504a8e86b3dc857 100644 --- a/paddle/framework/details/buffered_channel.h +++ b/paddle/framework/details/buffered_channel.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include "paddle/framework/channel.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { @@ -32,6 +33,8 @@ class Buffered : public paddle::framework::Channel { virtual void Send(T*); virtual void Receive(T*); virtual size_t Cap() { return cap_; } + virtual void Close(); + virtual ~Buffered(); private: size_t cap_; @@ -39,9 +42,11 @@ class Buffered : public paddle::framework::Channel { std::condition_variable empty_cond_var_; std::condition_variable full_cond_var_; std::deque channel_; + bool closed_; - Buffered(size_t cap) : cap_(cap) {} - virtual ~Buffered(); + Buffered(size_t cap) : cap_(cap), closed_(false) { + PADDLE_ENFORCE_GT(cap, 0); + } void NotifyAllSenders(std::unique_lock*); }; @@ -49,24 +54,39 @@ class Buffered : public paddle::framework::Channel { template void Buffered::Send(T* item) { std::unique_lock lock(mu_); - full_cond_var_.wait(lock, [this]() { return channel_.size() < cap_; }); - channel_.push_back(std::move(*item)); - lock.unlock(); - empty_cond_var_.notify_one(); + full_cond_var_.wait(lock, + [this]() { return channel_.size() < cap_ || closed_; }); + if (!closed_) { + channel_.push_back(std::move(*item)); + lock.unlock(); + empty_cond_var_.notify_one(); + } } template void Buffered::Receive(T* item) { std::unique_lock lock(mu_); - empty_cond_var_.wait(lock, [this]() { return !channel_.empty(); }); - *item = std::move(channel_.front()); - channel_.pop_front(); + empty_cond_var_.wait(lock, [this]() { return !channel_.empty() || closed_; }); + if (!closed_) { + *item = std::move(channel_.front()); + channel_.pop_front(); + NotifyAllSenders(&lock); + } else { + item = nullptr; + } +} + +template +void Buffered::Close() { + std::unique_lock lock(mu_); + closed_ = true; NotifyAllSenders(&lock); } template Buffered::~Buffered() { std::unique_lock lock(mu_); + closed_ = true; channel_.clear(); NotifyAllSenders(&lock); } @@ -74,7 +94,7 @@ Buffered::~Buffered() { template void Buffered::NotifyAllSenders(std::unique_lock* lock) { lock->unlock(); - full_cond_var_.notify_one(); + full_cond_var_.notify_all(); } } // namespace details diff --git a/paddle/framework/details/unbuffered_channel.h b/paddle/framework/details/unbuffered_channel.h index 7ecced1fba88fea781fc342091bc71e5aa496d3a..0dc5afd7e57c1f59dfc1b86093eea231d46966f1 100644 --- a/paddle/framework/details/unbuffered_channel.h +++ b/paddle/framework/details/unbuffered_channel.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include -#include #include #include "paddle/framework/channel.h" @@ -32,20 +32,108 @@ class UnBuffered : public paddle::framework::Channel { virtual void Send(T*); virtual void Receive(T*); virtual size_t Cap() { return 0; } + virtual void Close(); + virtual ~UnBuffered(); private: - UnBuffered() {} - virtual ~UnBuffered(); + std::mutex mu_ch_; + // Mutex for readers and writers who are waiting for other reader + // and writer to complete execution + std::recursive_mutex mu_read_, mu_write_; + // reader_found_ is set true when a reader is ready to accept data + // writer_found_ is set true when a writer is ready to send data + // A transaction occurs only when both are true + std::atomic reader_found_{false}, writer_found_{false}; + std::condition_variable cv_channel_; + std::condition_variable_any cv_reader_, cv_writer_; + T* item{nullptr}; + std::atomic closed_{false}; + + UnBuffered() : closed_(false) {} + + void NotifyAllParticipants(std::unique_lock*); }; +// This function implements the concept of how data should +// be sent from a writer to a reader. +template +void UnBuffered::Send(T* data) { + // Prevent other writers from entering + std::unique_lock writer_lock(mu_write_); + writer_found_ = true; + std::unique_lock cv_lock(mu_write_); + // If writer comes first, it should wait till a reader arrives + cv_writer_.wait(cv_lock, + [this]() { return reader_found_ == true || closed_; }); + cv_reader_.notify_one(); + if (!closed_) { + std::unique_lock channel_lock(mu_ch_); + item = data; + channel_lock.unlock(); + cv_channel_.notify_one(); + channel_lock.lock(); + cv_channel_.wait(channel_lock, + [this]() { return item == nullptr || closed_; }); + } + writer_found_ = false; +} + +// This function implements the concept of how +// data that was sent by a writer is read from a reader. +template +void UnBuffered::Receive(T* data) { + // Prevent other readers from entering + std::unique_lock read_lock{mu_read_}; + reader_found_ = true; + std::unique_lock cv_lock{mu_read_}; + // If reader comes first, it should wait till a writer arrives + cv_reader_.wait(cv_lock, + [this]() { return writer_found_ == true || closed_; }); + cv_writer_.notify_one(); + if (!closed_) { + std::unique_lock lock_ch{mu_ch_}; + // Reader should wait for the writer to first write its data + cv_channel_.wait(lock_ch, [this]() { return item != nullptr || closed_; }); + if (!closed_) { + *data = std::move(*item); + item = nullptr; + lock_ch.unlock(); + } + cv_channel_.notify_one(); + } + reader_found_ = false; +} + +// This function implements the sequence of events +// that take place once the channel is closed. template -void UnBuffered::Send(T* channel_element) {} +void UnBuffered::Close() { + std::unique_lock lock(mu_ch_); + item = nullptr; + closed_ = true; + NotifyAllParticipants(&lock); +} +// This function implements the sequence of events +// that are executed once the object of an UnBuffered +// channel is destroyed. template -void UnBuffered::Receive(T*) {} +UnBuffered::~UnBuffered() { + std::unique_lock lock(mu_ch_); + item = nullptr; + closed_ = true; + NotifyAllParticipants(&lock); +} +// This function notifies all the readers, writers and +// the channel condition variables. template -UnBuffered::~UnBuffered() {} +void UnBuffered::NotifyAllParticipants(std::unique_lock* lock) { + lock->unlock(); + cv_writer_.notify_all(); + cv_channel_.notify_all(); + cv_reader_.notify_all(); +} } // namespace details } // namespace framework diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index cbf3ec75265fa74aaacffee684b7b7d5f73b7c02..9a232b08434d299d10bb2acdb6e96295de875d56 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -25,7 +25,7 @@ limitations under the License. */ #include "paddle/platform/place.h" #include "paddle/platform/profiler.h" -DECLARE_bool(do_memory_benchmark); +DECLARE_bool(benchmark); DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. It will be " "extremely slow so please use this flag wisely."); @@ -33,9 +33,6 @@ DEFINE_bool(check_nan_inf, false, namespace paddle { namespace framework { -const std::string kFeedOpType = "feed"; -const std::string kFetchOpType = "fetch"; - Executor::Executor(const platform::Place& place) : place_(place) {} static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { @@ -125,7 +122,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, op->Run(*local_scope, place_); VLOG(3) << op->DebugStringEx(local_scope); - if (FLAGS_do_memory_benchmark) { + if (FLAGS_benchmark) { VLOG(2) << "Memory used after operator " + op->Type() + " running: " << memory::memory_usage(place_); } @@ -142,7 +139,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, if (create_vars && create_local_scope) { scope->DeleteScope(local_scope); } - if (FLAGS_do_memory_benchmark) { + if (FLAGS_benchmark) { VLOG(2) << "-------------------------------------------------------"; VLOG(2) << "Memory used after deleting local scope: " << memory::memory_usage(place_); diff --git a/paddle/framework/feed_fetch_type.h b/paddle/framework/feed_fetch_type.h index 9bc4a90c44828ecb7458d524f59609f01848cc5c..168f456675af508df86dd0520cdeb5d16d94ad31 100644 --- a/paddle/framework/feed_fetch_type.h +++ b/paddle/framework/feed_fetch_type.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include "paddle/framework/lod_tensor.h" @@ -20,5 +21,8 @@ namespace paddle { namespace framework { using FeedFetchType = LoDTensor; using FeedFetchList = std::vector; + +static const std::string kFeedOpType = "feed"; +static const std::string kFetchOpType = "fetch"; } // namespace framework } // namespace paddle diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc index 4ef82a541efaa35bcf831d5122570154f2fa2423..3f6ea121b3994979d89a7d5a8c20c59240a0c111 100644 --- a/paddle/framework/init.cc +++ b/paddle/framework/init.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include // for strdup #include +#include #include #include "paddle/framework/init.h" @@ -46,17 +47,23 @@ void InitDevices() { std::vector places; places.emplace_back(platform::CPUPlace()); + int count = 0; #ifdef PADDLE_WITH_CUDA - int count = platform::GetCUDADeviceCount(); - for (int i = 0; i < count; ++i) { - places.emplace_back(platform::CUDAPlace(i)); + try { + count = platform::GetCUDADeviceCount(); + } catch (const std::exception &exp) { + LOG(WARNING) << "Compiled with WITH_GPU, but no GPU found in runtime."; } #else LOG(WARNING) - << "'GPU' is not supported, Please re-compile with WITH_GPU option"; + << "'CUDA' is not supported, Please re-compile with WITH_GPU option"; #endif + for (int i = 0; i < count; ++i) { + places.emplace_back(platform::CUDAPlace(i)); + } + platform::DeviceContextPool::Init(places); } diff --git a/paddle/framework/init_test.cc b/paddle/framework/init_test.cc index f837a965d3be7d40c20803ae4462b3bfd91bffd0..01e076dd8ea24831e3ed7c8a7f8fae6818a89335 100644 --- a/paddle/framework/init_test.cc +++ b/paddle/framework/init_test.cc @@ -20,7 +20,21 @@ TEST(InitDevices, CPU) { using paddle::framework::InitDevices; using paddle::platform::DeviceContextPool; +#ifndef PADDLE_WITH_CUDA InitDevices(); DeviceContextPool& pool = DeviceContextPool::Instance(); - ASSERT_GE(pool.size(), 1U); + ASSERT_EQ(pool.size(), 1U); +#endif +} + +TEST(InitDevices, CUDA) { + using paddle::framework::InitDevices; + using paddle::platform::DeviceContextPool; + +#ifdef PADDLE_WITH_CUDA + int count = paddle::platform::GetCUDADeviceCount(); + InitDevices(); + DeviceContextPool& pool = DeviceContextPool::Instance(); + ASSERT_EQ(pool.size(), 1U + static_cast(count)); +#endif } diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 53b0d0fe083579da4f0bb600f292765aa2aa0d8a..cb27de6991674247e6215ce64a2da5000fa78ed4 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -24,8 +24,6 @@ limitations under the License. */ #include #include -#include - namespace paddle { namespace framework { diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 9d1294fdeb9bd76bf944f7ec3687e3c5bb333241..d0ab640485baf6d76ee629ea420b603f42b031b4 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -18,11 +18,11 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #include -#include #endif #include #include "paddle/framework/ddim.h" +#include "paddle/framework/mixed_vector.h" #include "paddle/framework/tensor.h" #include "paddle/framework/tensor_util.h" #include "paddle/platform/enforce.h" @@ -31,15 +31,6 @@ limitations under the License. */ namespace paddle { namespace framework { -#ifndef PADDLE_WITH_CUDA -template -using Vector = std::vector; -#else -template -using Vector = thrust::host_vector< - T, thrust::system::cuda::experimental::pinned_allocator>; -#endif - /* * LoD is short for Level of Details. * @@ -55,7 +46,15 @@ using Vector = thrust::host_vector< * 0 2 4 7 * 0 2 5 7 10 12 15 20 */ -using LoD = std::vector>; +struct LoD : public std::vector> { + using std::vector>::vector; + + void CopyFromCUDA() { + for (auto it = this->begin(); it != this->end(); ++it) { + it->CopyFromCUDA(); + } + } +}; std::ostream& operator<<(std::ostream& os, const LoD& lod); std::ostream& operator<<(std::ostream& os, const LoDTensor& t); @@ -109,7 +108,10 @@ bool CheckAbsLoD(const LoD& in, int tensor_height = -1); */ class LoDTensor : public Tensor { public: - LoDTensor() {} + LoDTensor() : Tensor() {} + + /* Constructor with place should only be used in pybind */ + explicit LoDTensor(const platform::Place& place) : Tensor(place) {} explicit LoDTensor(const LoD& lod) : lod_(lod) {} diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 4d172c43c7cceacb7d0dfaf1c4d3028717350268..3b63020e685436396071fa05cd7697630ae56c95 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -23,6 +23,17 @@ namespace paddle { namespace framework { +TEST(LoD, data) { + LoD lod{{0, 1, 2}}; + lod.push_back({0, 2, 4, 5}); + lod.push_back(std::vector({0, 1, 6, 8, 10, 11})); + + auto& v = lod[0]; + for (size_t i = 0; i < v.size(); ++i) { + EXPECT_EQ(v[i], i); + } +} + TEST(LodExpand, test) { LoD lod{{0, 2}}; LoDTensor tensor; diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 1e253a2f6f35e827fb2e5db6270da03705b39514..d4c9f00bd9c00f3cae68858ca46c5320fc117405 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -14,6 +14,8 @@ #include #include +#include +#include "paddle/framework/init.h" #include "paddle/framework/lod_tensor.h" #include "paddle/platform/assert.h" @@ -26,7 +28,48 @@ __global__ void test(size_t* a, int size) { } } +TEST(Vector, Normal) { + using namespace paddle::framework; + using namespace paddle::platform; + using namespace paddle::memory; + + paddle::framework::InitDevices(); + + paddle::framework::Vector vec({1, 2, 3}); + size_t* ptr = vec.data(); + for (size_t i = 0; i < vec.size(); ++i) { + EXPECT_EQ(vec[i], *(ptr + i)); + } + + vec.clear(); + vec.CopyFromCUDA(); + + std::vector v = {1, 2, 3}; + for (size_t i = 0; i < v.size(); ++i) { + EXPECT_EQ(v[i], vec[i]); + } +} + +TEST(LoD, data) { + paddle::framework::InitDevices(); + + paddle::framework::LoD lod{{0, 1, 2}}; + lod.push_back({0, 2, 4, 5}); + lod.push_back(std::vector({0, 1, 6, 8, 10, 11})); + + auto& v = lod[0]; + test<<<1, 1>>>(v.cuda_data(), v.size()); + cudaDeviceSynchronize(); + + v.CopyFromCUDA(); + for (size_t i = 0; i < v.size(); ++i) { + EXPECT_EQ(v[i], i * 2); + } +} + TEST(LoDTensor, LoDInGPU) { + paddle::framework::InitDevices(); + paddle::framework::LoDTensor lod_tensor; paddle::platform::CUDAPlace place(0); @@ -42,8 +85,9 @@ TEST(LoDTensor, LoDInGPU) { auto lod = lod_tensor.lod(); - test<<<1, 8>>>(lod[0].data(), lod[0].size()); + test<<<1, 8>>>(lod[0].cuda_data(), lod[0].size()); cudaDeviceSynchronize(); + lod.CopyFromCUDA(); for (size_t i = 0; i < src_lod[0].size(); ++i) { EXPECT_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2); diff --git a/paddle/framework/mixed_vector.h b/paddle/framework/mixed_vector.h new file mode 100644 index 0000000000000000000000000000000000000000..85caac8dcd9ede4fe997e2fd246d1421aa73c80a --- /dev/null +++ b/paddle/framework/mixed_vector.h @@ -0,0 +1,135 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include + +#include "paddle/memory/memcpy.h" +#include "paddle/memory/memory.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/enforce.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace framework { + +/** + * @brief Vector support both cpu and gpu. + * host vector lifetime is same with Vector + * device vector is lazily malloc and modified. + */ + +template +class Vector : public std::vector { + public: + using std::vector::vector; + + Vector() {} + Vector(const std::vector &v) : std::vector(v) {} // NOLINT + + virtual ~Vector() { +#ifdef PADDLE_WITH_CUDA + if (cuda_ptr_ != nullptr) { + memory::Free(place_, cuda_ptr_); + } +#endif + } + + /* Get device vector */ + T *cuda_data() { + CopyToCUDA(); + PADDLE_ENFORCE_NOT_NULL( + cuda_ptr_, "No data or Insufficient CUDA memory to allocation"); + return static_cast(cuda_ptr_); + } + + /* Get host vector */ + T *data() { return std::vector::data(); } + const T *data() const { return std::vector::data(); } + + /* Synchronize host vector to device vector */ + void CopyToCUDA(); + /* Synchronize device vector to host vector */ + void CopyFromCUDA(); + /* Switch device vector location */ + void CopyToPeer(platform::Place); + + private: + void *cuda_ptr_ = nullptr; + size_t cuda_size_ = 0; // device vector numel + platform::CUDAPlace place_; +}; + +template +void Vector::CopyToCUDA() { +#ifdef PADDLE_WITH_CUDA + if (cuda_size_ < this->size()) { + if (cuda_ptr_ != nullptr) { + memory::Free(place_, cuda_ptr_); + } + cuda_ptr_ = + memory::Alloc(place_, this->size() * sizeof(T)); + } + cuda_size_ = this->size(); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *ctx = pool.GetByPlace(place_); + memory::Copy(place_, cuda_ptr_, platform::CPUPlace(), + static_cast(this->data()), + this->size() * sizeof(T), ctx->stream()); + ctx->Wait(); +#endif +} + +template +void Vector::CopyFromCUDA() { +#ifdef PADDLE_WITH_CUDA + if (cuda_ptr_ == nullptr) { + LOG(WARNING) << "No uncommitted cuda data."; + return; + } + this->resize(cuda_size_); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *ctx = pool.GetByPlace(place_); + memory::Copy(platform::CPUPlace(), static_cast(this->data()), place_, + static_cast(cuda_ptr_), this->size() * sizeof(T), + ctx->stream()); + ctx->Wait(); +#endif +} + +template +void Vector::CopyToPeer(platform::Place peer_place) { +#ifdef PADDLE_WITH_CUDA + auto *ctx = platform::DeviceContextPool::Instance().GetByPlace(place_); + void *peer_cuda_ptr = memory::Alloc( + boost::get(peer_place), this->size() * sizeof(T)); + memory::Copy(boost::get(peer_place), peer_cuda_ptr, + place_, cuda_ptr_, this->size() * sizeof(T), ctx->stream()); + ctx->Wait(); + + memory::Free(place_, cuda_ptr_); + place_ = boost::get(peer_place); + cuda_ptr_ = peer_cuda_ptr; +#endif +} + +template class Vector; +template class Vector; +template class Vector; +template class Vector; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 831b1e2a1e10777d9e89364adcd4b1f367e86080..4e854f54dd43d760bab44fb5f7cafeb13314b27c 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -22,9 +22,7 @@ limitations under the License. */ #include "paddle/framework/shape_inference.h" #include "paddle/framework/var_type.h" -DEFINE_bool(op_sync, false, - "Default cuda is asynchronous device, set to True will" - "force op run in synchronous mode."); +DECLARE_bool(benchmark); namespace paddle { namespace framework { @@ -531,7 +529,7 @@ void OperatorWithKernel::Run(const Scope& scope, ExecutionContext(*this, new_scope, *new_dev_ctx)); /*For profiling/benchmark only*/ - if (FLAGS_op_sync) { + if (FLAGS_benchmark) { new_dev_ctx->Wait(); } } diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc index b5d9e5e385c1ba57169ef885824fc23b0f130692..15ea4035c6e6193105b621210a900e74d1466941 100644 --- a/paddle/framework/program_desc.cc +++ b/paddle/framework/program_desc.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/framework/program_desc.h" #include "paddle/framework/block_desc.h" +#include "paddle/framework/feed_fetch_type.h" namespace paddle { namespace framework { @@ -64,5 +65,27 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) { } } +const std::vector ProgramDesc::GetFeedTargetNames() { + BlockDesc *global_block = blocks_[0].get(); + std::vector feed_target_names; + for (auto *op : global_block->AllOps()) { + if (op->Type() == kFeedOpType) { + feed_target_names.insert(feed_target_names.begin(), op->Output("Out")[0]); + } + } + return feed_target_names; +} + +const std::vector ProgramDesc::GetFetchTargetNames() { + BlockDesc *global_block = blocks_[0].get(); + std::vector fetch_target_names; + for (auto *op : global_block->AllOps()) { + if (op->Type() == kFetchOpType) { + fetch_target_names.push_back(op->Input("X")[0]); + } + } + return fetch_target_names; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h index 15a962bb696d6172acd1a83cf9bb1ffd0846d449..8e958eab6ee08436ca73b13bac010e66c7df2b8b 100644 --- a/paddle/framework/program_desc.h +++ b/paddle/framework/program_desc.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include "paddle/framework/block_desc.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/proto_desc.h" #include "paddle/platform/macros.h" @@ -45,6 +46,9 @@ class ProgramDesc { proto::ProgramDesc *Proto(); + const std::vector GetFeedTargetNames(); + const std::vector GetFetchTargetNames(); + private: proto::ProgramDesc desc_; diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc index 25eb813ffb96e9b1e13299421ead9f85c02da59f..bff8e0bceaca9749101b2c45edddba526d565624 100644 --- a/paddle/framework/prune.cc +++ b/paddle/framework/prune.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include #include @@ -102,6 +103,32 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output, *op_field->Add() = input.blocks(block_id).ops(i); } } + + // remove the VarDescs in BlockDesc that are not referenced in + // the pruned OpDescs + std::unordered_map var_map; + auto* var_field = output->mutable_blocks(block_id)->mutable_vars(); + for (const auto& var : *var_field) { + var_map[var.name()] = var; + } + + var_field->Clear(); + for (const auto& op : *op_field) { + // add VarDescs of all input arguments for each OpDesc + auto& input_field = op.inputs(); + for (auto& input_var : input_field) { + for (auto& arg : input_var.arguments()) { + *var_field->Add() = var_map[arg]; + } + } + // add VarDescs of all output arguments for each OpDesc + auto& output_field = op.outputs(); + for (auto& output_var : output_field) { + for (auto& arg : output_var.arguments()) { + *var_field->Add() = var_map[arg]; + } + } + } } // TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index a67ff910093d93060d07d849f6e968e5f4ce21cd..af08b2ab816f63c05d4c65df9601c787e57994f5 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -20,9 +20,11 @@ limitations under the License. */ #include "paddle/framework/threadpool.h" #include "paddle/string/printf.h" -DEFINE_bool(do_memory_benchmark, false, +DEFINE_bool(benchmark, false, "Doing memory benchmark. It will make deleting scope synchronized, " - "and add some memory usage logs"); + "and add some memory usage logs." + "Default cuda is asynchronous device, set to True will" + "force op run in synchronous mode."); namespace paddle { namespace framework { @@ -93,7 +95,7 @@ void Scope::DeleteScope(Scope* scope) { PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. - if (FLAGS_do_memory_benchmark) { + if (FLAGS_benchmark) { delete scope; } else { Async([scope] { delete scope; }); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 4aaa29d794c95592832a1fe990e2dce274eba9d5..f0ea709a5c37e769e3ffa1b2e9d1e39721979251 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -47,6 +47,11 @@ class Tensor { public: Tensor() : offset_(0) {} + /*! Constructor with place should only be used in pybind. */ + explicit Tensor(const platform::Place& place) : offset_(0) { + holder_->set_place(place); + } + /*! Return a pointer to mutable memory block. */ template inline T* data(); @@ -137,6 +142,7 @@ class Tensor { virtual std::type_index type() const = 0; virtual platform::Place place() const = 0; virtual void set_type(std::type_index type) = 0; + virtual void set_place(platform::Place place) = 0; }; template @@ -156,6 +162,7 @@ class Tensor { virtual void* ptr() const { return static_cast(ptr_.get()); } virtual std::type_index type() const { return type_; } virtual void set_type(std::type_index type) { type_ = type; } + virtual void set_place(platform::Place place) { place_ = place; } /*! the pointer of memory block. */ std::unique_ptr> ptr_; diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc index b2f5ae4a96593fde1623dd10d3b63c984ae228db..b7d7c00bcf9d9770f58284023ca2defcda299d64 100644 --- a/paddle/framework/threadpool.cc +++ b/paddle/framework/threadpool.cc @@ -14,6 +14,8 @@ #include "paddle/framework/threadpool.h" +#include "paddle/platform/enforce.h" + namespace paddle { namespace framework { diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h index 8912b1a43a26f9df662d3b5ddf68bfb2b87f4a20..4e9b58679d9e7c84adf76b6245b397c7a8872483 100644 --- a/paddle/framework/threadpool.h +++ b/paddle/framework/threadpool.h @@ -22,7 +22,7 @@ limitations under the License. */ #include #include -#include "paddle/platform/enforce.h" +#include "paddle/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN namespace paddle { namespace framework { diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp index cbdbf5335d32d55a0221728758025c9d2cb3e7d1..a9876cec2aabf7d116443b685391ee9d20bc1370 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/function/GemmConvOp.cpp @@ -178,19 +178,22 @@ public: real* inputData = inputs[0].data(); real* filterData = inputs[1].data(); real* outputData = outputs[0].data(); + real* colData = NULL; bool needIm2col = isNeedIm2col(filter); TensorShape imShape = TensorShape({inputChannels / groups_, inputHeight, inputWidth}); - TensorShape colShape; - real* colData = NULL; - size_t colHeight = inputChannels / groups_ * filterHeight * filterWidth; - size_t colWidth = outputHeight * outputWidth; - // Max col matrix height 256, Max col matrix width 1024 - size_t stepColHeight = std::min(colHeight, static_cast(256)); - size_t stepColWidth = std::min(colWidth, static_cast(2048)); + // Max col matrix width 4096, Max col matrix size 4M. + size_t outputHeightSteps = + std::min(std::max(4096 / outputWidth, (size_t)1), outputHeight); + size_t maxColWidth = outputHeightSteps * outputWidth; + size_t channelSteps = + std::min(std::max((1048576 / maxColWidth) / filterHeight * filterWidth, + (size_t)1), + inputChannels / groups_); + size_t maxColHeight = channelSteps * filterHeight * filterWidth; if (needIm2col) { colShape = TensorShape({inputChannels / groups_, @@ -199,7 +202,7 @@ public: outputHeight, outputWidth}); - resizeBuffer(stepColHeight * stepColWidth * sizeof(real)); + resizeBuffer(maxColHeight * maxColWidth * sizeof(real)); colData = reinterpret_cast(memory_->getBuf()); } @@ -209,20 +212,24 @@ public: (outputChannels / groups_) * outputHeight * outputWidth; size_t filterOffset = filter.getElements() / groups_; - int nStride = colWidth; - int kStride = colHeight; + int nStride = outputHeight * outputWidth; + int kStride = inputChannels / groups_ * filterHeight * filterWidth; for (size_t i = 0; i < batchSize; i++) { + filterData = inputs[1].data(); for (size_t g = 0; g < groups_; g++) { if (needIm2col) { real beta_ = beta; - for (size_t colHeightStart = 0; colHeightStart < colHeight; - colHeightStart += stepColHeight) { - for (size_t colWidthStart = 0; colWidthStart < colWidth; - colWidthStart += stepColWidth) { - int N = std::min(colWidth - colWidthStart, stepColWidth); - int K = std::min(colHeight - colHeightStart, stepColHeight); + for (size_t ic = 0; ic < inputChannels / groups_; + ic += channelSteps) { + int channels = std::min(inputChannels / groups_ - ic, channelSteps); + for (size_t oh = 0; oh < outputHeight; oh += outputHeightSteps) { + int height = std::min(outputHeight - oh, outputHeightSteps); + + int M = outputChannels / groups_; + int N = height * outputWidth; + int K = channels * filterHeight * filterWidth; // im2col - im2col(inputData + g * inputOffset, + im2col(inputData, imShape, colData, colShape, @@ -232,13 +239,12 @@ public: paddingW(), dilationH(), dilationW(), - colHeightStart, - K, - colWidthStart, + channels, + oh, + height, N); // gemm - int M = outputChannels / groups_; BlasGemm::compute( false, false, @@ -246,12 +252,12 @@ public: N, K, 1.0f, - filterData + g * filterOffset + colHeightStart, + filterData + ic * filterHeight * filterWidth, kStride, colData, N, beta_, - outputData + g * outputOffset + colWidthStart, + outputData + oh * outputWidth, nStride); } beta_ = 1.0; @@ -266,17 +272,18 @@ public: N, K, 1.0f, - filterData + g * filterOffset, + filterData, K, - inputData + g * inputOffset, + inputData, N, beta, - outputData + g * outputOffset, + outputData, N); } + inputData += inputOffset; + outputData += outputOffset; + filterData += filterOffset; } - inputData += inputChannels * inputHeight * inputWidth; - outputData += outputChannels * outputHeight * outputWidth; } memory_.reset(); diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 36a9bcf84e4b14965c83627821b71d1c7c0da1b2..915119e291caaa223249cf8e37078723621517b0 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -111,39 +111,42 @@ public: int paddingWidth, int dilationHeight, int dilationWidth, - int colHeightStart, - int colHeightSize, - int colWidthStart, - int colWidthSize) { + int inputChannels, + int colOffset, + int colOutputHeight, + int colWidth) { int inputHeight = imShape[1]; int inputWidth = imShape[2]; int filterHeight = colShape[1]; int filterWidth = colShape[2]; int outputWidth = colShape[4]; - for (int colh = 0; colh < colHeightSize; colh++) { - int wOffset = (colHeightStart + colh) % filterWidth; - int hOffset = ((colHeightStart + colh) / filterWidth) % filterHeight; - int c_im = (colHeightStart + colh) / filterWidth / filterHeight; - - for (int colw = 0; colw < colWidthSize; colw++) { - int h = (colWidthStart + colw) / outputWidth; - int w = (colWidthStart + colw) % outputWidth; - - int imRowIdx = h * strideHeight + hOffset * dilationHeight; - int imColIdx = w * strideWidth + wOffset * dilationWidth; - if ((imRowIdx - paddingHeight) < 0 || - (imRowIdx - paddingHeight) >= inputHeight || - (imColIdx - paddingWidth) < 0 || - (imColIdx - paddingWidth) >= inputWidth) { - colData[colh * colWidthSize + colw] = static_cast(0); - } else { - imRowIdx += c_im * inputHeight - paddingHeight; - imColIdx -= paddingWidth; - colData[colh * colWidthSize + colw] = - imData[imRowIdx * inputWidth + imColIdx]; + for (int ic = 0; ic < inputChannels; ic++) { + for (int oh = 0; oh < colOutputHeight; oh++) { + T* dstData = colData + oh * outputWidth; + for (int fh = 0; fh < filterHeight; fh++) { + for (int fw = 0; fw < filterWidth; fw++) { + int imRowIdx = (oh + colOffset) * strideHeight + + fh * dilationHeight - paddingHeight; + if (imRowIdx < 0 || imRowIdx >= inputHeight) { + memset(dstData, 0, outputWidth * sizeof(T)); + } else { + for (int ow = 0; ow < outputWidth; ow++) { + int imColIdx = + ow * strideWidth + fw * dilationWidth - paddingWidth; + if (imColIdx < 0 || imColIdx >= inputWidth) { + dstData[ow] = T(0); + } else { + dstData[ow] = imData[imRowIdx * inputWidth + imColIdx]; + } + } + } + dstData += colWidth; + } } } + colData += filterHeight * filterWidth * colWidth; + imData += inputHeight * inputWidth; } } }; diff --git a/paddle/function/Im2ColTest.cpp b/paddle/function/Im2ColTest.cpp index 3ba866dcdd845403d52f7a85adfef08cbb11c305..fe44a8bf79005efb87c56f6a79f46421129bab22 100644 --- a/paddle/function/Im2ColTest.cpp +++ b/paddle/function/Im2ColTest.cpp @@ -202,10 +202,10 @@ void TestIm2ColMobileFunctor() { padding, dilation, dilation, + channels, 0, - height, - 0, - width); + outputHeight, + outputHeight * outputWidth); autotest::TensorCheckEqual(*output1, *output2); } diff --git a/paddle/inference/CMakeLists.txt b/paddle/inference/CMakeLists.txt index ae4d3fd2f58daf87a650428e04722581610ed780..2289ddc139cbddfbaa5238e683b2f8e784a7291e 100644 --- a/paddle/inference/CMakeLists.txt +++ b/paddle/inference/CMakeLists.txt @@ -1,14 +1,14 @@ -set(FLUID_CORE_MODULES proto_desc paddle_memory executor prune init) +set(FLUID_CORE_MODULES proto_desc paddle_memory lod_tensor executor prune init) cc_library(paddle_fluid_api - SRCS inference.cc + SRCS io.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) # Merge all modules into a single static library cc_library(paddle_fluid DEPS paddle_fluid_api ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) # Create shared library -add_library(paddle_fluid_shared SHARED inference.cc) +add_library(paddle_fluid_shared SHARED io.cc) target_circle_link_libraries(paddle_fluid_shared ARCHIVE_START @@ -20,23 +20,10 @@ SET_TARGET_PROPERTIES(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid) # install library & headers if(NOT WITH_C_API AND WITH_FLUID) - install(FILES inference.h DESTINATION include/paddle/inference) + install(FILES io.h DESTINATION include/paddle/inference) install(TARGETS paddle_fluid_shared DESTINATION lib) endif() -add_executable(example example.cc) -if(APPLE) - set(OPTIONAL_LINK_FLAGS) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - set(OPTIONAL_LINK_FLAGS "-undefined dynamic_lookup") - endif() - target_link_libraries(example - -Wl,-force_load paddle_fluid - ${OPTIONAL_LINK_FLAGS} - ${PTOOLS_LIB}) -else() - target_link_libraries(example - -Wl,--start-group -Wl,--whole-archive paddle_fluid - -Wl,--no-whole-archive -Wl,--end-group - ${PTOOLS_LIB}) +if(WITH_TESTING) + add_subdirectory(tests/book) endif() diff --git a/paddle/inference/example.cc b/paddle/inference/example.cc deleted file mode 100644 index 0c18b45624dedcb5839d4b771e044b4a7b32af52..0000000000000000000000000000000000000000 --- a/paddle/inference/example.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "gflags/gflags.h" -#include "paddle/inference/inference.h" - -DEFINE_string(dirname, "", "Directory of the inference model."); - -int main(int argc, char** argv) { - google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_dirname.empty()) { - // Example: - // ./example --dirname=recognize_digits_mlp.inference.model - std::cout << "Usage: ./example --dirname=path/to/your/model" << std::endl; - exit(1); - } - - std::cout << "FLAGS_dirname: " << FLAGS_dirname << std::endl; - std::string dirname = FLAGS_dirname; - - paddle::InferenceEngine* engine = new paddle::InferenceEngine(); - engine->LoadInferenceModel(dirname); - - paddle::framework::LoDTensor input; - srand(time(0)); - float* input_ptr = - input.mutable_data({1, 784}, paddle::platform::CPUPlace()); - for (int i = 0; i < 784; ++i) { - input_ptr[i] = rand() / (static_cast(RAND_MAX)); - } - - std::vector feeds; - feeds.push_back(input); - std::vector fetchs; - engine->Execute(feeds, fetchs); - - for (size_t i = 0; i < fetchs.size(); ++i) { - auto dims_i = fetchs[i].dims(); - std::cout << "dims_i:"; - for (int j = 0; j < dims_i.size(); ++j) { - std::cout << " " << dims_i[j]; - } - std::cout << std::endl; - std::cout << "result:"; - float* output_ptr = fetchs[i].data(); - for (int j = 0; j < paddle::framework::product(dims_i); ++j) { - std::cout << " " << output_ptr[j]; - } - std::cout << std::endl; - } - - delete engine; - return 0; -} diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc deleted file mode 100644 index b43c359ed1787143403336e8c1cb4c7f85b1d7a2..0000000000000000000000000000000000000000 --- a/paddle/inference/inference.cc +++ /dev/null @@ -1,187 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "inference.h" -#include -#include "paddle/framework/executor.h" -#include "paddle/framework/init.h" -#include "paddle/framework/scope.h" - -namespace paddle { - -void InferenceEngine::LoadInferenceModel(const std::string& dirname) { - std::string model_filename = dirname + "/__model__"; - LOG(INFO) << "loading model from " << model_filename; - std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary); - std::string program_desc_str; - inputfs.seekg(0, std::ios::end); - program_desc_str.resize(inputfs.tellg()); - inputfs.seekg(0, std::ios::beg); - LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); - inputfs.read(&program_desc_str[0], program_desc_str.size()); - inputfs.close(); - - program_ = new framework::ProgramDesc(program_desc_str); - GenerateLoadProgram(dirname); - - framework::BlockDesc* global_block = program_->MutableBlock(0); - feed_var_names_.clear(); - fetch_var_names_.clear(); - for (auto* op : global_block->AllOps()) { - if (op->Type() == "feed") { - feed_var_names_.insert(feed_var_names_.begin(), op->Output("Out")[0]); - } else if (op->Type() == "fetch") { - fetch_var_names_.push_back(op->Input("X")[0]); - } - } -} - -bool InferenceEngine::IsParameter(const framework::VarDesc* var) { - if (var->Persistable()) { - // There are many unreachable variables in the program - for (size_t i = 0; i < program_->Size(); ++i) { - const framework::BlockDesc& block = program_->Block(i); - for (auto* op : block.AllOps()) { - if (op->Type() == "feed") { - continue; - } - for (auto input_argument_name : op->InputArgumentNames()) { - if (input_argument_name == var->Name()) { - return true; - } - } - } - } - } - return false; -} - -void InferenceEngine::GenerateLoadProgram(const std::string& dirname) { - framework::BlockDesc* global_block = program_->MutableBlock(0); - - load_program_ = new framework::ProgramDesc(); - framework::BlockDesc* load_block = load_program_->MutableBlock(0); - for (auto* var : global_block->AllVars()) { - if (IsParameter(var)) { - LOG(INFO) << "parameter's name: " << var->Name(); - - framework::VarDesc* new_var = load_block->Var(var->Name()); - new_var->SetShape(var->Shape()); - new_var->SetDataType(var->GetDataType()); - new_var->SetType(var->GetType()); - new_var->SetLoDLevel(var->GetLoDLevel()); - new_var->SetPersistable(true); - - // append_op - framework::OpDesc* op = load_block->AppendOp(); - op->SetType("load"); - op->SetOutput("Out", {new_var->Name()}); - op->SetAttr("file_path", {dirname + "/" + new_var->Name()}); - op->CheckAttrs(); - } - } -} - -void InferenceEngine::PrependFeedOp() { - if (!program_) { - LOG(FATAL) << "Please initialize the program_ first."; - } - - framework::BlockDesc* global_block = program_->MutableBlock(0); - - // create_var - framework::VarDesc* feed_var = global_block->Var("feed"); - feed_var->SetType(framework::proto::VarDesc::FEED_MINIBATCH); - feed_var->SetPersistable(true); - - // prepend feed_op - for (size_t i = 0; i < feed_var_names_.size(); ++i) { - std::string var_name = feed_var_names_[i]; - LOG(INFO) << "feed var's name: " << var_name; - - // prepend_op - framework::OpDesc* op = global_block->PrependOp(); - op->SetType("feed"); - op->SetInput("X", {"feed"}); - op->SetOutput("Out", {var_name}); - op->SetAttr("col", {static_cast(i)}); - op->CheckAttrs(); - } -} - -void InferenceEngine::AppendFetchOp() { - if (!program_) { - LOG(FATAL) << "Please initialize the program_ first."; - } - - framework::BlockDesc* global_block = program_->MutableBlock(0); - - // create_var - framework::VarDesc* fetch_var = global_block->Var("fetch"); - fetch_var->SetType(framework::proto::VarDesc::FETCH_LIST); - fetch_var->SetPersistable(true); - - // append fetch_op - for (size_t i = 0; i < fetch_var_names_.size(); ++i) { - std::string var_name = fetch_var_names_[i]; - LOG(INFO) << "fetch var's name: " << var_name; - - // append_op - framework::OpDesc* op = global_block->AppendOp(); - op->SetType("fetch"); - op->SetInput("X", {var_name}); - op->SetOutput("Out", {"fetch"}); - op->SetAttr("col", {static_cast(i)}); - op->CheckAttrs(); - } -} - -void InferenceEngine::Execute(const std::vector& feeds, - std::vector& fetchs) { - if (!program_ || !load_program_) { - LOG(FATAL) << "Please initialize the program_ and load_program_ first."; - } - - if (feeds.size() != feed_var_names_.size()) { - LOG(FATAL) << "Please feed " << feed_var_names_.size() << " input Tensors."; - } - - auto* place = new platform::CPUPlace(); - framework::InitDevices(); - framework::Executor* executor = new framework::Executor(*place); - framework::Scope* scope = new framework::Scope(); - - executor->Run(*load_program_, scope, 0, true, true); - - std::map feed_targets; - std::map fetch_targets; - - // set_feed_variable - for (size_t i = 0; i < feed_var_names_.size(); ++i) { - feed_targets[feed_var_names_[i]] = &feeds[i]; - } - - // get_fetch_variable - fetchs.resize(fetch_var_names_.size()); - for (size_t i = 0; i < fetch_var_names_.size(); ++i) { - fetch_targets[fetch_var_names_[i]] = &fetchs[i]; - } - - executor->Run(*program_, scope, feed_targets, fetch_targets); - - delete place; - delete scope; - delete executor; -} -} // namespace paddle diff --git a/paddle/inference/inference.h b/paddle/inference/inference.h deleted file mode 100644 index 26f259824b945e260b370ced9d065842264075d5..0000000000000000000000000000000000000000 --- a/paddle/inference/inference.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/framework/block_desc.h" -#include "paddle/framework/lod_tensor.h" -#include "paddle/framework/program_desc.h" - -namespace paddle { - -class InferenceEngine { -public: - InferenceEngine() : program_(nullptr), load_program_(nullptr) {} - ~InferenceEngine() { - delete program_; - delete load_program_; - } - - void LoadInferenceModel(const std::string& dirname); - void Execute(const std::vector& feeds, - std::vector& fetchs); - -private: - bool IsParameter(const framework::VarDesc* var); - void GenerateLoadProgram(const std::string& dirname); - void PrependFeedOp(); - void AppendFetchOp(); - -private: - framework::ProgramDesc* program_; - framework::ProgramDesc* load_program_; - std::vector feed_var_names_; - std::vector fetch_var_names_; -}; - -} // namespace paddle diff --git a/paddle/inference/io.cc b/paddle/inference/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..60ad7af1c0a469beb6a07bf057a8647fcb98cca8 --- /dev/null +++ b/paddle/inference/io.cc @@ -0,0 +1,98 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/inference/io.h" + +#include +#include "paddle/framework/block_desc.h" +#include "paddle/framework/feed_fetch_type.h" + +namespace paddle { +namespace inference { + +bool IsParameter(const framework::VarDesc* var, + const framework::ProgramDesc& main_program) { + if (var->Persistable()) { + // There are many unreachable variables in the program + for (size_t i = 0; i < main_program.Size(); ++i) { + const framework::BlockDesc& block = main_program.Block(i); + for (auto* op : block.AllOps()) { + if (op->Type() == framework::kFeedOpType) { + continue; + } + for (auto input_argument_name : op->InputArgumentNames()) { + if (input_argument_name == var->Name()) { + return true; + } + } + } + } + } + return false; +} + +void LoadPersistables(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname, + const framework::ProgramDesc& main_program) { + const framework::BlockDesc& global_block = main_program.Block(0); + + framework::ProgramDesc* load_program = new framework::ProgramDesc(); + framework::BlockDesc* load_block = load_program->MutableBlock(0); + for (auto* var : global_block.AllVars()) { + if (IsParameter(var, main_program)) { + VLOG(3) << "parameter's name: " << var->Name(); + + framework::VarDesc* new_var = load_block->Var(var->Name()); + new_var->SetShape(var->Shape()); + new_var->SetDataType(var->GetDataType()); + new_var->SetType(var->GetType()); + new_var->SetLoDLevel(var->GetLoDLevel()); + new_var->SetPersistable(true); + + // append_op + framework::OpDesc* op = load_block->AppendOp(); + op->SetType("load"); + op->SetOutput("Out", {new_var->Name()}); + op->SetAttr("file_path", {dirname + "/" + new_var->Name()}); + op->CheckAttrs(); + } + } + executor.Run(*load_program, &scope, 0, true, true); + delete load_program; +} + +std::unique_ptr Load(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname) { + std::string model_filename = dirname + "/__model__"; + LOG(INFO) << "loading model from " << model_filename; + std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary); + std::string program_desc_str; + inputfs.seekg(0, std::ios::end); + program_desc_str.resize(inputfs.tellg()); + inputfs.seekg(0, std::ios::beg); + LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); + inputfs.read(&program_desc_str[0], program_desc_str.size()); + inputfs.close(); + + std::unique_ptr main_program( + new framework::ProgramDesc(program_desc_str)); + + LoadPersistables(executor, scope, dirname, *main_program); + return main_program; +} + +} // namespace inference +} // namespace paddle diff --git a/paddle/inference/io.h b/paddle/inference/io.h new file mode 100644 index 0000000000000000000000000000000000000000..962b6c4e20d30de3cc28eae1c8c5c33b3ab5f6ac --- /dev/null +++ b/paddle/inference/io.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/framework/executor.h" +#include "paddle/framework/program_desc.h" +#include "paddle/framework/scope.h" + +namespace paddle { +namespace inference { + +void LoadPersistables(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname, + const framework::ProgramDesc& main_program); + +std::unique_ptr Load(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname); + +} // namespace inference +} // namespace paddle diff --git a/paddle/inference/tests/book/CMakeLists.txt b/paddle/inference/tests/book/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3798fb8fd8769aef5940d4ce724cb0cc8686422 --- /dev/null +++ b/paddle/inference/tests/book/CMakeLists.txt @@ -0,0 +1,7 @@ +set(PYTHON_TESTS_DIR ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/tests) +cc_test(test_inference_recognize_digits_mlp + SRCS test_inference_recognize_digits.cc + DEPS ARCHIVE_START paddle_fluid ARCHIVE_END + ARGS --dirname=${PYTHON_TESTS_DIR}/book/recognize_digits_mlp.inference.model) +set_tests_properties(test_inference_recognize_digits_mlp + PROPERTIES DEPENDS test_recognize_digits_mlp_cpu) diff --git a/paddle/inference/tests/book/test_inference_recognize_digits.cc b/paddle/inference/tests/book/test_inference_recognize_digits.cc new file mode 100644 index 0000000000000000000000000000000000000000..26dc2aee04261d9a1fd29b4d75bfacc7870c09d8 --- /dev/null +++ b/paddle/inference/tests/book/test_inference_recognize_digits.cc @@ -0,0 +1,113 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "gflags/gflags.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/inference/io.h" + +DEFINE_string(dirname, "", "Directory of the inference model."); + +template +void TestInference(const std::string& dirname, + const std::vector& cpu_feeds, + std::vector& cpu_fetchs) { + // 1. Define place, executor and scope + auto place = Place(); + auto executor = paddle::framework::Executor(place); + auto* scope = new paddle::framework::Scope(); + + // 2. Initialize the inference_program and load all parameters from file + auto inference_program = paddle::inference::Load(executor, *scope, dirname); + + // 3. Get the feed_target_names and fetch_target_names + const std::vector& feed_target_names = + inference_program->GetFeedTargetNames(); + const std::vector& fetch_target_names = + inference_program->GetFetchTargetNames(); + + // 4. Prepare inputs: set up maps for feed targets + std::map feed_targets; + for (size_t i = 0; i < feed_target_names.size(); ++i) { + // Please make sure that cpu_feeds[i] is right for feed_target_names[i] + feed_targets[feed_target_names[i]] = cpu_feeds[i]; + } + + // 5. Define Tensor to get the outputs: set up maps for fetch targets + std::map fetch_targets; + for (size_t i = 0; i < fetch_target_names.size(); ++i) { + fetch_targets[fetch_target_names[i]] = cpu_fetchs[i]; + } + + // 6. Run the inference program + executor.Run(*inference_program, scope, feed_targets, fetch_targets); + + delete scope; +} + +TEST(inference, recognize_digits) { + if (FLAGS_dirname.empty()) { + LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model"; + } + + LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl; + std::string dirname = FLAGS_dirname; + + // 0. Call `paddle::framework::InitDevices()` initialize all the devices + // In unittests, this is done in paddle/testing/paddle_gtest_main.cc + + paddle::framework::LoDTensor input; + srand(time(0)); + float* input_ptr = + input.mutable_data({1, 28, 28}, paddle::platform::CPUPlace()); + for (int i = 0; i < 784; ++i) { + input_ptr[i] = rand() / (static_cast(RAND_MAX)); + } + std::vector cpu_feeds; + cpu_feeds.push_back(&input); + + paddle::framework::LoDTensor output1; + std::vector cpu_fetchs1; + cpu_fetchs1.push_back(&output1); + + // Run inference on CPU + TestInference( + dirname, cpu_feeds, cpu_fetchs1); + LOG(INFO) << output1.dims(); + +#ifdef PADDLE_WITH_CUDA + paddle::framework::LoDTensor output2; + std::vector cpu_fetchs2; + cpu_fetchs2.push_back(&output2); + + // Run inference on CUDA GPU + TestInference( + dirname, cpu_feeds, cpu_fetchs2); + LOG(INFO) << output2.dims(); + + EXPECT_EQ(output1.dims(), output2.dims()); + EXPECT_EQ(output1.numel(), output2.numel()); + + float err = 1E-3; + int count = 0; + for (int64_t i = 0; i < output1.numel(); ++i) { + if (fabs(output1.data()[i] - output2.data()[i]) > err) { + count++; + } + } + EXPECT_EQ(count, 0) << "There are " << count << " different elements."; +#endif +} diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 1ec4336cabbc7d3073b7638b7484bf61e83a2dc5..cc86b12be08ba987f9682ebf3fda56c2f07fb576 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -2015,13 +2015,6 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, CHECK_EQ(channels * outLength, maskMatP->getWidth()); } - /* initialize the data_ */ - for (size_t i = 0; i < height_; i++) { - for (size_t j = 0; j < width_; j++) { - outData[i * outStride + j] = -(real)FLT_MAX; - } - } - /* pool max one by one */ for (size_t n = 0; n < num; ++n) { // frame by frame if (!isContiguous()) { @@ -2030,19 +2023,24 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t ph = 0; ph < outputH; ++ph) { int hstart = ph * strideH - paddingH; - int hend = std::min(hstart + sizeY, imgSizeH); - hstart = std::max(hstart, 0); + int hend = hstart + sizeY; + hstart = hstart < 0 ? 0 : hstart; + hend = hend < (int)imgSizeH ? hend : (int)imgSizeH; for (size_t pw = 0; pw < outputW; ++pw) { int wstart = pw * strideW - paddingW; - int wend = std::min(wstart + sizeX, imgSizeW); - wstart = std::max(wstart, 0); + int wend = wstart + sizeX; + wstart = wstart < 0 ? 0 : wstart; + wend = wend < (int)imgSizeW ? wend : (int)imgSizeW; if (maskData == NULL) { + real tmp = -(real)FLT_MAX; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - outData[ph * outputW + pw] = std::max( - outData[ph * outputW + pw], inputData[h * imgSizeW + w]); + tmp = tmp < inputData[h * imgSizeW + w] + ? inputData[h * imgSizeW + w] + : tmp; } } + outData[ph * outputW + pw] = tmp; } else { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 48cf5816cce4bb5ee8e66e72c5b1acea8535ab10..e903f43ba69ee9e28b3a03e8921a41ffa81a2542 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -122,9 +122,11 @@ if(WITH_DISTRIBUTE) set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) op_library(recv_op DEPS ${DISTRIBUTE_DEPS}) set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor) + op_library(listen_and_serv_op DEPS ${DISTRIBUTE_DEPS}) + set_source_files_properties(listen_and_serv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op listen_and_serv_op sum_op executor) else() - set(DEPS_OPS ${DEPS_OPS} send_op recv_op) + set(DEPS_OPS ${DEPS_OPS} send_op recv_op listen_and_serv_op) endif() op_library(cond_op DEPS framework_proto tensor net_op) @@ -173,6 +175,8 @@ endif() # FIXME(typhoonzero): save/load depends lodtensor serialization functions op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) +op_library(save_combine_op DEPS lod_tensor) +op_library(load_combine_op DEPS lod_tensor) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) @@ -192,3 +196,4 @@ if(WITH_GPU) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) +cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op) diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu index 4e579387924a5b0499f29609bc6b1322030a3c0d..00cb6e9cafb4e79ed3d59cd4a6e40ea132e5efda 100644 --- a/paddle/operators/adagrad_op.cu +++ b/paddle/operators/adagrad_op.cu @@ -82,7 +82,7 @@ struct SparseAdagradFunctor { math::scatter::MergeAdd merge_func; auto grad_merge = merge_func(context, grad); auto* grad_merge_data = grad_merge.mutable_value()->template data(); - auto& merge_rows = grad_merge.rows(); + framework::Vector merge_rows(grad_merge.rows()); // 2. m += g_m * g_m math::scatter::Mul sqare_func; auto grad_square = sqare_func(context, grad_merge, grad_merge); @@ -101,8 +101,8 @@ struct SparseAdagradFunctor { SparseAdagradFunctorKernel< T, 256><<(context) - .stream()>>>(grad_merge_data, grad_merge.rows().data(), - lr, param_data, moment_data, grad_width, + .stream()>>>(grad_merge_data, merge_rows.cuda_data(), lr, + param_data, moment_data, grad_width, epsilon); } }; diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h index 9cc34bdded780e61e8700eb4fa4a295c84fb48bc..bf536687d398b8342e6ae76a07c11e5fe47483e0 100644 --- a/paddle/operators/adam_op.h +++ b/paddle/operators/adam_op.h @@ -199,7 +199,12 @@ class AdamOpKernel : public framework::OpKernel { merge_func(ctx.template device_context(), grad); auto& grad_tensor = grad_merge.value(); const T* grad_data = grad_tensor.template data(); - auto* rows = grad_merge.rows().data(); + int64_t* rows = nullptr; + if (platform::is_gpu_place(ctx.GetPlace())) { + rows = grad_merge.mutable_rows()->cuda_data(); + } else { + rows = grad_merge.mutable_rows()->data(); + } auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); SparseAdamFunctor functor( diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu index 45635f16745346b08f7e31db2f25905bdbc3aeeb..2a970cd9fa965b4126356eaa1519068f9c7a7f34 100644 --- a/paddle/operators/ctc_align_op.cu +++ b/paddle/operators/ctc_align_op.cu @@ -69,12 +69,11 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { auto stream = ctx.cuda_device_context().stream(); MergeAndDelCudaKernel<<<1, 1, 0, stream>>>( - num_tokens, tokens, num_seq, input_lod[level].data(), blank, + num_tokens, tokens, num_seq, input_lod[level].cuda_data(), blank, merge_repeated, dev_out_lod0_ptr, output_data); // set output lod - thrust::host_vector host_out_lod0(dev_out_lod0.begin(), - dev_out_lod0.end()); + std::vector host_out_lod0(dev_out_lod0.begin(), dev_out_lod0.end()); framework::LoD out_lod; out_lod.push_back(host_out_lod0); output->set_lod(out_lod); diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc index 35cb18797ff66cb87a6658e73ce02b0bfae29baa..5274aa204e6629c9c5ea850c433e0948c89015bd 100644 --- a/paddle/operators/dropout_op.cc +++ b/paddle/operators/dropout_op.cc @@ -51,6 +51,13 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { "'dropout_prob' must be between 0.0 and 1.0."); }); AddAttr("is_test", "True if in test phase.").SetDefault(false); + AddAttr("fix_seed", + "A flag indicating whether to use a fixed seed to generate " + "random mask. NOTE: DO NOT set this flag to true in " + "training. Setting this flag to true is only useful in " + "unittest or for debug that always the same output units " + "will be dropped.") + .SetDefault(false); AddAttr("seed", "Dropout random seed.").SetDefault(0); AddComment(R"DOC( diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index c56930336e865079f1b96df0f35b0a051fe63a27..84d78445a4fa340ba3c066bb48b96b2a890db652 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -62,7 +62,11 @@ class GPUDropoutKernel : public framework::OpKernel { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int size = framework::product(mask->dims()); - int seed = context.Attr("seed"); + + std::random_device rnd; + int seed = + context.Attr("fix_seed") ? context.Attr("seed") : rnd(); + thrust::counting_iterator index_sequence_begin(0); thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(mask_data), diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index c90b8d277eb78048c001d36a367287146b51c636..46e5dbc64ff9ad3d04a9c1c07f4226932f661baf 100644 --- a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -38,9 +38,15 @@ class CPUDropoutKernel : public framework::OpKernel { if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); - int seed = context.Attr("seed"); + + // NOTE: fixed seed should only be used in unittest or for debug. + // Guarantee to use random seed in training. + std::random_device rnd; std::minstd_rand engine; + int seed = + context.Attr("fix_seed") ? context.Attr("seed") : rnd(); engine.seed(seed); + std::uniform_real_distribution dist(0, 1); size_t size = framework::product(mask->dims()); for (size_t i = 0; i < size; ++i) { diff --git a/paddle/operators/elementwise_pow_op.cc b/paddle/operators/elementwise_pow_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5293cc7dd34ccee860c50e964516da9b4d42d29c --- /dev/null +++ b/paddle/operators/elementwise_pow_op.cc @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/elementwise_pow_op.h" +#include "paddle/operators/elementwise_op.h" + +namespace paddle { +namespace operators { +class ElementwisePowOpMaker : public ElementwiseOpMaker { + public: + ElementwisePowOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : ElementwiseOpMaker(proto, op_checker) { + SetComment("Pow", "Out = X ^ Y"); + AddComment(comment_); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(elementwise_pow, ops::ElementwiseOp, + ops::ElementwisePowOpMaker); +REGISTER_OP_CPU_KERNEL( + elementwise_pow, + ops::ElementwisePowKernel, + ops::ElementwisePowKernel); diff --git a/paddle/operators/elementwise_pow_op.cu b/paddle/operators/elementwise_pow_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..643c978e635bc8e9671b47774c2eac5b713f59c2 --- /dev/null +++ b/paddle/operators/elementwise_pow_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/elementwise_pow_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_CUDA_KERNEL( + elementwise_pow, + ops::ElementwisePowKernel, + ops::ElementwisePowKernel); diff --git a/paddle/operators/elementwise_pow_op.h b/paddle/operators/elementwise_pow_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6019e709e0db0fd62b4d3350bb768095f87ef241 --- /dev/null +++ b/paddle/operators/elementwise_pow_op.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/operators/elementwise_op_function.h" + +namespace paddle { +namespace operators { + +template +struct PowFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { return std::pow(a, b); } +}; + +template +class ElementwisePowKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + ElementwiseComputeEx, DeviceContext, T>(ctx); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc index d738e1850ca4f658f4fca5c9bf643c44f676cce9..789d01e0022b5c36957f295265a9dc42649b310f 100644 --- a/paddle/operators/feed_op.cc +++ b/paddle/operators/feed_op.cc @@ -52,7 +52,11 @@ class FeedOp : public framework::OperatorBase { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); - framework::Copy(feed_item, place, dev_ctx, out_item); + if (platform::is_same_place(feed_item.place(), place)) { + out_item->ShareDataWith(feed_item); + } else { + framework::Copy(feed_item, place, dev_ctx, out_item); + } out_item->set_lod(feed_item.lod()); } }; diff --git a/paddle/operators/gru_op.h b/paddle/operators/gru_op.h index b1957fb9ce6add8628cb206abf2c569d3f615c85..a08bd4233b02d021aaa64bafe4b855f11a60d338 100644 --- a/paddle/operators/gru_op.h +++ b/paddle/operators/gru_op.h @@ -30,11 +30,12 @@ using Tensor = framework::Tensor; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, const size_t* index, + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index, *dst, indexed_src); + row_shuffle(ctx, src, index_lod, *dst, indexed_src); } template @@ -76,7 +77,9 @@ class GRUKernel : public framework::OpKernel { gru_value.state_weight = const_cast(weight_data + 2 * frame_size * frame_size); Tensor ordered_h0; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (h0) { // Since the batch computing for GRU reorders the input sequences // according to their length. The initialized cell state also needs @@ -159,7 +162,9 @@ class GRUGradKernel : public framework::OpKernel { zero(dev_ctx, &batch_reset_hidden_prev_grad, static_cast(0.0)); Tensor ordered_h0, ordered_h0_grad; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (h0) { ReorderInitState(dev_ctx, *h0, order, &ordered_h0, true); diff --git a/paddle/operators/label_smooth_op.cc b/paddle/operators/label_smooth_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c89082f44b360cbd171eccb212674040b8688a46 --- /dev/null +++ b/paddle/operators/label_smooth_op.cc @@ -0,0 +1,128 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/label_smooth_op.h" + +namespace paddle { +namespace operators { + +class LabelSmoothOp : public framework::OperatorWithKernel { + public: + LabelSmoothOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LabelSmoothOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of LabelSmoothOp should not be null."); + auto in_dims = ctx->GetInputDim("X"); + if (ctx->HasInput("PriorDist")) { + auto noise_dims = ctx->GetInputDim("PriorDist"); + auto noise_numel = paddle::framework::product(noise_dims); + PADDLE_ENFORCE( + in_dims[1] == noise_numel, + "The number of elements in Input(PriorDist) must be equal to the " + "dimension of each label."); + } + ctx->ShareLoD("X", /*->*/ "Out"); + ctx->SetOutputDim("Out", in_dims); + } +}; + +class LabelSmoothOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LabelSmoothOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor) The input labels of LabelSmooth operator. This " + "input can be batched labels in one-hot encoding or output from " + "softmax, with shape [N x K], where N is the batch size and K is " + "the number of classes"); + AddInput("PriorDist", + "(Tensor, optional)" + "The prior distribution to be added to the smoothed label. It is " + "fixed during training and the number of elements should be equal " + "to the dimension K of each label. Default is uniform " + "distribution and each element will be set to 1/K if not provided " + "in input.") + .AsDispensable(); + AddOutput("Out", + "(loDTensor) The smoothed label of LabelSmooth operator. It has" + "the same shape and LoD with the Input(LoDTensor)."); + AddAttr("epsilon", + "(float, default 0.0f)" + "The smoothing parameter of LabelSmooth operator.") + .SetDefault(0.0f); + AddComment(R"DOC( +LabelSmooth Operator. + +Label smoothing is a mechanism to regularize the classifier layer. In machine +learning, optimizing the log-likelihood of the correct label directly may +cause two problems. First, it may result in overfitting: if the model learns +to assign full probability to the ground-truth label for each training example, +it is not guaranteed to generalize. Second, it encourages the differences +between the largest logit and all others to become large, reducing the ability +of the model to adapt. Label smoothing is proposed to encourage the model to +be less confident, which replaces the ground-truth label $y$ with the weighted +sum of itself and some fixed distribution $\mu$, i.e. + +$$ + \tilde{y} = (1 - \epsilon) * y + \epsilon * \mu, +$$ + +where $(1 - \epsilon)$ and $\epsilon$ are the weights respectively, and +$\tilde{y}$ is the smoothed label. Usually uniform distribution is used for +$\mu$. This change in the ground-truth label is called label-smoothing +regularization or LSR. + +See more details about label smoothing in https://arxiv.org/abs/1512.00567. + +)DOC"); + } +}; + +class LabelSmoothGradOp : public framework::OperatorWithKernel { + public: + LabelSmoothGradOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) shouldn't be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OP(label_smooth, ops::LabelSmoothOp, ops::LabelSmoothOpMaker, + label_smooth_grad, ops::LabelSmoothGradOp); +REGISTER_OP_CPU_KERNEL( + label_smooth, + ops::LabelSmoothKernel, + ops::LabelSmoothKernel); +REGISTER_OP_CPU_KERNEL( + label_smooth_grad, + ops::LabelSmoothGradKernel, + ops::LabelSmoothGradKernel); diff --git a/paddle/operators/label_smooth_op.cu b/paddle/operators/label_smooth_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..5a0cec12bc58a56e4b0c3bd6fbc6c4754ef81fa4 --- /dev/null +++ b/paddle/operators/label_smooth_op.cu @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/label_smooth_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_CUDA_KERNEL( + label_smooth, + ops::LabelSmoothKernel, + ops::LabelSmoothKernel); +REGISTER_OP_CUDA_KERNEL( + label_smooth_grad, + ops::LabelSmoothGradKernel, + ops::LabelSmoothGradKernel); diff --git a/paddle/operators/label_smooth_op.h b/paddle/operators/label_smooth_op.h new file mode 100644 index 0000000000000000000000000000000000000000..87bc9f793e3b4e249142710243c45d51f3a913b2 --- /dev/null +++ b/paddle/operators/label_smooth_op.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class LabelSmoothKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* out_t = ctx.Output("Out"); + auto* in_t = ctx.Input("X"); + auto* dist_t = ctx.Input("PriorDist"); + auto label_dim = in_t->dims()[1]; + out_t->mutable_data(ctx.GetPlace()); + + auto epsilon = ctx.Attr("epsilon"); + auto out = framework::EigenVector::Flatten(*out_t); + auto in = framework::EigenVector::Flatten(*in_t); + auto& dev = *ctx.template device_context().eigen_device(); + if (dist_t) { + auto dist = framework::EigenVector::Flatten(*dist_t); + out.device(dev) = + static_cast(1 - epsilon) * in + + epsilon * dist.broadcast(Eigen::DSizes(in_t->numel())); + } else { + out.device(dev) = static_cast(1 - epsilon) * in + + static_cast(epsilon / label_dim); + } + } +}; + +template +class LabelSmoothGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* d_out_t = ctx.Input(framework::GradVarName("Out")); + auto* d_in_t = ctx.Output(framework::GradVarName("X")); + d_in_t->mutable_data(ctx.GetPlace()); + + auto d_out = framework::EigenVector::Flatten(*d_out_t); + auto d_in = framework::EigenVector::Flatten(*d_in_t); + + auto epsilon = ctx.Attr("epsilon"); + auto& dev = *ctx.template device_context().eigen_device(); + d_in.device(dev) = static_cast(1 - epsilon) * d_out; + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/layer_norm_op.cc b/paddle/operators/layer_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..1c6d2ae4d05becaeed34d66cad398cc90f9d3ece --- /dev/null +++ b/paddle/operators/layer_norm_op.cc @@ -0,0 +1,370 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/layer_norm_op.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DataLayout = framework::DataLayout; + +template +using EigenMatrixMapRowMajor = Eigen::Map< + Eigen::Matrix>; +template +using ConstEigenMatrixMapRowMajor = Eigen::Map< + const Eigen::Matrix>; + +class LayerNormOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Y"), + "Output(Y) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Mean"), + "Output(Mean) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Variance"), + "Output(Variance) of LayerNormOp should not be null."); + + auto x_dim = ctx->GetInputDim("X"); + auto begin_norm_axis = ctx->Attrs().Get("begin_norm_axis"); + PADDLE_ENFORCE_LT(begin_norm_axis, x_dim.size(), + "'begin_norm_axis' must be less than the rank of X."); + + auto matrix_dim = framework::flatten_to_2d(x_dim, begin_norm_axis); + int left = static_cast(matrix_dim[0]); + int right = static_cast(matrix_dim[1]); + if (ctx->HasInput("Scale")) { + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], right); + } + if (ctx->HasInput("Bias")) { + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], right); + } + + ctx->SetOutputDim("Y", ctx->GetInputDim("X")); + ctx->SetOutputDim("Mean", {left}); + ctx->SetOutputDim("Variance", {left}); + ctx->ShareLoD("X", "Y"); + } +}; + +class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LayerNormOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(LoDTensor) The input tensor."); + AddInput("Scale", + "(Tensor, optional) Scale is a 1-dimensional tensor of size " + "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." + "It is applied to the output.") + .AsDispensable(); + AddInput("Bias", + "(Tensor, optional) Bias is a 1-dimensional tensor of size " + "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." + "It is applied to the output.") + .AsDispensable(); + AddOutput("Y", "(LoDTensor) Result after normalization."); + AddOutput("Mean", "(Tensor) Mean of the current mini batch.") + .AsIntermediate(); + AddOutput("Variance", "(Tensor) Variance of the current mini batch.") + .AsIntermediate(); + + AddAttr("epsilon", + "(float, default 1e-5) Constant for " + "numerical stability") + .SetDefault(1e-5) + .AddCustomChecker([](const float &epsilon) { + PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f, + "'epsilon' should be between 0.0 and 0.001."); + }); + AddAttr("begin_norm_axis", + "(int default:1), the " + "axis of `begin_norm_axis ... Rank(X) - 1` will be " + "normalized. `begin_norm_axis` splits the tensor(`X`) to a " + "matrix [N,H].") + .SetDefault(1) + .AddCustomChecker([](const int &begin_norm_axis) { + PADDLE_ENFORCE_GT(begin_norm_axis, 0, + "'begin_norm_axis' should be greater than zero."); + }); + + AddComment(R"DOC( +Layer Normalization. + +Layer Norm has been implemented as discussed in the paper: +https://arxiv.org/abs/1607.06450 +... +)DOC"); + } +}; + +template +class LayerNormKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const float epsilon = ctx.Attr("epsilon"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *x = ctx.Input("X"); + const auto &x_dims = x->dims(); + const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); + + auto *output = ctx.Output("Y"); + auto *mean = ctx.Output("Mean"); + auto *var = ctx.Output("Variance"); + output->mutable_data(ctx.GetPlace()); + mean->mutable_data(ctx.GetPlace()); + var->mutable_data(ctx.GetPlace()); + + auto matrix_dim = framework::flatten_to_2d(x_dims, begin_norm_axis); + int left = static_cast(matrix_dim[0]); + int right = static_cast(matrix_dim[1]); + + auto input_map = ConstEigenMatrixMapRowMajor(x->data(), left, right); + + auto mean_map = EigenMatrixMapRowMajor(mean->data(), left, 1); + auto var_map = EigenMatrixMapRowMajor(var->data(), left, 1); + auto output_map = EigenMatrixMapRowMajor(output->data(), left, right); + + auto squre = [](T ele) { return ele * ele; }; + auto add_epslion = [epsilon](T ele) { return ele + epsilon; }; + + mean_map = input_map.rowwise().mean(); + var_map = (input_map - mean_map.replicate(1, right)) + .unaryExpr(squre) + .rowwise() + .mean() + .unaryExpr(add_epslion); + + auto inv_std_func = [](T ele) { return std::sqrt(1 / ele); }; + // TODO(zcd): Some thinking about output_map, is it appropriate that + // `output_map` and `input_map` point to the same memory. + auto inv_std = var_map.unaryExpr(inv_std_func); + if (scale && bias) { + auto scale_map = + ConstEigenMatrixMapRowMajor(scale->data(), 1, right); + auto bias_map = ConstEigenMatrixMapRowMajor(bias->data(), 1, right); + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)) + .cwiseProduct(scale_map.replicate(left, 1)) + + bias_map.replicate(left, 1); + } else if (scale) { + auto scale_map = + ConstEigenMatrixMapRowMajor(scale->data(), 1, right); + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)) + .cwiseProduct(scale_map.replicate(left, 1)); + } else if (bias) { + auto bias_map = ConstEigenMatrixMapRowMajor(bias->data(), 1, right); + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)) + + bias_map.replicate(left, 1); + } else { + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)); + } + } +}; + +class LayerNormGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + // check input + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Scale"), + "Input(Scale) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Mean"), + "Input(Mean) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Variance"), + "Input(Variance) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), + "Input(Y@GRAD) of LayerNormOp should not be null."); + + // check output + if (ctx->HasOutput(framework::GradVarName("X"))) { + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } + if (ctx->HasOutput(framework::GradVarName("Scale"))) { + ctx->SetOutputDim(framework::GradVarName("Scale"), + ctx->GetInputDim("Scale")); + } + if (ctx->HasOutput(framework::GradVarName("Bias"))) { + ctx->SetOutputDim(framework::GradVarName("Bias"), + ctx->GetInputDim("Bias")); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + const auto *var = ctx.InputVar(framework::GradVarName("Y")); + if (var == nullptr) { + PADDLE_THROW("can't find Y@GRAD"); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW("can't find Y@GRAD"); + } + return framework::OpKernelType(framework::ToDataType(t->type()), + ctx.GetPlace()); + } +}; + +template +class LayerNormGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *x = ctx.Input("X"); + const auto *mean = ctx.Input("Mean"); + const auto *var = ctx.Input("Variance"); + const auto *scale = ctx.Input("Scale"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + + const auto &x_dims = x->dims(); + + const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); + auto matrix_dim = framework::flatten_to_2d(x_dims, begin_norm_axis); + int left = static_cast(matrix_dim[0]); + int right = static_cast(matrix_dim[1]); + + // init output + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + + auto x_map = ConstEigenMatrixMapRowMajor(x->data(), left, right); + auto d_y_map = ConstEigenMatrixMapRowMajor(d_y->data(), left, right); + auto mean_map = ConstEigenMatrixMapRowMajor(mean->data(), left, 1); + auto var_map = ConstEigenMatrixMapRowMajor(var->data(), left, 1); + + if (d_bias) { + d_bias->mutable_data(ctx.GetPlace()); + auto d_bias_map = EigenMatrixMapRowMajor(d_bias->data(), 1, right); + d_bias_map = d_y_map.colwise().sum(); + } + if (d_scale) { + d_scale->mutable_data(ctx.GetPlace()); + auto d_scale_map = + EigenMatrixMapRowMajor(d_scale->data(), 1, right); + auto inv_std_func = [](T ele) { return std::sqrt(1 / ele); }; + // There are two equation to compute d_scale. One uses "Y" and the other + // does not use "Y" + d_scale_map = + ((x_map - mean_map.replicate(1, right)) + .cwiseProduct( + var_map.unaryExpr(inv_std_func).replicate(1, right)) + .cwiseProduct(d_y_map)) + .colwise() + .sum(); + } + + if (d_x) { + d_x->mutable_data(ctx.GetPlace()); + auto d_x_map = EigenMatrixMapRowMajor(d_x->data(), left, right); + auto triple_product_func = [](T ele) { return ele * ele * ele; }; + auto inv_std_func = [](T ele) { return std::sqrt(1 / ele); }; + // TODO(zcd): these code can be refined + if (d_scale) { + auto scale_map = + ConstEigenMatrixMapRowMajor(scale->data(), 1, right); + // dy_dx + auto dx_end = var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map) + .cwiseProduct(scale_map.replicate(left, 1)); + // dy_dmean_dx + auto dx_mean = (T(-1.0) / right) * + var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map) + .cwiseProduct(scale_map.replicate(left, 1)) + .rowwise() + .sum() + .replicate(1, right); + // dy_var_dx + auto dvar_end_part = (x_map - mean_map.replicate(1, right)) + .cwiseProduct(scale_map.replicate(left, 1)) + .cwiseProduct(d_y_map) + .rowwise() + .sum(); + auto dvar_end = var_map.unaryExpr(inv_std_func) + .unaryExpr(triple_product_func) + .cwiseProduct(dvar_end_part) + .replicate(1, right); + auto dx_var = + (T(-1.0) / right) * + (x_map - mean_map.replicate(1, right)).cwiseProduct(dvar_end); + + d_x_map = dx_end + dx_mean + dx_var; + } else { + // dy_dx + auto dx_end = var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map); + // dy_dmean_dx + auto dx_mean = (T(-1.0) / right) * + var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map) + .rowwise() + .sum() + .replicate(1, right); + // dy_var_dx + auto dvar_end_part = (x_map - mean_map.replicate(1, right)) + .cwiseProduct(d_y_map) + .rowwise() + .sum(); + auto dvar_end = var_map.unaryExpr(inv_std_func) + .unaryExpr(triple_product_func) + .cwiseProduct(dvar_end_part) + .replicate(1, right); + auto dx_var = + (T(-1.0) / right) * + (x_map - mean_map.replicate(1, right)).cwiseProduct(dvar_end); + + d_x_map = dx_end + dx_mean + dx_var; + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(layer_norm, ops::LayerNormOp, ops::LayerNormOpMaker, + layer_norm_grad, ops::LayerNormGradOp); +REGISTER_OP_CPU_KERNEL( + layer_norm, + ops::LayerNormKernel); +REGISTER_OP_CPU_KERNEL( + layer_norm_grad, + ops::LayerNormGradKernel); diff --git a/paddle/operators/layer_norm_op.h b/paddle/operators/layer_norm_op.h new file mode 100644 index 0000000000000000000000000000000000000000..bca35b91e6f52d35dee14aac9d080b52914942e3 --- /dev/null +++ b/paddle/operators/layer_norm_op.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class LayerNormKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +template +class LayerNormGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/listen_and_serv_op.cc b/paddle/operators/listen_and_serv_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..099f6b23736adcc2a6e9c27dca297178687ae785 --- /dev/null +++ b/paddle/operators/listen_and_serv_op.cc @@ -0,0 +1,207 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include + +#include + +#include "paddle/framework/executor.h" +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/proto_desc.h" +#include "paddle/operators/detail/grpc_server.h" +#include "paddle/operators/detail/sendrecvop_utils.h" +#include "paddle/operators/detail/simple_block_queue.h" +#include "paddle/string/printf.h" + +namespace paddle { +namespace operators { + +constexpr char kOptimizeBlock[] = "OptimizeBlock"; + +void RunServer(std::shared_ptr service) { + service->RunSyncUpdate(); + VLOG(4) << "RunServer thread end"; +} + +static void CreateTensorFromMessageType(framework::Variable *var, + sendrecv::VarType var_type) { + if (var_type == sendrecv::VarType::LOD_TENSOR) { + var->GetMutable(); + } else if (var_type == sendrecv::VarType::SELECTED_ROWS) { + var->GetMutable(); + } else { + PADDLE_THROW( + "VariableMessage type %d is not in " + "[LoDTensor, SelectedRows]", + var_type); + } +} + +class ListenAndServOp : public framework::OperatorBase { + public: + ListenAndServOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) { + if (!rpc_service_) { + std::string endpoint = Attr("endpoint"); + rpc_service_.reset(new detail::AsyncGRPCServer(endpoint)); + server_thread_.reset(new std::thread(RunServer, rpc_service_)); + } + } + + void Stop() override { + detail::MessageWithName term_msg; + term_msg.first = LISTEN_TERMINATE_MESSAGE; + rpc_service_->Push(term_msg); + rpc_service_->ShutDown(); + server_thread_->join(); + } + + std::string GetGradVarNameForTrainer(const std::string &varname) const { + if (grads_counter_.find(varname) == grads_counter_.end()) { + grads_counter_[varname] = 0; + } + return string::Sprintf("%s.trainer_%d", varname, grads_counter_[varname]++); + } + + void Run(const framework::Scope &scope, + const platform::Place &dev_place) const override { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); + framework::Scope &recv_scope = scope.NewScope(); + + // FIXME(Yancey1989): initialize rpc server with lazy mode. + rpc_service_->SetScope(&recv_scope); + rpc_service_->SetDevCtx(&dev_ctx); + auto param_list = Attr>("ParamList"); + auto grad_list = Attr>("GradList"); + auto fan_in = Attr("Fanin"); + + auto *block = Attr(kOptimizeBlock); + auto *program = block->Program(); + framework::Executor executor(dev_place); + + // TODO(typhoonzero): change this to a while_op for every cluster-batch. + bool exit_flag = false; + while (!exit_flag) { + // Get from multiple trainers, we don't care about the order in which + // the gradients arrives, just add suffix 0~n and merge the gradient. + rpc_service_->SetCond(0); + size_t recv_var_cnt = 0; + int batch_barrier = 0; + while (batch_barrier != fan_in) { + const detail::MessageWithName &v = rpc_service_->Get(); + auto grad_var_name = v.first; + if (grad_var_name == LISTEN_TERMINATE_MESSAGE) { + LOG(INFO) << "received terminate message and exit"; + exit_flag = true; + break; + } else if (grad_var_name == BATCH_BARRIER_MESSAGE) { + VLOG(3) << "recv batch barrier message"; + batch_barrier++; + continue; + } else { + // receive a variable + recv_var_cnt++; + auto it = + std::find(grad_list.begin(), grad_list.end(), grad_var_name); + std::string param_var_name; + if (it != grad_list.end()) { + param_var_name = param_list[it - grad_list.begin()]; + } else { + LOG(ERROR) << "grad has no paired param:" << grad_var_name; + } + VLOG(3) << "received grad: " << grad_var_name + << " updating param: " << param_var_name; + + if (fan_in > 1) { + grad_var_name = this->GetGradVarNameForTrainer(grad_var_name); + } + auto *var = recv_scope.FindVar(grad_var_name); + if (var == nullptr) { + LOG(ERROR) << "Can not find server side var: " << grad_var_name; + PADDLE_THROW("Can not find server side var"); + } + detail::DeserializeFromMessage(v.second, dev_ctx, var); + } + } + VLOG(3) << "recv " << recv_var_cnt << " parmeters for one barrier."; + // TODO(Yancey1989): merge SelectedRows variables here + if (exit_flag) { + rpc_service_->ShutDown(); + } + + try { + executor.Run(*program, &recv_scope, block->ID(), /*global_block*/ + false /*create_local_scope*/, false /*create_vars*/); + } catch (std::exception &e) { + LOG(ERROR) << "run sub program error " << e.what(); + } + rpc_service_->SetCond(1); + rpc_service_->WaitClientGet(recv_var_cnt); + grads_counter_.clear(); + } // while(true) + } + + protected: + std::shared_ptr rpc_service_; + std::shared_ptr server_thread_; + mutable std::unordered_map grads_counter_; +}; + +class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ListenAndServOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddComment(R"DOC( +ListenAndServ operator + +This operator will start a RPC server which can receive variables +from send_op and send back variables to recv_op. +)DOC"); + AddAttr("endpoint", + "(string, default 127.0.0.1:6164)" + "IP address to listen on.") + .SetDefault("127.0.0.1:6164") + .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); + AddAttr(kOptimizeBlock, + "BlockID to run on server side."); + AddAttr>( + "ParamList", "type list of string", + "grad->param name mapping to find which parameters to optimize.") + .SetDefault({}); + AddAttr>( + "GradList", "type list of string", + "grad->param name mapping to find which parameters to optimize.") + .SetDefault({}); + AddAttr("Fanin", "type int", + "Number of trainers in the current cluster job") + .SetDefault(1); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(listen_and_serv, ops::ListenAndServOp, + ops::ListenAndServOpMaker); diff --git a/paddle/operators/load_combine_op.cc b/paddle/operators/load_combine_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f4be793d7bf1f346c011842c57fb5b5179a697d6 --- /dev/null +++ b/paddle/operators/load_combine_op.cc @@ -0,0 +1,108 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include + +#include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { + +class LoadCombineOp : public framework::OperatorBase { + public: + LoadCombineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + auto filename = Attr("file_path"); + + std::ifstream fin(filename); + PADDLE_ENFORCE(static_cast(fin), + "Cannot open file %s for load_combine op", filename); + + auto out_var_names = Outputs("Out"); + PADDLE_ENFORCE_GT( + static_cast(out_var_names.size()), 0, + "The number of output variables should be greater than 0."); + + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + for (size_t i = 0; i < out_var_names.size(); i++) { + auto *out_var = scope.FindVar(out_var_names[i]); + + PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found", + out_var_names[i]); + + auto *tensor = out_var->GetMutable(); + + // Error checking + PADDLE_ENFORCE(static_cast(fin), "Cannot read more from file %s", + filename); + + // Get data from fin to tensor + DeserializeFromStream(fin, tensor, dev_ctx); + + if (platform::is_gpu_place(place)) { + // copy CPU to GPU + framework::LoDTensor cpu_tensor; + cpu_tensor.ShareDataWith(*tensor); + cpu_tensor.set_lod(tensor->lod()); + + // reset tensor + out_var->Clear(); + tensor = out_var->GetMutable(); + tensor->set_lod(cpu_tensor.lod()); + Copy(cpu_tensor, place, dev_ctx, tensor); + } + } + } +}; + +class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + LoadCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput( + "Out", + "(vector) The output LoDTensors that will be read from the input file.") + .AsDuplicable(); + AddAttr("file_path", + "(string) " + "LoDTensors will be loaded from \"file_path\".") + .AddCustomChecker( + [](const std::string &path) { return !path.empty(); }); + AddComment(R"DOC( +LoadCombine Operator. + +LoadCombine operator loads LoDTensor variables from a file. The file should +contain one or more LoDTensors serialized using the SaveCombine operator. The +LoadCombine operator applies a deserialization strategy to appropriately load +the LodTensors, and this strategy complements the serialization strategy used +in the SaveCombine operator. Hence, the LoadCombine operator is tightly coupled +with the SaveCombine operator, and can only deserialize one or more LoDTensors +that were saved using the SaveCombine operator. + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR(load_combine, ops::LoadCombineOp, + ops::LoadCombineOpProtoMaker); diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index d97390fa1c53fa0bdf16ab34cb209b994621f83c..07372808bbf078bd2e9b0bb5782b95a046253f46 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -125,8 +125,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { new_rows.resize(ids_dim[0]); auto gpu_place = boost::get(context.GetPlace()); - memory::Copy(platform::CPUPlace(), new_rows.data(), gpu_place, ids_data, - ids_dim[0] * sizeof(int64_t), stream); + memory::Copy(platform::CPUPlace(), new_rows.cuda_data(), gpu_place, + ids_data, ids_dim[0] * sizeof(int64_t), stream); d_table->set_rows(new_rows); diff --git a/paddle/operators/lstm_op.h b/paddle/operators/lstm_op.h index c57ee414dc5b3417549c8ac3a7fd57a9c8f452df..72e95b75e29c88c5944607ceaa40435bac7a745c 100644 --- a/paddle/operators/lstm_op.h +++ b/paddle/operators/lstm_op.h @@ -27,11 +27,12 @@ using Tensor = framework::Tensor; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, const size_t* index, + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index, *dst, indexed_src); + row_shuffle(ctx, src, index_lod, *dst, indexed_src); } template @@ -84,7 +85,9 @@ class LSTMKernel : public framework::OpKernel { } lstm_value.prev_state_value = nullptr; Tensor ordered_c0; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (cell_t0) { // Since the batch computing for LSTM reorders the input sequence // according to their length. The initialized cell state also needs @@ -202,7 +205,8 @@ class LSTMGradKernel : public framework::OpKernel { // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - const size_t* order = batch_gate->lod()[2].data(); + framework::Vector order(batch_gate->lod()[2]); + if (c0) { ReorderInitState(device_ctx, *c0, order, &ordered_c0, true); diff --git a/paddle/operators/lstmp_op.h b/paddle/operators/lstmp_op.h index ee82d5c10a5421b181e525f49a263d4808ede62f..e064a155dfadd8104fa80727a962cb2e24ade29f 100644 --- a/paddle/operators/lstmp_op.h +++ b/paddle/operators/lstmp_op.h @@ -34,7 +34,8 @@ using EigenMatrix = framework::EigenMatrix; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, const size_t* index, + const framework::Tensor& src, + framework::Vector index, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); @@ -109,7 +110,9 @@ class LSTMPKernel : public framework::OpKernel { } lstmp_value.prev_state_value = nullptr; Tensor ordered_c0; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (cell_t0) { // Since the batch computing for LSTMP reorders the input sequence // according to their length. The initialized cell state also needs @@ -275,7 +278,9 @@ class LSTMPGradKernel : public framework::OpKernel { // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (c0) { ReorderInitState(device_ctx, *c0, order, &ordered_c0, true); diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 0ee456f9bc61436bd0f2f8ef20dd1654e7e56d56..acdd87cb3550bc5f3891aed6fefd4301a3395f9f 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -31,7 +31,7 @@ struct SelectedRowsAdd { PADDLE_ENFORCE_EQ(in1_height, input2.height()); output->set_height(in1_height); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); auto& in2_rows = input2.rows(); std::vector out_rows; out_rows.reserve(in1_rows.size() + in2_rows.size()); @@ -108,7 +108,7 @@ struct SelectedRowsAddTensor { PADDLE_ENFORCE_EQ(in1_height, out_dims[0]); auto& in1_value = input1.value(); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); PADDLE_ENFORCE_EQ(in1_row_numel, input2.numel() / in1_height); @@ -126,7 +126,7 @@ struct SelectedRowsAddTensor { dim3 grid(1, in1_rows.size()); SelectedRowsAddTensorKernel< T, block_size><<>>( - in1_data, in1_rows.data(), out_data, in1_row_numel); + in1_data, in1_rows.cuda_data(), out_data, in1_row_numel); auto out_eigen = framework::EigenVector::Flatten(*output); auto in2_eigen = framework::EigenVector::Flatten(input2); @@ -146,7 +146,7 @@ struct SelectedRowsAddTo { auto in1_height = input1.height(); PADDLE_ENFORCE_EQ(in1_height, input2->height()); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); auto& in2_rows = *(input2->mutable_rows()); auto& in1_value = input1.value(); @@ -204,7 +204,7 @@ struct SelectedRowsAddToTensor { PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]); auto& in1_value = input1.value(); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height); @@ -216,7 +216,7 @@ struct SelectedRowsAddToTensor { dim3 grid(1, in1_rows.size()); SelectedRowsAddToTensorKernel< T, block_size><<>>( - in1_data, in1_rows.data(), in2_data, in1_row_numel); + in1_data, in1_rows.cuda_data(), in2_data, in1_row_numel); } }; @@ -257,7 +257,7 @@ struct MergeAdd { framework::SelectedRows operator()(const platform::CUDADeviceContext& context, const framework::SelectedRows& input) { framework::SelectedRows out; - auto input_rows = input.rows(); + framework::Vector input_rows(input.rows()); std::set row_set(input_rows.begin(), input_rows.end()); std::vector merge_rows(row_set.begin(), row_set.end()); @@ -283,9 +283,9 @@ struct MergeAdd { MergeAddKernel< T, 256><<(context) - .stream()>>>(input_data, input.rows().data(), out_data, - out.rows().data(), out.rows().size(), - input_width); + .stream()>>>(input_data, input_rows.cuda_data(), out_data, + out.mutable_rows()->cuda_data(), + out.rows().size(), input_width); return out; } }; @@ -370,8 +370,8 @@ struct UpdateToTensor { dim3 threads(platform::PADDLE_CUDA_NUM_THREADS, 1); dim3 grid(1, in1_rows.size()); UpdateToTensorKernel<<< - grid, threads, 0, context.stream()>>>(in1_data, in1_rows.data(), op, - in2_data, in1_row_numel); + grid, threads, 0, context.stream()>>>(in1_data, in1_rows.cuda_data(), + op, in2_data, in1_row_numel); } }; } // namespace scatter diff --git a/paddle/operators/math/sequence2batch.cc b/paddle/operators/math/sequence2batch.cc index e459a42ca251a9fc79f745f48a118ce898a0f77e..17abce1c2f809f75edb2c5dc46709094c2ce10c3 100644 --- a/paddle/operators/math/sequence2batch.cc +++ b/paddle/operators/math/sequence2batch.cc @@ -23,8 +23,10 @@ template class CopyMatrixRowsFunctor { public: void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& src, const size_t* index, - framework::Tensor& dst, bool is_src_index) { + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor& dst, + bool is_src_index) { + size_t* index = index_lod.data(); auto src_dims = src.dims(); auto dst_dims = dst.dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2UL, diff --git a/paddle/operators/math/sequence2batch.cu b/paddle/operators/math/sequence2batch.cu index 452ae8951000872b706f7e4227a62dbf98109e7e..f27631271a42b4d64abef00d7f119b85e32edda4 100644 --- a/paddle/operators/math/sequence2batch.cu +++ b/paddle/operators/math/sequence2batch.cu @@ -42,8 +42,10 @@ template class CopyMatrixRowsFunctor { public: void operator()(const platform::CUDADeviceContext& context, - const framework::Tensor& src, const size_t* index, - framework::Tensor& dst, bool is_src_index) { + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor& dst, + bool is_src_index) { + size_t* index = index_lod.cuda_data(); auto src_dims = src.dims(); auto dst_dims = dst.dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2, diff --git a/paddle/operators/math/sequence2batch.h b/paddle/operators/math/sequence2batch.h index a5c43a2c7d4d729c35a20a27de2a23141e6019bc..6db0427b4174a09dd254d771e8d3d215cc6571a9 100644 --- a/paddle/operators/math/sequence2batch.h +++ b/paddle/operators/math/sequence2batch.h @@ -35,7 +35,7 @@ class CopyMatrixRowsFunctor { // copy the input src to the indexed rows of output dst. // The indexed rows are based on the input index. void operator()(const DeviceContext& context, const framework::Tensor& src, - const size_t* index, framework::Tensor& dst, + framework::Vector index_lod, framework::Tensor& dst, bool is_src_index); }; @@ -66,7 +66,7 @@ class LoDTensor2BatchFunctor { PADDLE_ENFORCE_EQ(lods[1].size(), static_cast(lod_tensor.dims()[0])); CopyMatrixRowsFunctor to_batch; - to_batch(context, lod_tensor, lods[1].data(), batch, true); + to_batch(context, lod_tensor, lods[1], batch, true); return; } @@ -144,7 +144,7 @@ class LoDTensor2BatchFunctor { batch.set_lod(batch_lods); CopyMatrixRowsFunctor to_batch; - to_batch(context, lod_tensor, seq2batch_idx, batch, true); + to_batch(context, lod_tensor, batch_lods[1], batch, true); } }; @@ -159,8 +159,7 @@ class Batch2LoDTensorFunctor { PADDLE_ENFORCE_EQ(in_lod[1].size(), static_cast(lod_tensor.dims()[0])); CopyMatrixRowsFunctor to_seq; - size_t* index = in_lod[1].data(); - to_seq(context, batch, index, lod_tensor, false); + to_seq(context, batch, in_lod[1], lod_tensor, false); } }; diff --git a/paddle/operators/math/sequence_padding.cu b/paddle/operators/math/sequence_padding.cu index a38df26f59569c4fd54a1ba5691b2cd5f3245344..65c9cfe4a0ec14d220ad237baa71703a783ed0fa 100644 --- a/paddle/operators/math/sequence_padding.cu +++ b/paddle/operators/math/sequence_padding.cu @@ -120,12 +120,14 @@ class PaddingLoDTensorFunctor { T* padding_data = padding.data(); if (norm_by_times) { SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + padding_data, const_cast(seq_data), + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } else { SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + padding_data, const_cast(seq_data), + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } } }; @@ -193,12 +195,14 @@ class UnpaddingLoDTensorFunctor { T* seq_data = seq.data(); if (norm_by_times) { SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + const_cast(padding_data), seq_data, + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } else { SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + const_cast(padding_data), seq_data, + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } } }; diff --git a/paddle/operators/math/sequence_pooling.cu b/paddle/operators/math/sequence_pooling.cu index 4c9e6b375ce7251747b9cd443d86cca0858c84ef..f66534a6812a66c737445ea96914a393077d7d65 100644 --- a/paddle/operators/math/sequence_pooling.cu +++ b/paddle/operators/math/sequence_pooling.cu @@ -73,7 +73,7 @@ class MaxSeqPoolFunctor { dim3 grid(num_seq, 1); auto stream = context.stream(); KeMaxSequencePool<<>>( - in_data, starts.data(), out_data, max_index, num_seq, dim); + in_data, starts.cuda_data(), out_data, max_index, num_seq, dim); } }; diff --git a/paddle/operators/math/sequence_scale.cu b/paddle/operators/math/sequence_scale.cu index ceaabd8e0fd81c927fbd4333c0aa7954b8da8513..fd4e28f6113729cd1fa9dc179bd9b601d29b8a7f 100644 --- a/paddle/operators/math/sequence_scale.cu +++ b/paddle/operators/math/sequence_scale.cu @@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor { SequenceScaleKernel<<< num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>( - seq_data, abs_offset_lod[level].data(), scales, seq_width); + seq_data, abs_offset_lod[level].cuda_data(), scales, seq_width); } }; diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 49e1eb3402482e7ff12d9b2b640f7271a80cf6d9..ba71094219f37eb7a3c2df68be986cec7afbf7ab 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -12,187 +12,60 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include #include -#include -#include - -#include "paddle/framework/executor.h" +#include "paddle/framework/data_type.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/proto_desc.h" -#include "paddle/operators/detail/grpc_server.h" -#include "paddle/operators/detail/sendrecvop_utils.h" -#include "paddle/operators/detail/simple_block_queue.h" -#include "paddle/string/printf.h" + +#include +#include "paddle/operators/detail/grpc_client.h" namespace paddle { namespace operators { -constexpr char kOptimizeBlock[] = "OptimizeBlock"; - -void RunServer(std::shared_ptr service) { - service->RunSyncUpdate(); - VLOG(4) << "RunServer thread end"; -} - -static void CreateTensorFromMessageType(framework::Variable *var, - sendrecv::VarType var_type) { - if (var_type == sendrecv::VarType::LOD_TENSOR) { - var->GetMutable(); - } else if (var_type == sendrecv::VarType::SELECTED_ROWS) { - var->GetMutable(); - } else { - PADDLE_THROW( - "VariableMessage type %d is not in " - "[LoDTensor, SelectedRows]", - var_type); - } -} - class RecvOp : public framework::OperatorBase { public: - RecvOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) { - if (!rpc_service_) { - std::string endpoint = Attr("endpoint"); - rpc_service_.reset(new detail::AsyncGRPCServer(endpoint)); - server_thread_.reset(new std::thread(RunServer, rpc_service_)); - } - } - - void Stop() override { - detail::MessageWithName term_msg; - term_msg.first = LISTEN_TERMINATE_MESSAGE; - rpc_service_->Push(term_msg); - rpc_service_->ShutDown(); - server_thread_->join(); - } - - std::string GetGradVarNameForTrainer(const std::string &varname) const { - if (grads_counter_.find(varname) == grads_counter_.end()) { - grads_counter_[varname] = 0; + RecvOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope& scope, + const platform::Place& place) const override { + auto outs = Outputs("Out"); + std::vector epmap = Attr>("epmap"); + + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& ctx = *pool.Get(place); + + for (size_t i = 0; i < outs.size(); i++) { + VLOG(3) << "getting " << outs[i]; + client_.AsyncGetVariable(epmap[i], ctx, scope, outs[i]); } - return string::Sprintf("%s.trainer_%d", varname, grads_counter_[varname]++); + PADDLE_ENFORCE(client_.Wait()); } - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(dev_place); - framework::Scope &recv_scope = scope.NewScope(); - - // FIXME(Yancey1989): initialize rpc server with laze mode. - rpc_service_->SetScope(&recv_scope); - rpc_service_->SetDevCtx(&dev_ctx); - auto param_list = Attr>("ParamList"); - auto grad_list = Attr>("GradList"); - auto fan_in = Attr("Fanin"); - - auto *block = Attr(kOptimizeBlock); - auto *program = block->Program(); - framework::Executor executor(dev_place); - - // TODO(typhoonzero): change this to a while_op for every cluster-batch. - bool exit_flag = false; - while (!exit_flag) { - // Get from multiple trainers, we don't care about the order in which - // the gradients arrives, just add suffix 0~n and merge the gradient. - rpc_service_->SetCond(0); - size_t recv_var_cnt = 0; - int batch_barrier = 0; - while (batch_barrier != fan_in) { - const detail::MessageWithName &v = rpc_service_->Get(); - auto grad_var_name = v.first; - if (grad_var_name == LISTEN_TERMINATE_MESSAGE) { - LOG(INFO) << "received terminate message and exit"; - exit_flag = true; - break; - } else if (grad_var_name == BATCH_BARRIER_MESSAGE) { - VLOG(3) << "recv batch barrier message"; - batch_barrier++; - continue; - } else { - // receive a variable - recv_var_cnt++; - auto it = - std::find(grad_list.begin(), grad_list.end(), grad_var_name); - std::string param_var_name; - if (it != grad_list.end()) { - param_var_name = param_list[it - grad_list.begin()]; - } else { - LOG(ERROR) << "grad has no paired param:" << grad_var_name; - } - VLOG(3) << "received grad: " << grad_var_name - << " updating param: " << param_var_name; - - if (fan_in > 1) { - grad_var_name = this->GetGradVarNameForTrainer(grad_var_name); - } - auto *var = recv_scope.FindVar(grad_var_name); - if (var == nullptr) { - LOG(ERROR) << "Can not find server side var: " << grad_var_name; - PADDLE_THROW("Can not find server side var"); - } - detail::DeserializeFromMessage(v.second, dev_ctx, var); - } - } - VLOG(3) << "recv " << recv_var_cnt << " parmeters for one barrier."; - // TODO(Yancey1989): merge SelectedRows variables here - if (exit_flag) { - break; - } - - try { - executor.Run(*program, &recv_scope, block->ID(), /*global_block*/ - false /*create_local_scope*/, false /*create_vars*/); - } catch (std::exception &e) { - LOG(ERROR) << "run sub program error " << e.what(); - } - rpc_service_->SetCond(1); - rpc_service_->WaitClientGet(recv_var_cnt); - grads_counter_.clear(); - } // while(true) - } - - protected: - std::shared_ptr rpc_service_; - std::shared_ptr server_thread_; - mutable std::unordered_map grads_counter_; + private: + mutable detail::RPCClient client_; }; class RecvOpMaker : public framework::OpProtoAndCheckerMaker { public: - RecvOpMaker(OpProto *proto, OpAttrChecker *op_checker) + RecvOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput("Out", "(Tensor) Variables to get from server.").AsDuplicable(); AddComment(R"DOC( Recv operator -This operator will recieve tensor from send_op +This operator can get variables from server side. )DOC"); - AddAttr("endpoint", - "(string, default 127.0.0.1:6164)" - "IP address to listen on.") - .SetDefault("127.0.0.1:6164") - .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); - AddAttr( - kOptimizeBlock, "Serialized ProgramDesc string for recv to run."); - AddAttr>( - "ParamList", "type list of string", - "grad->param name mapping to find which parameters to optimize.") - .SetDefault({}); - AddAttr>( - "GradList", "type list of string", - "grad->param name mapping to find which parameters to optimize.") + AddAttr>("epmap", + "(string vector, default 127.0.0.1:6164)" + "Server endpoints in the order of input " + "variables for mapping") .SetDefault({}); - AddAttr("Fanin", "type int", - "Number of trainers in the current cluster job") - .SetDefault(1); } }; diff --git a/paddle/operators/row_conv_op.cu b/paddle/operators/row_conv_op.cu index 41f2c5b9de91ade15b4010f56377675cfd1b611c..b3825212e1ac41b13a2f4cad2c128da39c5f6e71 100644 --- a/paddle/operators/row_conv_op.cu +++ b/paddle/operators/row_conv_op.cu @@ -307,7 +307,7 @@ class RowConvKernel int input_dim = X->dims()[1]; int num_sequence = batch_indices.size() - 1; int future_context = Filter->dims()[0]; - size_t *idx = batch_indices.data(); + size_t *idx = batch_indices.cuda_data(); auto stream = context.cuda_device_context().stream(); if (future_context <= 32) { @@ -345,7 +345,7 @@ class RowConvGradKernel int input_dim = X->dims()[1]; int num_sequence = batch_indices.size() - 1; int future_context = Filter->dims()[0]; - size_t *idx = batch_indices.data(); + size_t *idx = batch_indices.cuda_data(); auto &device_ctx = context.cuda_device_context(); math::SetConstant zero; diff --git a/paddle/operators/save_combine_op.cc b/paddle/operators/save_combine_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bffa2908bc42d73332f22fa3706d24ab49cd4b38 --- /dev/null +++ b/paddle/operators/save_combine_op.cc @@ -0,0 +1,141 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include +#include +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { + +// TODO(sidgoyal78): These function are needed by other files (save_op), move +// them to paddle::filesystem namespace. (as noted by yuyang18 in save_op). +constexpr char kSEP = '/'; +static bool FileExists(const std::string &filepath) { + struct stat buffer; + return (stat(filepath.c_str(), &buffer) == 0); +} + +static std::string DirName(const std::string &filepath) { + auto pos = filepath.rfind(kSEP); + if (pos == std::string::npos) { + return ""; + } + return filepath.substr(0, pos); +} + +static void MkDir(const char *path) { + if (mkdir(path, 0755)) { + PADDLE_ENFORCE_EQ(errno, EEXIST, "%s mkdir failed!", path); + } +} + +static void MkDirRecursively(const char *fullpath) { + if (*fullpath == '\0') return; // empty string + if (FileExists(fullpath)) return; + + MkDirRecursively(DirName(fullpath).c_str()); + MkDir(fullpath); +} + +class SaveCombineOp : public framework::OperatorBase { + public: + SaveCombineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + auto filename = Attr("file_path"); + auto overwrite = Attr("overwrite"); + + bool is_present = FileExists(filename); + if (is_present && !overwrite) { + PADDLE_THROW("%s exists!, cannot save_combine to it when overwrite=false", + filename, overwrite); + } + + MkDirRecursively(DirName(filename).c_str()); + std::ofstream fout(filename); + PADDLE_ENFORCE(static_cast(fout), "Cannot open %s to write", + filename); + + auto inp_var_names = Inputs("X"); + PADDLE_ENFORCE_GT(static_cast(inp_var_names.size()), 0, + "The number of input variables should be greater than 0"); + + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + for (size_t i = 0; i < inp_var_names.size(); i++) { + auto *var = scope.FindVar(inp_var_names[i]); + + PADDLE_ENFORCE(var != nullptr, + "Cannot find variable %s for save_combine_op", + inp_var_names[i]); + PADDLE_ENFORCE(var->IsType(), + "SaveCombineOp only supports LoDTensor, %s has wrong type", + inp_var_names[i]); + + auto &tensor = var->Get(); + // Serialize tensor + framework::SerializeToStream(fout, tensor, dev_ctx); + } + fout.close(); + } +}; + +class SaveCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + SaveCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(vector) Input LoDTensors that need to be saved together in a file.") + .AsDuplicable(); + AddComment(R"DOC( +SaveCombine operator + +This operator will serialize and write a list of input LoDTensor variables +to a file on disk. +)DOC"); + AddAttr("overwrite", + "(boolean, default true)" + "Overwrite the output file if it exists.") + .SetDefault(true); + AddAttr( + "file_path", + "(string)" + "The \"file_path\" where the LoDTensor variables will be saved.") + .AddCustomChecker( + [](const std::string &path) { return !path.empty(); }); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(save_combine, ops::SaveCombineOp, + ops::SaveCombineOpProtoMaker); diff --git a/paddle/operators/save_load_combine_op_test.cc b/paddle/operators/save_load_combine_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..f3ddc4a6c55d72e4e444869a1ebcd7662c892317 --- /dev/null +++ b/paddle/operators/save_load_combine_op_test.cc @@ -0,0 +1,180 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "gtest/gtest.h" +#include "paddle/framework/op_registry.h" + +USE_NO_KERNEL_OP(save_combine); +USE_NO_KERNEL_OP(load_combine); + +int* CreateForSaveCombineOp(int x, int y, const std::vector& lod_info, + std::string var_name, + paddle::platform::CPUPlace& place, + paddle::framework::Scope& scope, + paddle::framework::LoD& expect_lod) { + auto var = scope.Var(var_name); + auto tensor = var->GetMutable(); + tensor->Resize({x, y}); + expect_lod.resize(1); + for (size_t i = 0; i < lod_info.size(); i++) { + expect_lod[0].push_back(lod_info[i]); + } + tensor->set_lod(expect_lod); + int* expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + return expect; +} + +paddle::framework::LoDTensor* GeneratePlaceholderBeforeLoad( + const std::string out_var_name, paddle::framework::Scope& scope) { + auto load_var = scope.Var(out_var_name); + auto target = load_var->GetMutable(); + return target; +} + +int* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target, + paddle::framework::Scope& scope, + paddle::framework::LoD& actual_lod) { + int* actual = target->data(); + actual_lod = target->lod(); + return actual; +} + +void CheckValues(int* expect, int* actual, paddle::framework::LoD expect_lod, + paddle::framework::LoD actual_lod, const int& numel) { + for (int64_t i = 0; i < numel; ++i) { + EXPECT_EQ(expect[i], actual[i]); + } + EXPECT_EQ(expect_lod.size(), actual_lod.size()); + for (size_t i = 0; i < expect_lod.size(); ++i) { + for (size_t j = 0; j < expect_lod[i].size(); ++j) { + EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]); + } + } +} + +// Here, we create 4 LoDTensors and use save_combine_op to first save these +// in a single file. Then, we use load_combine_op to load these sequentially +TEST(SaveLoadCombineOp, CPU) { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + + std::vector lod1 = {0, 1, 2, 3, 10}; + int numel1 = 100; + paddle::framework::LoD expect_lod1; + int* expect1 = CreateForSaveCombineOp(10, 10, lod1, "test_var1", place, scope, + expect_lod1); + + std::vector lod2 = {0, 2, 5, 10}; + int numel2 = 200; + paddle::framework::LoD expect_lod2; + int* expect2 = CreateForSaveCombineOp(10, 20, lod2, "test_var2", place, scope, + expect_lod2); + + std::vector lod3 = {0, 2, 3, 20}; + int numel3 = 4000; + paddle::framework::LoD expect_lod3; + int* expect3 = CreateForSaveCombineOp(20, 200, lod3, "test_var3", place, + scope, expect_lod3); + + std::vector lod4 = {0, 1, 20}; + int numel4 = 1000; + paddle::framework::LoD expect_lod4; + int* expect4 = CreateForSaveCombineOp(20, 50, lod4, "test_var4", place, scope, + expect_lod4); + + // Set attributes + std::string filename = "check_tensor.ls"; + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string(filename)}); + + // Run the save_combine_op + auto save_combine_op = paddle::framework::OpRegistry::CreateOp( + "save_combine", + {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs); + save_combine_op->Run(scope, place); + + // Set up output vars + auto target1 = GeneratePlaceholderBeforeLoad("out_var1", scope); + auto target2 = GeneratePlaceholderBeforeLoad("out_var2", scope); + auto target3 = GeneratePlaceholderBeforeLoad("out_var3", scope); + auto target4 = GeneratePlaceholderBeforeLoad("out_var4", scope); + + // Run the load_combine_op + auto load_combine_op = paddle::framework::OpRegistry::CreateOp( + "load_combine", {}, + {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs); + load_combine_op->Run(scope, place); + + paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4; + int* actual1 = GetValuesAfterLoadCombineOp(target1, scope, actual_lod1); + int* actual2 = GetValuesAfterLoadCombineOp(target2, scope, actual_lod2); + int* actual3 = GetValuesAfterLoadCombineOp(target3, scope, actual_lod3); + int* actual4 = GetValuesAfterLoadCombineOp(target4, scope, actual_lod4); + + CheckValues(expect1, actual1, expect_lod1, actual_lod1, numel1); + CheckValues(expect2, actual2, expect_lod2, actual_lod2, numel2); + CheckValues(expect3, actual3, expect_lod3, actual_lod3, numel3); + CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4); +} + +// Test with original SaveLoadTest +TEST(SaveLoadTestWithCombineOp, CPU) { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + + auto var = scope.Var("test_var"); + auto tensor = var->GetMutable(); + tensor->Resize({3, 10}); + paddle::framework::LoD expect_lod; + expect_lod.resize(1); + expect_lod[0].push_back(0); + expect_lod[0].push_back(1); + expect_lod[0].push_back(2); + expect_lod[0].push_back(3); + + tensor->set_lod(expect_lod); + int* expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string("check_t.save")}); + + auto save_op = paddle::framework::OpRegistry::CreateOp( + "save_combine", {{"X", {"test_var"}}}, {}, attrs); + save_op->Run(scope, place); + + auto load_var = scope.Var("out_var"); + auto target = load_var->GetMutable(); + auto load_op = paddle::framework::OpRegistry::CreateOp( + "load_combine", {}, {{"Out", {"out_var"}}}, attrs); + load_op->Run(scope, place); + int* actual = target->data(); + for (int64_t i = 0; i < tensor->numel(); ++i) { + EXPECT_EQ(expect[i], actual[i]); + } + auto& actual_lod = target->lod(); + EXPECT_EQ(expect_lod.size(), actual_lod.size()); + for (size_t i = 0; i < expect_lod.size(); ++i) { + for (size_t j = 0; j < expect_lod[i].size(); ++j) { + EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]); + } + } +} diff --git a/paddle/operators/save_load_op_test.cc b/paddle/operators/save_load_op_test.cc index 40103d864fb58804b39ca5f3c63e802a430ce886..d829d5da174b73613da9dcfcd308a5b05e12bce9 100644 --- a/paddle/operators/save_load_op_test.cc +++ b/paddle/operators/save_load_op_test.cc @@ -24,7 +24,7 @@ TEST(SaveLoadOp, CPU) { auto var = scope.Var("test_var"); auto tensor = var->GetMutable(); - tensor->Resize({10, 10}); + tensor->Resize({3, 10}); paddle::framework::LoD expect_lod; expect_lod.resize(1); expect_lod[0].push_back(0); diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index bb719dc2a8a577bc042a2a70f7169b7d70f83684..ee0f268b0e4dfa23bf878d71404d47553183a977 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -42,28 +42,34 @@ class SendOp : public framework::OperatorBase { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); + + auto client_var_name = Output("RPCClient"); + PADDLE_ENFORCE_NOT_NULL(scope.FindVar(client_var_name), + "Can not find variable '%s' in the scope.", + client_var_name); + auto* client_var = scope.FindVar(client_var_name); + detail::RPCClient* rpc_client = client_var->GetMutable(); + for (size_t i = 0; i < ins.size(); i++) { VLOG(3) << "sending " << ins[i] << " to " << epmap[i]; - client_.AsyncSendVariable(epmap[i], ctx, scope, ins[i]); + rpc_client->AsyncSendVariable(epmap[i], ctx, scope, ins[i]); } - PADDLE_ENFORCE(client_.Wait()); + PADDLE_ENFORCE(rpc_client->Wait()); for (auto& ep : endpoints) { VLOG(3) << "batch barrier, ep: " << ep; - client_.AsyncSendBatchBarrier(ep); + rpc_client->AsyncSendBatchBarrier(ep); } - PADDLE_ENFORCE(client_.Wait()); - - for (size_t i = 0; i < outs.size(); i++) { - VLOG(3) << "getting " << outs[i] << " from " << epmap[i]; - client_.AsyncGetVariable(epmap[i], ctx, scope, outs[i]); + PADDLE_ENFORCE(rpc_client->Wait()); + + if (outs.size() > 0) { + for (size_t i = 0; i < outs.size(); i++) { + VLOG(3) << "getting " << outs[i] << " from " << epmap[i]; + rpc_client->AsyncGetVariable(epmap[i], ctx, scope, outs[i]); + } + PADDLE_ENFORCE(rpc_client->Wait()); } - - PADDLE_ENFORCE(client_.Wait()); } - - private: - mutable detail::RPCClient client_; }; class SendOpMaker : public framework::OpProtoAndCheckerMaker { @@ -73,11 +79,16 @@ class SendOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "(Tensor) Input tensor to be sent").AsDuplicable(); AddOutput("Out", "(Tensor) Output tensor to be received from server") .AsDuplicable(); + AddOutput("RPCClient", + "(RPCClient) The RPC client object which is" + "initialized at most once."); AddComment(R"DOC( Send operator This operator will send tensor to recv_op at the parameter server. )DOC"); + // TODO(typhoonzero): remove this attr generate de-duplicated vector from + // epmap when initializing. AddAttr>("endpoints", "(string vector, default 127.0.0.1:6164)" "Server endpoints to send variables to.") diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc index 045a0f5434f339bab345d14881ed05450ce6588d..31527a906d56da54d2571910de627757d708a996 100644 --- a/paddle/operators/send_recv_op_test.cc +++ b/paddle/operators/send_recv_op_test.cc @@ -25,7 +25,7 @@ limitations under the License. */ #include "paddle/string/printf.h" USE_NO_KERNEL_OP(send); -USE_NO_KERNEL_OP(recv); +USE_NO_KERNEL_OP(listen_and_serv); USE_OP(sum); namespace f = paddle::framework; @@ -33,7 +33,7 @@ namespace p = paddle::platform; namespace m = paddle::operators::math; // global for simplicity. -std::unique_ptr recv_op; +std::unique_ptr listen_and_serv_op; void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) { p::CPUDeviceContext ctx(place); @@ -120,7 +120,7 @@ void StartServerNet(bool is_sparse) { InitTensorsInScope(scope, place); } - // sub program run in recv_op, for simple test we use sum + // sub program run in listen_and_serv_op, for simple test we use sum f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); // X for server side tensors, RX for received tensers, must be of same shape. @@ -131,8 +131,9 @@ void StartServerNet(bool is_sparse) { attrs.insert({"ParamList", std::vector({"Out"})}); attrs.insert({"GradList", std::vector({"x1"})}); attrs.insert({"OptimizeBlock", block}); - recv_op = f::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, {}, attrs); - recv_op->Run(scope, place); + listen_and_serv_op = + f::OpRegistry::CreateOp("listen_and_serv", {}, {}, attrs); + listen_and_serv_op->Run(scope, place); } TEST(SendRecvOp, CPUDense) { @@ -161,9 +162,9 @@ TEST(SendRecvOp, CPUDense) { for (int64_t i = 0; i < target->numel(); ++i) { EXPECT_EQ(expected[i] * 2, actual[i]); } - recv_op->Stop(); + listen_and_serv_op->Stop(); server_thread.join(); - recv_op.reset(nullptr); + listen_and_serv_op.reset(nullptr); } TEST(SendRecvOp, CPUSparse) { @@ -200,7 +201,7 @@ TEST(SendRecvOp, CPUSparse) { EXPECT_EQ(expect_value->mutable_data(place)[i], actual->mutable_data(place)[i]); } - recv_op->Stop(); + listen_and_serv_op->Stop(); server_thread.join(); - recv_op.reset(); + listen_and_serv_op.reset(); } diff --git a/paddle/operators/sequence_erase_op.cu b/paddle/operators/sequence_erase_op.cu index f1e3b96acd0259de2b3ca1348834bd17e1e174a2..a5311f15f0c607c880a6f12c0bef10b2dd8c8a79 100644 --- a/paddle/operators/sequence_erase_op.cu +++ b/paddle/operators/sequence_erase_op.cu @@ -96,9 +96,8 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { GetOutLod<<<(lod_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>( num_erased_ptr, dev_in_lod_ptr, lod_len, dev_out_lod_ptr); - // Set LoD for output - thrust::host_vector out_lod0 = dev_out_lod; + std::vector out_lod0(dev_out_lod.begin(), dev_out_lod.end()); framework::LoD out_lod; out_lod.push_back(out_lod0); out->set_lod(out_lod); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 42f8f8b2f072f9d204dfadcd732926b5c98dc617..29f5aa3542c26c76a1b80da61ec6752019216131 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in_height, out_dims[0]); auto& in_value = grad->value(); - auto& in_rows = grad->rows(); + framework::Vector in_rows(grad->rows()); int64_t in_row_numel = in_value.numel() / in_rows.size(); PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); @@ -102,7 +102,7 @@ class SGDOpCUDAKernel : public framework::OpKernel { dim3 grid(1, in_rows.size()); SparseSGDFunctorKernel< T, 256><<>>( - in_data, in_rows.data(), learning_rate->data(), out_data, + in_data, in_rows.cuda_data(), learning_rate->data(), out_data, in_row_numel); } else { diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index 48201b344de0d3bd2b121a12389876dad095f10d..3d8102c3ae20c8b714cd48b4fc78dc18a0cf89a7 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -68,7 +68,32 @@ class SumKernel : public framework::OpKernel { } } } else if (out_var->IsType()) { - PADDLE_ENFORCE(!in_place, "SelectedRows not support inplace sum now"); + std::unique_ptr in0; + if (in_place) { + // If is in_place, we store the input[0] to in0 + auto &in_sel0 = in_vars[0]->Get(); + auto &rows = in_sel0.rows(); +#ifdef PADDLE_WITH_CUDA + std::vector rows_in_cpu; + rows_in_cpu.reserve(rows.size()); + for (auto item : rows) { + rows_in_cpu.push_back(item); + } + in0.reset(new framework::SelectedRows(rows_in_cpu, in_sel0.height())); +#else + in0.reset(new framework::SelectedRows(rows, in_sel0.height())); +#endif + in0->mutable_value()->ShareDataWith(in_sel0.value()); + } + + auto get_selected_row = [&](size_t i) -> const SelectedRows & { + if (i == 0 && in0) { + return *in0.get(); + } else { + return in_vars[i]->Get(); + } + }; + auto *out = context.Output("Out"); out->mutable_rows()->clear(); auto *out_value = out->mutable_value(); @@ -76,24 +101,26 @@ class SumKernel : public framework::OpKernel { // Runtime InferShape size_t first_dim = 0; for (int i = 0; i < N; i++) { - first_dim += in_vars[i]->Get().rows().size(); + auto &sel_row = get_selected_row(i); + first_dim += sel_row.rows().size(); } - auto in_dim = in_vars[0]->Get().value().dims(); - auto in_dim_vec = framework::vectorize(in_dim); - in_dim_vec[0] = static_cast(first_dim); + auto in_dim = + framework::vectorize(get_selected_row(N - 1).value().dims()); + in_dim[0] = static_cast(first_dim); - out_value->Resize(framework::make_ddim(in_dim_vec)); + out_value->Resize(framework::make_ddim(in_dim)); out_value->mutable_data(context.GetPlace()); math::SelectedRowsAddTo functor; int64_t offset = 0; for (int i = 0; i < N; i++) { - PADDLE_ENFORCE_EQ(out->height(), - in_vars[i]->Get().height()); - functor(context.template device_context(), - in_vars[i]->Get(), offset, out); - offset += in_vars[i]->Get().value().numel(); + auto &sel_row = get_selected_row(i); + + PADDLE_ENFORCE_EQ(out->height(), sel_row.height()); + functor(context.template device_context(), sel_row, + offset, out); + offset += sel_row.value().numel(); } } else if (out_var->IsType()) { auto &out_array = *out_var->GetMutable(); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 490397afdd4de0cc1aafde746d31b1d800eded3b..a880d9bdbc63aacc1f2cdbc0d7da001a59c7b372 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -124,44 +124,25 @@ PYBIND11_PLUGIN(core) { .def( "__init__", [](LoDTensor &instance, const std::vector> &lod) { -#ifndef PADDLE_WITH_CUDA - new (&instance) LoDTensor(lod); -#else - LoD new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - new (&instance) LoDTensor(new_lod); -#endif + LoD new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + new (&instance) LoDTensor(new_lod); }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) .def("set_lod", [](LoDTensor &self, const std::vector> &lod) { -#ifndef PADDLE_WITH_CUDA - self.set_lod(lod); -#else LoD new_lod; new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); self.set_lod(new_lod); -#endif }) .def("lod", [](LoDTensor &self) -> std::vector> { -#ifndef PADDLE_WITH_CUDA - return self.lod(); -#else - auto lod = self.lod(); - std::vector> new_lod; - new_lod.reserve(lod.size()); - std::transform(lod.begin(), lod.end(), std::back_inserter(new_lod), - [](Vector item) -> - std::vector { - std::vector v; - v.reserve(item.size()); - std::copy(item.begin(), item.end(), std::back_inserter(v)); - return v; - }); - return new_lod; -#endif + auto lod = self.lod(); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + return new_lod; }); py::class_(m, "SelectedRows") diff --git a/paddle/scripts/docker/README.md b/paddle/scripts/docker/README.md index f0620498cfa6775ce2949cc02fa9f6c9529dec2e..65c46745556bc5ea91fdd4e33060f2535422e8e8 100644 --- a/paddle/scripts/docker/README.md +++ b/paddle/scripts/docker/README.md @@ -56,7 +56,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" | ------ | -------- | ----------- | | `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. | | `WITH_AVX` | OFF | Set to "ON" to enable AVX support. | -| `WITH_TESTING` | ON | Build unit tests binaries. | +| `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. | | `WITH_SWIG_PY` | ON | Build with SWIG python API support. | diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index fbae37b2ca063e32cb12ded0da901d93438bc9a2..df7310d6b70ac95953177024a7c2981d1c81a901 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -32,7 +32,7 @@ function cmake_gen() { cat < new_argv; std::string gflags_env; - new_argv.push_back(argv[0]); + for (int i = 0; i < argc; ++i) { + new_argv.push_back(argv[i]); + } #ifdef PADDLE_WITH_CUDA new_argv.push_back( - strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); + strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory," + "warpctc_dir")); #else - new_argv.push_back(strdup("--tryfromenv=use_pinned_memory")); + new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,warpctc_dir")); #endif int new_argc = static_cast(new_argv.size()); char** new_argv_address = new_argv.data(); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 4fdf4090212e31adcccf6b119c937e70d5cbf995..186b91c226accbe1c2d5465d6244b9438eec9979 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -140,8 +140,13 @@ def init_config_environment( g_submodel_stack=[], g_add_submodel_suffix=False, ): - for k, v in locals().iteritems(): - globals()[k] = copy.deepcopy(v) + # directly iterate through locals().iteritems() will change + # the size of locals() due to introducing k, v into scope + # which will break the process in some env + + local_vars = copy.deepcopy(locals()) + for k, v in local_vars.iteritems(): + globals()[k] = v # Because type is widely used as a variable name in this code. diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 787416aed1acf81138df06110317614dfe77fb48..3ee58393c72c0b6f9bec96be51ad3946752a35dd 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -26,6 +26,7 @@ import initializer import layers import nets import optimizer +import learning_rate_decay import backward import regularizer from param_attr import ParamAttr @@ -35,27 +36,16 @@ from distribute_transpiler import DistributeTranspiler from distribute_transpiler_simple import SimpleDistributeTranspiler import clip from memory_optimization_transpiler import memory_optimize +import profiler Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + [ - 'io', - 'initializer', - 'layers', - 'nets', - 'optimizer', - 'backward', - 'regularizer', - 'LoDTensor', - 'CPUPlace', - 'CUDAPlace', - 'Tensor', + 'io', 'initializer', 'layers', 'nets', 'optimizer', 'learning_rate_decay', + 'backward', 'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr' - 'DataFeeder', - 'clip', - 'SimpleDistributeTranspiler', - 'DistributeTranspiler', - 'memory_optimize', + 'DataFeeder', 'clip', 'SimpleDistributeTranspiler', 'DistributeTranspiler', + 'memory_optimize', 'profiler' ] @@ -87,10 +77,10 @@ def __bootstrap__(): os.environ['OMP_NUM_THREADS'] = str(num_threads) read_env_flags = [ - 'use_pinned_memory', 'check_nan_inf', 'do_memory_benchmark' + 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir' ] if core.is_compiled_with_cuda(): - read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync'] + read_env_flags += ['fraction_of_gpu_memory_to_use'] core.init_gflags([sys.argv[0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) core.init_glog(sys.argv[0]) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index 92081a47d48ce2f739562649f76c03621abadc3b..fdbc8524abb7d6687983b026ca8e65e61c3dfd1a 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -30,6 +30,9 @@ __all__ = [ class BaseErrorClipAttr(object): + def __str__(self): + raise NotImplementedError() + def append_clip_op(self, block, grad_name): raise NotImplementedError() @@ -44,6 +47,9 @@ class ErrorClipByValue(BaseErrorClipAttr): self.max = max self.min = min + def __str__(self): + return "ByValue, min=%f, max=%f" % (self.min, self.max) + def append_clip_op(self, block, grad_name): clip_op_desc = block.desc.append_op() clip_op_desc.set_type("clip") @@ -71,6 +77,9 @@ def error_clip_callback(block, context): class BaseGradientClipAttr(object): + def __str__(self): + raise NotImplementedError() + def process_context(self, context, param, grad): raise NotImplementedError() @@ -79,6 +88,9 @@ class BaseGradientClipAttr(object): class NullGradientClipAttr(BaseGradientClipAttr): + def __str__(self): + return "Null" + def process_context(self, context, param, grad): pass @@ -96,6 +108,9 @@ class GradientClipByValue(BaseGradientClipAttr): self.max = max self.min = min + def __str__(self): + return "ByValue, min=%f, max=%f" % (self.min, self.max) + def process_context(self, context, param, grad): pass @@ -108,6 +123,9 @@ class GradientClipByNorm(BaseGradientClipAttr): def __init__(self, clip_norm): self.clip_norm = clip_norm + def __str__(self): + return "ByNorm, clip_norm=%f" % self.clip_norm + def process_context(self, context, param, grad): pass @@ -124,6 +142,10 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): self.clip_norm = clip_norm self.group_name = group_name + def __str__(self): + return "ByGlobalNorm, group_name=%s, clip_norm=%f" % (self.group_name, + self.clip_norm) + def process_context(self, context, param, grad): if self.group_name not in context: context[self.group_name] = [] @@ -210,3 +232,5 @@ def append_gradient_clip_ops(param_grad): ClipByValue = GradientClipByValue +ClipByNorm = GradientClipByNorm +ClipByGlobalNorm = GradientClipByGlobalNorm diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py index 77f80442e06cb18402bb1b8b97aa9119c7473f54..121b407cae41fa477843b7252ebacc9053d5f7aa 100644 --- a/python/paddle/v2/fluid/distribute_transpiler.py +++ b/python/paddle/v2/fluid/distribute_transpiler.py @@ -153,11 +153,18 @@ class DistributeTranspiler: self.param_grad_ep_mapping[ep]["params"].append(param) self.param_grad_ep_mapping[ep]["grads"].append(grad) + rpc_client_var = program.global_block().create_var( + name="RPC_CLIENT_VAR", + psersistable=True, + dtype='float32', # dtype and shape is not used in fact + shape=[0]) + # create send_op send_op = program.global_block().append_op( type="send", inputs={"X": send_inputs}, - outputs={"Out": send_outputs}, + outputs={"Out": send_outputs, + "RPCClient": rpc_client_var}, attrs={"endpoints": pserver_endpoints, "epmap": eplist}) # step4 @@ -471,9 +478,9 @@ class DistributeTranspiler: else: self._append_pserver_non_opt_ops(optimize_sub_program, pserver_program, opt_op) - # Append the recv op + # Append the listen_and_serv op pserver_program.global_block().append_op( - type="recv", + type="listen_and_serv", inputs={}, outputs={}, attrs={ diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 4d8343e7de9526d527ebe93f334b59108d5ace8e..7f5187d29984482d91b5709bf4514d013028767a 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -14,6 +14,7 @@ import collections import contextlib +import re import numpy as np @@ -239,20 +240,30 @@ class Variable(object): def __str__(self): return self.to_string(True) - def to_string(self, throw_on_error): + def to_string(self, throw_on_error, with_details=False): """ Get debug string. Args: throw_on_error(bool): True if raise an exception when self is not intialized. + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True Returns(str): The debug string. """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) protostr = self.desc.serialize_to_string() proto = framework_pb2.VarDesc.FromString(str(protostr)) - return _debug_string_(proto, throw_on_error) + res_str = _debug_string_(proto, throw_on_error) + if with_details: + additional_attr = ("error_clip", "stop_gradient") + for attr_name in additional_attr: + res_str += "%s: %s\n" % (attr_name, + str(getattr(self, attr_name))) + return res_str __repr__ = __str__ @@ -478,7 +489,8 @@ class Operator(object): no_kernel_op_set = { 'feed', 'fetch', 'save', 'load', 'recurrent', 'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', - 'recv', 'parallel_do' + 'recv', 'listen_and_serv', 'parallel_do', 'save_combine', + 'load_combine' } if type not in no_kernel_op_set: self.desc.infer_var_type(self.block.desc) @@ -629,10 +641,36 @@ class Block(object): def __str__(self): return self.to_string(True) - def to_string(self, throw_on_error): - protostr = self.desc.serialize_to_string() - proto = framework_pb2.BlockDesc.FromString(str(protostr)) - return _debug_string_(proto, throw_on_error) + def to_string(self, throw_on_error, with_details=False): + """ + To debug string. + Args: + throw_on_error(bool): raise exception when self is not initialized + when throw_on_error is True + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + + Returns(str): The debug string. + + """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) + if with_details: + re_add_indent = re.compile(r"\n(.)") + res_str = "blocks {\n idx: %d\n parent_idx: %d" % ( + self.idx, self.parent_idx) + for var in self.vars.itervalues(): + res_str += "\n vars {\n %s }" % re_add_indent.sub( + r"\n \1", var.to_string(throw_on_error, with_details)) + for op in self.ops: + res_str += "\n ops {\n %s }" % re_add_indent.sub( + r"\n \1", op.to_string(throw_on_error)) + res_str += "\n}" + else: + protostr = self.desc.serialize_to_string() + proto = framework_pb2.BlockDesc.FromString(str(protostr)) + res_str = _debug_string_(proto, throw_on_error) + return res_str __repr__ = __str__ @@ -796,10 +834,29 @@ class Program(object): def __str__(self): return self.to_string(True) - def to_string(self, throw_on_error): - protostr = self.desc.serialize_to_string() - proto = framework_pb2.ProgramDesc.FromString(str(protostr)) - return _debug_string_(proto, throw_on_error) + def to_string(self, throw_on_error, with_details=False): + """ + To debug string. + Args: + throw_on_error(bool): raise exception when self is not initialized + when throw_on_error is True + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + + Returns(str): The debug string. + + """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) + if with_details: + res_str = "" + for block in self.blocks: + res_str += block.to_string(throw_on_error, with_details) + else: + protostr = self.desc.serialize_to_string() + proto = framework_pb2.ProgramDesc.FromString(str(protostr)) + res_str = _debug_string_(proto, throw_on_error) + return res_str def get_desc(self): return self.desc @@ -950,6 +1007,36 @@ class Parameter(Variable): self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) + def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error, with_details=False): + """ + To debug string. + Args: + throw_on_error(bool): raise exception when self is not initialized + when throw_on_error is True + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + + Returns(str): The debug string. + + """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) + if with_details: + res_str = Variable.to_string(self, throw_on_error, True) + additional_attr = ("trainable", "optimize_attr", "regularizer", + "gradient_clip_attr") + for attr_name in additional_attr: + res_str += "%s: %s\n" % (attr_name, + str(getattr(self, attr_name))) + else: + res_str = Variable.to_string(self, throw_on_error, False) + return res_str + + __repr__ = __str__ + # program is a global instance. _main_program_ = Program() diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index d56ec45c538b580f5520bc060b4b339bb1be0539..613dc20b6ea5533d126a73b7ec47796b3f812db5 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -46,6 +46,9 @@ def is_parameter(var): def is_persistable(var): + if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ + var.desc.type() == core.VarDesc.VarType.FETCH_LIST: + return False return var.persistable @@ -60,7 +63,12 @@ def _clone_var_in_block_(block, var): persistable=True) -def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): +def save_vars(executor, + dirname, + main_program=None, + vars=None, + predicate=None, + save_file_name=None): """ Save variables to directory by executor. @@ -69,9 +77,12 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): :param main_program: program. If vars is None, then filter all variables in this program which fit `predicate`. Default default_main_program. :param predicate: The Predicate describes a callable that returns a variable - as a bool. If it returns true, the variables will be saved. - :param vars: variables need to be saved. If specify vars, program & predicate + as a bool. If it returns true, the corresponding input variable will be saved. + :param vars: variables need to be saved. If vars is specified, program & predicate will be ignored + :param save_file_name: The name of a single file that all vars are saved to. + If it is None, save variables to separate files. + :return: None """ if vars is None: @@ -83,21 +94,39 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): save_vars( executor, dirname=dirname, - vars=filter(predicate, main_program.list_vars())) + vars=filter(predicate, main_program.list_vars()), + save_file_name=save_file_name) else: save_program = Program() save_block = save_program.global_block() + + save_var_map = {} for each_var in vars: new_var = _clone_var_in_block_(save_block, each_var) + if save_file_name is None: + save_block.append_op( + type='save', + inputs={'X': [new_var]}, + outputs={}, + attrs={'file_path': os.path.join(dirname, new_var.name)}) + else: + save_var_map[new_var.name] = new_var + + if save_file_name is not None: + save_var_list = [] + for name in sorted(save_var_map.keys()): + save_var_list.append(save_var_map[name]) + save_block.append_op( - type='save', - inputs={'X': [new_var]}, + type='save_combine', + inputs={'X': save_var_list}, outputs={}, - attrs={'file_path': os.path.join(dirname, new_var.name)}) + attrs={'file_path': os.path.join(dirname, save_file_name)}) + executor.run(save_program) -def save_params(executor, dirname, main_program=None): +def save_params(executor, dirname, main_program=None, save_file_name=None): """ Save all parameters to directory with executor. """ @@ -106,10 +135,12 @@ def save_params(executor, dirname, main_program=None): dirname=dirname, main_program=main_program, vars=None, - predicate=is_parameter) + predicate=is_parameter, + save_file_name=save_file_name) -def save_persistables(executor, dirname, main_program=None): +def save_persistables(executor, dirname, main_program=None, + save_file_name=None): """ Save all persistables to directory with executor. """ @@ -118,21 +149,30 @@ def save_persistables(executor, dirname, main_program=None): dirname=dirname, main_program=main_program, vars=None, - predicate=is_persistable) + predicate=is_persistable, + save_file_name=save_file_name) -def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): +def load_vars(executor, + dirname, + main_program=None, + vars=None, + predicate=None, + load_file_name=None): """ Load variables from directory by executor. - :param executor: executor that save variable + :param executor: executor that load variable :param dirname: directory path :param main_program: program. If vars is None, then filter all variables in this program which fit `predicate`. Default default_main_program(). :param predicate: The Predicate describes a callable that returns a variable - as a bool. If it returns true, the variables will be loaded. - :param vars: variables need to be loaded. If specify vars, program & + as a bool. If it returns true, the corresponding input variable will be loaded. + :param vars: variables need to be loaded. If vars is specified, program & predicate will be ignored + :param load_file_name: The name of the single file that all vars are loaded from. + If it is None, load variables from separate files. + :return: None """ if vars is None: @@ -144,23 +184,40 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): load_vars( executor, dirname=dirname, - vars=filter(predicate, main_program.list_vars())) + vars=filter(predicate, main_program.list_vars()), + load_file_name=load_file_name) else: load_prog = Program() load_block = load_prog.global_block() + + load_var_map = {} for each_var in vars: assert isinstance(each_var, Variable) new_var = _clone_var_in_block_(load_block, each_var) + if load_file_name is None: + load_block.append_op( + type='load', + inputs={}, + outputs={'Out': [new_var]}, + attrs={'file_path': os.path.join(dirname, new_var.name)}) + else: + load_var_map[new_var.name] = new_var + + if load_file_name is not None: + load_var_list = [] + for name in sorted(load_var_map.keys()): + load_var_list.append(load_var_map[name]) + load_block.append_op( - type='load', + type='load_combine', inputs={}, - outputs={"Out": [new_var]}, - attrs={'file_path': os.path.join(dirname, new_var.name)}) + outputs={"Out": load_var_list}, + attrs={'file_path': os.path.join(dirname, load_file_name)}) executor.run(load_prog) -def load_params(executor, dirname, main_program=None): +def load_params(executor, dirname, main_program=None, load_file_name=None): """ load all parameters from directory by executor. """ @@ -168,10 +225,12 @@ def load_params(executor, dirname, main_program=None): executor, dirname=dirname, main_program=main_program, - predicate=is_parameter) + predicate=is_parameter, + load_file_name=load_file_name) -def load_persistables(executor, dirname, main_program=None): +def load_persistables(executor, dirname, main_program=None, + load_file_name=None): """ load all persistables from directory by executor. """ @@ -179,7 +238,8 @@ def load_persistables(executor, dirname, main_program=None): executor, dirname=dirname, main_program=main_program, - predicate=is_persistable) + predicate=is_persistable, + load_file_name=load_file_name) def get_inference_program(target_vars, main_program=None): @@ -238,7 +298,8 @@ def save_inference_model(dirname, feeded_var_names, target_vars, executor, - main_program=None): + main_program=None, + save_file_name=None): """ Build a model especially for inference, and save it to directory by the executor. @@ -249,6 +310,8 @@ def save_inference_model(dirname, :param executor: executor that save inference model :param main_program: original program, which will be pruned to build the inference model. Default default_main_program(). + :param save_file_name: The name of a single file that all parameters are saved to. + If it is None, save parameters to separate files. :return: None """ @@ -283,25 +346,7 @@ def save_inference_model(dirname, with open(model_file_name, "wb") as f: f.write(inference_program.desc.serialize_to_string()) - save_params(executor, dirname, main_program) - - -def load_persistables_if_exist(executor, dirname, main_program=None): - filenames = next(os.walk(dirname))[2] - filenames = set(filenames) - - def _is_presistable_and_exist_(var): - if not is_persistable(var): - return False - else: - return var.name in filenames - - load_vars( - executor, - dirname, - main_program=main_program, - vars=None, - predicate=_is_presistable_and_exist_) + save_persistables(executor, dirname, inference_program, save_file_name) def get_feed_targets_names(program): @@ -322,13 +367,15 @@ def get_fetch_targets_names(program): return fetch_targets_names -def load_inference_model(dirname, executor): +def load_inference_model(dirname, executor, load_file_name=None): """ Load inference model from a directory :param dirname: directory path :param executor: executor that load inference model - + :param load_file_name: The name of the single file that all parameters are loaded from. + If it is None, load parameters from separate files. + :return: [program, feed_target_names, fetch_targets] program: program especially for inference. feed_target_names: Names of variables that need to feed data @@ -342,7 +389,7 @@ def load_inference_model(dirname, executor): program_desc_str = f.read() program = Program.parse_from_string(program_desc_str) - load_persistables_if_exist(executor, dirname, program) + load_persistables(executor, dirname, program, load_file_name) feed_target_names = get_feed_targets_names(program) fetch_target_names = get_fetch_targets_names(program) @@ -359,6 +406,7 @@ def get_parameter_value(para, executor): :param executor: executor for retrieving the value :param para: the given parameter + :return: the LoDTensor for the parameter """ assert is_parameter(para) @@ -377,6 +425,7 @@ def get_parameter_value_by_name(name, executor, program=None): :param name: the name of the parameter :param program: the program where the variable is found Default default_main_program(). + :return: the LoDTensor for the variable """ if program is None: diff --git a/python/paddle/v2/fluid/layers/io.py b/python/paddle/v2/fluid/layers/io.py index b7b2cf2296cc8868dd0b5eb6cd6d58b9ae795d5d..85e44a0e5149bd36f2787d9f2d516dbe4abdbb2e 100644 --- a/python/paddle/v2/fluid/layers/io.py +++ b/python/paddle/v2/fluid/layers/io.py @@ -108,7 +108,7 @@ class ListenAndServ(object): """ def __init__(self, endpoint, fan_in=1, optimizer_mode=True): - self.helper = LayerHelper("recv") + self.helper = LayerHelper("listen_and_serv") self.inputs = [] self.outputs = [] self.endpoint = endpoint @@ -158,7 +158,7 @@ class ListenAndServ(object): param_names = [p.name for p in params] grad_names = [g.name for g in grads] parent_block.append_op( - type='recv', + type='listen_and_serv', inputs={}, outputs={}, attrs={ @@ -196,3 +196,31 @@ def Send(endpoints, send_vars, get_vars): outputs={"Out": get_vars}, attrs={"endpoints": endpoints, "epmap": epmap}) + + +def Recv(endpoints, get_vars): + """ + Recv layer + + Args: + endpoints: comma seperated IP:PORT pairs in the order + of send_vars to send + send_vars: vars to send + get_vars: vars to get from server after send completes. + + Send variables to the server side, and get vars from server + side when server have finished running server side program. + """ + assert (type(send_vars) == list) + assert (type(get_vars) == list) + + epmap = endpoints.split(",") + endpoints = list(set(epmap)) + + helper = LayerHelper("Recv", **locals()) + helper.append_op( + type="recv", + inputs={"X": get_vars}, + outputs={"Out": get_vars}, + attrs={"endpoints": endpoints, + "epmap": epmap}) diff --git a/python/paddle/v2/fluid/layers/math_op_patch.py b/python/paddle/v2/fluid/layers/math_op_patch.py index f359e70126f7601b75261e795b5a37bdc241112e..79a130a3eb148e6c5a8fa3cdf174780b354c23c9 100644 --- a/python/paddle/v2/fluid/layers/math_op_patch.py +++ b/python/paddle/v2/fluid/layers/math_op_patch.py @@ -145,7 +145,9 @@ def monkey_patch_variable(): # a*b == b*a. Do not need to reverse explicitly ("__rmul__", "elementwise_mul", False), ("__div__", "elementwise_div", False), - ("__rdiv__", "elementwise_div", True)): + ("__rdiv__", "elementwise_div", True), + ("__pow__", "elementwise_pow", False), + ("__rpow__", "elementwise_pow", True)): setattr(Variable, method_name, _elemwise_method_creator_(method_name, op_type, reverse)) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index d11dccfd22124d58d8634c01a00527c373b92f00..c38e21087de1bf7076ce5aaf23d4d4faaebb50a7 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -847,7 +847,35 @@ def cos_sim(X, Y, **kwargs): return out -def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs): +def dropout(x, dropout_prob, is_test=False, seed=None, **kwargs): + """ + Computes dropout. + + Drop or keep each element of `x` independently. Dropout is a regularization + technique for reducing overfitting by preventing neuron co-adaption during + training. The dropout operator randomly set (according to the given dropout + probability) the outputs of some units to zero, while others are remain + unchanged. + + Args: + x(variable): The input tensor. + dropout_prob(float): Probability of setting units to zero. + is_test(bool): A flag indicating whether it is in test phrase or not. + seed(int): A Python integer used to create random seeds. If this + parameter is set to None, a random seed is used. + NOTE: If an integer seed is given, always the same output + units will be dropped. DO NOT use a fixed seed in training. + + Returns: + Variable: A tensor variable. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") + droped = fluid.layers.dropout(input=x, dropout_rate=0.5) + """ + helper = LayerHelper('dropout', **kwargs) out = helper.create_tmp_variable(dtype=x.dtype) mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True) @@ -856,9 +884,12 @@ def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs): inputs={'X': [x]}, outputs={'Out': [out], 'Mask': [mask]}, - attrs={'dropout_prob': dropout_prob, - 'is_test': is_test, - 'seed': seed}) + attrs={ + 'dropout_prob': dropout_prob, + 'is_test': is_test, + 'fix_seed': seed is not None, + 'seed': seed if seed is not None else 0 + }) return out diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 022a94cad440f13383a927233195bb008a688843..ee3172c7b8dfd65c693e5aee9b55179e654ce7be 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -56,6 +56,7 @@ __all__ = [ 'elementwise_mul', 'elementwise_max', 'elementwise_min', + 'elementwise_pow', 'clip', 'clip_by_norm', 'sequence_softmax', diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index 6e7d09459c07c77a8579300a1c67ae36dc3d2ba2..c435c5206d1ef1ef57683a1a47bf089be6526f38 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -16,12 +16,14 @@ from ..layer_helper import LayerHelper from ..param_attr import ParamAttr from ..framework import convert_np_dtype_to_dtype_ from ..framework import Variable +from ..initializer import Constant from ..core import DataType import numpy __all__ = [ 'create_tensor', 'create_parameter', + 'create_global_var', 'cast', 'concat', 'sums', @@ -58,13 +60,22 @@ def create_parameter(shape, Returns: Parameter: the created parameter """ - helper = LayerHelper("create_parameter") + helper = LayerHelper("create_parameter", **locals()) if attr is None: attr = ParamAttr() return helper.create_parameter(attr, shape, dtype, is_bias, default_initializer) +def create_global_var(shape, value, dtype, persistable=False, name=None): + helper = LayerHelper("global_var", **locals()) + var = helper.create_global_variable( + dtype=dtype, shape=shape, persistable=persistable, name=name) + helper.set_variable_initializer( + var, initializer=Constant(value=float(value))) + return var + + def cast(x, dtype): """ This function takes in the input with input_dtype diff --git a/python/paddle/v2/fluid/learning_rate_decay.py b/python/paddle/v2/fluid/learning_rate_decay.py new file mode 100644 index 0000000000000000000000000000000000000000..96b3e9a0d73cede5d6e36308a53ab8927a95a6da --- /dev/null +++ b/python/paddle/v2/fluid/learning_rate_decay.py @@ -0,0 +1,125 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import layers +from framework import Variable + +__all__ = ['exponential_decay', 'natural_exp_decay', 'inverse_time_decay'] +""" +When training a model, it's often useful to decay the +learning rate during training process, this is called +learning_rate_decay. There are many strategies to do +this, this module will provide some classical method. +User can also implement their own learning_rate_decay +strategy according to this module. +""" + + +def exponential_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + """Applies exponential decay to the learning rate. + + ```python + decayed_learning_rate = learning_rate * + decay_rate ^ (global_step / decay_steps) + ``` + Args: + learning_rate: A scalar float32 value or a Variable. This + will be the initial learning rate during training + global_step: A Variable that record the training step. + decay_steps: A Python `int32` number. + decay_rate: A Python `float` number. + staircase: Boolean. If set true, decay the learning rate every decay_steps. + + Returns: + The decayed learning rate + """ + if not isinstance(global_step, Variable): + raise ValueError("global_step is required for exponential_decay.") + + # update learning_rate + div_res = global_step / decay_steps + if staircase: + div_res = layers.floor(x=div_res) + return learning_rate * (decay_rate**div_res) + + +def natural_exp_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + """Applies natural exponential decay to the initial learning rate. + + ```python + if not staircase: + decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps)) + else: + decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps)) + ``` + Args: + learning_rate: A scalar float32 value or a Variable. This + will be the initial learning rate during training + global_step: A Variable that record the training step. + decay_steps: A Python `int32` number. + decay_rate: A Python `float` number. + staircase: Boolean. If set true, decay the learning rate every decay_steps. + + Returns: + The decayed learning rate + """ + if not isinstance(global_step, Variable): + raise ValueError("global_step is required for natural_exp_decay.") + + div_res = global_step / decay_steps + if staircase: + div_res = layers.floor(x=div_res) + return learning_rate * layers.exp(x=(-1 * decay_rate * div_res)) + + +def inverse_time_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + """Applies inverse time decay to the initial learning rate. + + ```python + if staircase: + decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) + else + decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) + ``` + Args: + learning_rate: A scalar float32 value or a Variable. This + will be the initial learning rate during training + global_step: A Variable that record the training step. + decay_steps: A Python `int32` number. + decay_rate: A Python `float` number. + staircase: Boolean. If set true, decay the learning rate every decay_steps. + + Returns: + The decayed learning rate + """ + if not isinstance(global_step, Variable): + raise ValueError("global_step is required for inverse_time_decay.") + + div_res = global_step / decay_steps + if staircase: + div_res = layers.floor(x=div_res) + + return learning_rate / (1 + decay_rate * div_res) diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 0c3533b892176edd5dfd111fdd771cc17d468168..7844a4e2df1ce3989e48082f6472292560fbf1ee 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -15,6 +15,7 @@ from collections import defaultdict import framework +import layers from backward import append_backward from framework import unique_name, program_guard from initializer import Constant @@ -33,9 +34,11 @@ class Optimizer(object): but need to use one of it's implementation. """ - def __init__(self, global_step=None, regularization=None): + def __init__(self, learning_rate, global_step=None, regularization=None): + assert learning_rate is not None self._global_step = global_step self.regularization = regularization + self._global_learning_rate = learning_rate # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. @@ -43,6 +46,28 @@ class Optimizer(object): self._accumulators = defaultdict(lambda: dict()) self.helper = None + def _create_global_learning_rate(self): + if isinstance(self._global_learning_rate, float): + self._global_learning_rate = layers.create_global_var( + name=unique_name("learning_rate"), + shape=[1], + value=float(self._global_learning_rate), + dtype='float32', + persistable=True) + + if not isinstance(self._global_learning_rate, framework.Variable): + raise ValueError("learning rate should be a Variable, " + "actual type is %s", + type(self._global_learning_rate)) + + @property + def global_learning_rate(self): + """ + get global decayed learning rate + :return: + """ + return self._global_learning_rate + def _append_optimize_op(self, block, param_and_grad): """ append optimize operator to block and return all the added optimize_op """ @@ -52,17 +77,7 @@ class Optimizer(object): # create learning rate variable for every parameter param = param_and_grad[0] param_lr = param.optimize_attr['learning_rate'] - param_lr_shape = [1] - param_lr_var = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=param_lr_shape, - lod_level=1, - persistable=True) - param_lr = param_lr * self._learning_rate - self.helper.set_variable_initializer( - var=param_lr_var, initializer=Constant(param_lr)) - return param_lr_var + return self._global_learning_rate * param_lr def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -163,7 +178,7 @@ class Optimizer(object): optimization. This will include parameter update ops, global step update ops and any other custom ops required by subclasses to manage their internal state. - :param startup_program: + :param startup_program: """ # This is a default implementation of create_optimization_pass that # can be shared by most optimizers. This implementation assumes that @@ -178,6 +193,7 @@ class Optimizer(object): self.helper = LayerHelper(self.__class__.__name__) self._create_accumulators(loss.block, [p[0] for p in parameters_and_grads]) + self._create_global_learning_rate() optimize_ops = [] for param_and_grad in parameters_and_grads: @@ -231,9 +247,9 @@ class SGDOptimizer(Optimizer): def __init__(self, learning_rate, **kwargs): assert learning_rate is not None - super(SGDOptimizer, self).__init__(**kwargs) + super(SGDOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "sgd" - self._learning_rate = learning_rate def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -259,9 +275,9 @@ class MomentumOptimizer(Optimizer): def __init__(self, learning_rate, momentum, use_nesterov=False, **kwargs): assert learning_rate is not None assert momentum is not None - super(MomentumOptimizer, self).__init__(**kwargs) + super(MomentumOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "momentum" - self._learning_rate = learning_rate self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -303,9 +319,9 @@ class AdagradOptimizer(Optimizer): def __init__(self, learning_rate, epsilon=1.0e-6, **kwargs): assert learning_rate is not None assert epsilon is not None - super(AdagradOptimizer, self).__init__(**kwargs) + super(AdagradOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "adagrad" - self._learning_rate = learning_rate self._epsilon = epsilon def _create_accumulators(self, block, parameters): @@ -352,9 +368,9 @@ class AdamOptimizer(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamOptimizer, self).__init__(**kwargs) + super(AdamOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "adam" - self._learning_rate = learning_rate self._beta1 = beta1 self._beta2 = beta2 self._epsilon = epsilon @@ -457,9 +473,9 @@ class AdamaxOptimizer(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamaxOptimizer, self).__init__(**kwargs) + super(AdamaxOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "adamax" - self._learning_rate = learning_rate self._beta1 = beta1 self._beta2 = beta2 self._epsilon = epsilon @@ -535,9 +551,9 @@ class DecayedAdagradOptimizer(Optimizer): assert decay is not None assert epsilon is not None - super(DecayedAdagradOptimizer, self).__init__(**kwargs) + super(DecayedAdagradOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "decayed_adagrad" - self._learning_rate = learning_rate self._decay = decay self._epsilon = epsilon diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py index 51c1c8aa705513825b46fb936c6c99090c50fb7d..d4a2cd7eeabecb60699b5be94d89cf7a916749e7 100644 --- a/python/paddle/v2/fluid/profiler.py +++ b/python/paddle/v2/fluid/profiler.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.fluid.core as core +import core from contextlib import contextmanager import os -__all__ = ['CudaProfiler'] +__all__ = ['cuda_profiler', 'reset_profiler', 'profiler'] NVPROF_CONFIG = [ "gpustarttimestamp", diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py index c2f28eecfda71e305d96c5a6b62c4f5f0fbf3fa6..0273da647afb6e95a136b5ecd0975347d9a378ff 100644 --- a/python/paddle/v2/fluid/regularizer.py +++ b/python/paddle/v2/fluid/regularizer.py @@ -87,6 +87,11 @@ class WeightDecayRegularizer(object): """ raise NotImplementedError() + def __str__(self): + """Debug string + """ + raise NotImplementedError() + class L2DecayRegularizer(WeightDecayRegularizer): """Implements the L2 Weight Decay Regularization @@ -123,6 +128,9 @@ class L2DecayRegularizer(WeightDecayRegularizer): return decay + def __str__(self): + return "L2Decay, regularization_coeff=%f" % self._regularization_coeff + class L1DecayRegularizer(WeightDecayRegularizer): """Implements the L1 Weight Decay Regularization @@ -163,6 +171,9 @@ class L1DecayRegularizer(WeightDecayRegularizer): return decay + def __str__(self): + return "L1Decay, regularization_coeff=%f" % self._regularization_coeff + # We short the class name, since users will use the regulaizer with the package # name. The sample code: diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt index 628ce60b406d880d961d705a6abd2b5236fb1c8c..26a80abcb5839e80b5a22f9415315519ce3042e8 100644 --- a/python/paddle/v2/fluid/tests/CMakeLists.txt +++ b/python/paddle/v2/fluid/tests/CMakeLists.txt @@ -5,9 +5,11 @@ if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_OPS test_recv_op) endif(NOT WITH_DISTRIBUTE) +list(REMOVE_ITEM TEST_OPS test_warpctc_op) foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() +py_test(test_warpctc_op SRCS test_warpctc_op.py ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR}) add_subdirectory(book) add_subdirectory(book_distribute) diff --git a/python/paddle/v2/fluid/tests/book/CMakeLists.txt b/python/paddle/v2/fluid/tests/book/CMakeLists.txt index dda02c03fd531445c1b33b39a6ded10921991d9c..a870478db8a086ea8f2d112e4e06444398d61f8a 100644 --- a/python/paddle/v2/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/v2/fluid/tests/book/CMakeLists.txt @@ -1,9 +1,7 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -list(REMOVE_ITEM TEST_OPS test_image_classification_train test_recognize_digits) -py_test(test_image_classification_train_resnet SRCS test_image_classification_train.py ARGS resnet) -py_test(test_image_classification_train_vgg SRCS test_image_classification_train.py ARGS vgg) +list(REMOVE_ITEM TEST_OPS test_recognize_digits) py_test(test_recognize_digits_mlp_cpu SRCS test_recognize_digits.py ARGS mlp) diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index 0b954c60b6bc2d721c0373243e747056f8f572cf..27f34b17339db31ef3c07555db946fa76d6f1922 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -12,44 +12,74 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid +import contextlib +import unittest -x = fluid.layers.data(name='x', shape=[13], dtype='float32') -y_predict = fluid.layers.fc(input=x, size=1, act=None) +def main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return -y = fluid.layers.data(name='y', shape=[1], dtype='float32') + x = fluid.layers.data(name='x', shape=[13], dtype='float32') -cost = fluid.layers.square_error_cost(input=y_predict, label=y) -avg_cost = fluid.layers.mean(x=cost) + y_predict = fluid.layers.fc(input=x, size=1, act=None) -sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) -sgd_optimizer.minimize(avg_cost) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') -BATCH_SIZE = 20 + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(x=cost) -train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.uci_housing.train(), buf_size=500), - batch_size=BATCH_SIZE) + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) -place = fluid.CPUPlace() -feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) -exe = fluid.Executor(place) + BATCH_SIZE = 20 -exe.run(fluid.default_startup_program()) + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=BATCH_SIZE) -PASS_NUM = 100 -for pass_id in range(PASS_NUM): - fluid.io.save_persistables(exe, "./fit_a_line.model/") - fluid.io.load_persistables(exe, "./fit_a_line.model/") - for data in train_reader(): - avg_loss_value, = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[avg_cost]) - print(avg_loss_value) - if avg_loss_value[0] < 10.0: - exit(0) # if avg cost less than 10.0, we think our code is good. -exit(1) + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) + exe = fluid.Executor(place) + + exe.run(fluid.default_startup_program()) + + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + fluid.io.save_persistables(exe, "./fit_a_line.model/") + fluid.io.load_persistables(exe, "./fit_a_line.model/") + for data in train_reader(): + avg_loss_value, = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost]) + print(avg_loss_value) + if avg_loss_value[0] < 10.0: + return + raise AssertionError("Fit a line cost is too large, {0:2.2}".format( + avg_loss_value[0])) + + +class TestFitALine(unittest.TestCase): + def test_cpu(self): + with self.program_scope_guard(): + main(use_cuda=False) + + def test_cuda(self): + with self.program_scope_guard(): + main(use_cuda=True) + + @contextlib.contextmanager + def program_scope_guard(self): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index 30582a21d0a5eeab125f3a2764b45b51aa4f94b6..a4168d16db06f904faed811fdda3f0fe52f0b27b 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -14,10 +14,10 @@ from __future__ import print_function -import sys - import paddle.v2 as paddle import paddle.v2.fluid as fluid +import unittest +import contextlib def resnet_cifar10(input, depth=32): @@ -89,56 +89,89 @@ def vgg16_bn_drop(input): return fc2 -classdim = 10 -data_shape = [3, 32, 32] - -images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') -label = fluid.layers.data(name='label', shape=[1], dtype='int64') - -net_type = "vgg" -if len(sys.argv) >= 2: - net_type = sys.argv[1] - -if net_type == "vgg": - print("train vgg net") - net = vgg16_bn_drop(images) -elif net_type == "resnet": - print("train resnet") - net = resnet_cifar10(images, 32) -else: - raise ValueError("%s network is not supported" % net_type) - -predict = fluid.layers.fc(input=net, size=classdim, act='softmax') -cost = fluid.layers.cross_entropy(input=predict, label=label) -avg_cost = fluid.layers.mean(x=cost) - -optimizer = fluid.optimizer.Adam(learning_rate=0.001) -opts = optimizer.minimize(avg_cost) - -accuracy = fluid.evaluator.Accuracy(input=predict, label=label) - -BATCH_SIZE = 128 -PASS_NUM = 1 - -train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=128 * 10), - batch_size=BATCH_SIZE) - -place = fluid.CPUPlace() -exe = fluid.Executor(place) -feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) -exe.run(fluid.default_startup_program()) - -for pass_id in range(PASS_NUM): - accuracy.reset(exe) - for data in train_reader(): - loss, acc = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[avg_cost] + accuracy.metrics) - pass_acc = accuracy.eval(exe) - print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( - pass_acc)) - # this model is slow, so if we can train two mini batch, we think it works properly. - exit(0) -exit(1) +def main(net_type, use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + + classdim = 10 + data_shape = [3, 32, 32] + + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + if net_type == "vgg": + print("train vgg net") + net = vgg16_bn_drop(images) + elif net_type == "resnet": + print("train resnet") + net = resnet_cifar10(images, 32) + else: + raise ValueError("%s network is not supported" % net_type) + + predict = fluid.layers.fc(input=net, size=classdim, act='softmax') + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + optimizer = fluid.optimizer.Adam(learning_rate=0.001) + optimizer.minimize(avg_cost) + + accuracy = fluid.evaluator.Accuracy(input=predict, label=label) + + BATCH_SIZE = 128 + PASS_NUM = 1 + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=128 * 10), + batch_size=BATCH_SIZE) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) + exe.run(fluid.default_startup_program()) + + loss = 0.0 + for pass_id in range(PASS_NUM): + accuracy.reset(exe) + for data in train_reader(): + loss, acc = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost] + accuracy.metrics) + pass_acc = accuracy.eval(exe) + print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( + pass_acc)) + return + + raise AssertionError( + "Image classification loss is too large, {0:2.2}".format(loss)) + + +class TestImageClassification(unittest.TestCase): + def test_vgg_cuda(self): + with self.scope_prog_guard(): + main('vgg', use_cuda=True) + + def test_resnet_cuda(self): + with self.scope_prog_guard(): + main('resnet', use_cuda=True) + + def test_vgg_cpu(self): + with self.scope_prog_guard(): + main('vgg', use_cuda=False) + + def test_resnet_cpu(self): + with self.scope_prog_guard(): + main('resnet', use_cuda=False) + + @contextlib.contextmanager + def scope_prog_guard(self): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index 1a342bf1fbbc0e5f4e3c7d440424b66c4b9f732f..f85768de99adb8b5005b23278ad807a24c5bff65 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -175,7 +175,7 @@ def main(): paddle.reader.shuffle( paddle.dataset.conll05.test(), buf_size=8192), batch_size=BATCH_SIZE) - #place = fluid.CPUPlace() + # place = fluid.CPUPlace() place = fluid.CUDAPlace(0) feeder = fluid.DataFeeder( feed_list=[ diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py index ac7ef4046f9ff55c2cbfc28b50784b9bffb80d53..b4b6020f58e7538dfe0f98c17d61f3614c3c6fc4 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py @@ -45,8 +45,9 @@ BATCH_SIZE = 64 def loss_net(hidden, label): prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) - return fluid.layers.mean(x=loss), fluid.layers.accuracy( - input=prediction, label=label) + avg_loss = fluid.layers.mean(x=loss) + acc = fluid.layers.accuracy(input=prediction, label=label) + return prediction, avg_loss, acc def mlp(img, label): @@ -73,8 +74,7 @@ def conv_net(img, label): return loss_net(conv_pool_2, label) -def main(): - args = parse_arg() +def train(args, save_dirname=None): print("recognize digits with args: {0}".format(" ".join(sys.argv[1:]))) img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') @@ -91,7 +91,8 @@ def main(): with pd.do(): img_ = pd.read_input(img) label_ = pd.read_input(label) - for o in net_conf(img_, label_): + prediction, avg_loss, acc = net_conf(img_, label_) + for o in [avg_loss, acc]: pd.write_output(o) avg_loss, acc = pd() @@ -99,7 +100,7 @@ def main(): avg_loss = fluid.layers.mean(x=avg_loss) acc = fluid.layers.mean(x=acc) else: - avg_loss, acc = net_conf(img, label) + prediction, avg_loss, acc = net_conf(img, label) test_program = fluid.default_main_program().clone() @@ -137,7 +138,10 @@ def main(): acc_val = numpy.array(acc_set).mean() avg_loss_val = numpy.array(avg_loss_set).mean() if float(acc_val) > 0.85: # test acc > 85% - exit(0) + if save_dirname is not None: + fluid.io.save_inference_model(save_dirname, ["img"], + [prediction], exe) + return else: print( 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. @@ -145,5 +149,36 @@ def main(): float(avg_loss_val), float(acc_val))) +def infer(args, save_dirname=None): + if save_dirname is None: + return + + place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + + # Use fluid.io.load_inference_model to obtain the inference program desc, + # the feed_target_names (the names of variables that will be feeded + # data using feed operators), and the fetch_targets (variables that + # we want to obtain data from using fetch operators). + [inference_program, feed_target_names, + fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) + + # The input's dimension of conv should be 4-D or 5-D. + tensor_img = numpy.random.rand(1, 1, 28, 28).astype("float32") + + # Construct feed as a dictionary of {feed_target_name: feed_target_data} + # and results will contain a list of data corresponding to fetch_targets. + results = exe.run(inference_program, + feed={feed_target_names[0]: tensor_img}, + fetch_list=fetch_targets) + print("infer results: ", results[0]) + + if __name__ == '__main__': - main() + args = parse_arg() + if not args.use_cuda and not args.parallel: + save_dirname = "recognize_digits_" + args.nn_type + ".inference.model" + else: + save_dirname = None + train(args, save_dirname) + infer(args, save_dirname) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py similarity index 52% rename from python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py rename to python/paddle/v2/fluid/tests/book/test_understand_sentiment.py index 529223eba8af6d968b490068f34559880312515d..2ba9077a26202b1c16cc480823115f7ad55c2c67 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle.v2 as paddle +import unittest import paddle.v2.fluid as fluid +import paddle.v2 as paddle +import contextlib + + +def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, + hid_dim=32): + emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim]) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) + adam_optimizer.minimize(avg_cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, accuracy def stacked_lstm_net(data, @@ -51,63 +78,77 @@ def stacked_lstm_net(data, avg_cost = fluid.layers.mean(x=cost) adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) adam_optimizer.minimize(avg_cost) - accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) - return avg_cost, accuracy, accuracy.metrics[0] - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def main(): - BATCH_SIZE = 100 - PASS_NUM = 5 + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, accuracy - word_dict = paddle.dataset.imdb.word_dict() - print "load word dict successfully" + +def main(word_dict, net_method, use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + + BATCH_SIZE = 128 + PASS_NUM = 5 dict_dim = len(word_dict) class_dim = 2 data = fluid.layers.data( name="words", shape=[1], dtype="int64", lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") - cost, accuracy, acc_out = stacked_lstm_net( + cost, acc_out = net_method( data, label, input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( paddle.dataset.imdb.train(word_dict), buf_size=1000), batch_size=BATCH_SIZE) - place = fluid.CPUPlace() + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place) exe.run(fluid.default_startup_program()) for pass_id in xrange(PASS_NUM): - accuracy.reset(exe) for data in train_data(): cost_val, acc_val = exe.run(fluid.default_main_program(), feed=feeder.feed(data), fetch_list=[cost, acc_out]) - pass_acc = accuracy.eval(exe) - print("cost=" + str(cost_val) + " acc=" + str(acc_val) + - " pass_acc=" + str(pass_acc)) - if cost_val < 1.0 and acc_val > 0.8: - exit(0) - exit(1) + print("cost=" + str(cost_val) + " acc=" + str(acc_val)) + if cost_val < 0.4 and acc_val > 0.8: + return + raise AssertionError("Cost is too large for {0}".format( + net_method.__name__)) + + +class TestUnderstandSentiment(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.word_dict = paddle.dataset.imdb.word_dict() + + @contextlib.contextmanager + def new_program_scope(self): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield + + def test_conv_cpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=convolution_net, use_cuda=False) + + def test_stacked_lstm_cpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=stacked_lstm_net, use_cuda=False) + + def test_conv_gpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=convolution_net, use_cuda=True) + + def test_stacked_lstm_gpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=stacked_lstm_net, use_cuda=True) if __name__ == '__main__': - main() + unittest.main() diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py deleted file mode 100644 index df27399dd215a579d7e3f8a1659180a06b1e7f64..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import numpy as np -import paddle.v2 as paddle -import paddle.v2.fluid as fluid - - -def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, - hid_dim=32): - emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim]) - conv_3 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=3, - act="tanh", - pool_type="sqrt") - conv_4 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=4, - act="tanh", - pool_type="sqrt") - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") - cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) - adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) - adam_optimizer.minimize(avg_cost) - accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) - return avg_cost, accuracy, accuracy.metrics[0] - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def main(): - BATCH_SIZE = 100 - PASS_NUM = 5 - - word_dict = paddle.dataset.imdb.word_dict() - dict_dim = len(word_dict) - class_dim = 2 - - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - cost, accuracy, acc_out = convolution_net( - data, label, input_dim=dict_dim, class_dim=class_dim) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=BATCH_SIZE) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - feeder = fluid.DataFeeder(feed_list=[data, label], place=place) - - exe.run(fluid.default_startup_program()) - - for pass_id in xrange(PASS_NUM): - accuracy.reset(exe) - for data in train_data(): - cost_val, acc_val = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[cost, acc_out]) - pass_acc = accuracy.eval(exe) - print("cost=" + str(cost_val) + " acc=" + str(acc_val) + - " pass_acc=" + str(pass_acc)) - if cost_val < 1.0 and pass_acc > 0.8: - exit(0) - exit(1) - - -if __name__ == '__main__': - main() diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py deleted file mode 100644 index 117f74c59ad5bf6bb67711801cd7b9a41f39f1f8..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import paddle.v2 as paddle -import paddle.v2.fluid as fluid -from paddle.v2.fluid.layer_helper import LayerHelper - - -def lstm(x, c_pre_init, hidden_dim, forget_bias=None): - """ - This function helps create an operator for the LSTM (Long Short Term - Memory) cell that can be used inside an RNN. - """ - helper = LayerHelper('lstm_unit', **locals()) - rnn = fluid.layers.StaticRNN() - with rnn.step(): - c_pre = rnn.memory(init=c_pre_init) - x_t = rnn.step_input(x) - - before_fc = fluid.layers.concat(input=[x_t, c_pre], axis=1) - after_fc = fluid.layers.fc(input=before_fc, size=hidden_dim * 4) - - dtype = x.dtype - c = helper.create_tmp_variable(dtype) - h = helper.create_tmp_variable(dtype) - - helper.append_op( - type='lstm_unit', - inputs={"X": after_fc, - "C_prev": c_pre}, - outputs={"C": c, - "H": h}, - attrs={"forget_bias": forget_bias}) - - rnn.update_memory(c_pre, c) - rnn.output(h) - - return rnn() - - -def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): - data = fluid.layers.data( - name="words", - shape=[seq_len * batch_size, 1], - append_batch_size=False, - dtype="int64", - lod_level=1) - label = fluid.layers.data( - name="label", - shape=[batch_size, 1], - append_batch_size=False, - dtype="int64") - - emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) - emb = fluid.layers.reshape(x=emb, shape=[batch_size, seq_len, emb_dim]) - emb = fluid.layers.transpose(x=emb, perm=[1, 0, 2]) - - c_pre_init = fluid.layers.fill_constant( - dtype=emb.dtype, shape=[batch_size, emb_dim], value=0.0) - c_pre_init.stop_gradient = False - layer_1_out = lstm(emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) - layer_1_out = fluid.layers.transpose(x=layer_1_out, perm=[1, 0, 2]) - - prediction = fluid.layers.fc(input=layer_1_out, - size=class_dim, - act="softmax") - cost = fluid.layers.cross_entropy(input=prediction, label=label) - - avg_cost = fluid.layers.mean(x=cost) - adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) - adam_optimizer.minimize(avg_cost) - acc = fluid.layers.accuracy(input=prediction, label=label) - - return avg_cost, acc - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def chop_data(data, chop_len=80, batch_size=50): - data = [(x[0][:chop_len], x[1]) for x in data if len(x[0]) >= chop_len] - - return data[:batch_size] - - -def prepare_feed_data(data, place): - tensor_words = to_lodtensor(map(lambda x: x[0], data), place) - - label = np.array(map(lambda x: x[1], data)).astype("int64") - label = label.reshape([len(label), 1]) - tensor_label = fluid.LoDTensor() - tensor_label.set(label, place) - - return tensor_words, tensor_label - - -def main(): - BATCH_SIZE = 100 - PASS_NUM = 5 - - word_dict = paddle.dataset.imdb.word_dict() - print "load word dict successfully" - dict_dim = len(word_dict) - class_dim = 2 - - cost, acc = lstm_net(dict_dim=dict_dim, class_dim=class_dim) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=BATCH_SIZE * 10), - batch_size=BATCH_SIZE) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - exe.run(fluid.default_startup_program()) - - for pass_id in xrange(PASS_NUM): - for data in train_data(): - chopped_data = chop_data(data) - tensor_words, tensor_label = prepare_feed_data(chopped_data, place) - - outs = exe.run(fluid.default_main_program(), - feed={"words": tensor_words, - "label": tensor_label}, - fetch_list=[cost, acc]) - cost_val = np.array(outs[0]) - acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if acc_val > 0.7: - exit(0) - exit(1) - - -if __name__ == '__main__': - main() diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 8cf54846fe5dba2742ce69e34e0788e124a1a85d..766ba9681d1bb816170e0458f540b32511c02933 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -12,76 +12,145 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid +import unittest +import os -PASS_NUM = 100 -EMBED_SIZE = 32 -HIDDEN_SIZE = 256 -N = 5 -BATCH_SIZE = 32 -IS_SPARSE = True - -word_dict = paddle.dataset.imikolov.build_dict() -dict_size = len(word_dict) - -first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') -second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') -third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') -forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') -next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') - -embed_first = fluid.layers.embedding( - input=first_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') -embed_second = fluid.layers.embedding( - input=second_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') -embed_third = fluid.layers.embedding( - input=third_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') -embed_forth = fluid.layers.embedding( - input=forth_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') - -concat_embed = fluid.layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], axis=1) -hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') -predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax') -cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) -avg_cost = fluid.layers.mean(x=cost) -sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) -sgd_optimizer.minimize(avg_cost) - -train_reader = paddle.batch( - paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) - -place = fluid.CPUPlace() -exe = fluid.Executor(place) -feeder = fluid.DataFeeder( - feed_list=[first_word, second_word, third_word, forth_word, next_word], - place=place) - -exe.run(fluid.default_startup_program()) - -for pass_id in range(PASS_NUM): - for data in train_reader(): - avg_cost_np = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[avg_cost]) - if avg_cost_np[0] < 5.0: - exit(0) # if avg cost less than 10.0, we think our code is good. -exit(1) + +def main(use_cuda, is_sparse, parallel): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + + PASS_NUM = 100 + EMBED_SIZE = 32 + HIDDEN_SIZE = 256 + N = 5 + BATCH_SIZE = 32 + IS_SPARSE = is_sparse + + def __network__(words): + embed_first = fluid.layers.embedding( + input=words[0], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_second = fluid.layers.embedding( + input=words[1], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_third = fluid.layers.embedding( + input=words[2], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_forth = fluid.layers.embedding( + input=words[3], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + + concat_embed = fluid.layers.concat( + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) + hidden1 = fluid.layers.fc(input=concat_embed, + size=HIDDEN_SIZE, + act='sigmoid') + predict_word = fluid.layers.fc(input=hidden1, + size=dict_size, + act='softmax') + cost = fluid.layers.cross_entropy(input=predict_word, label=words[4]) + avg_cost = fluid.layers.mean(x=cost) + return avg_cost + + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + + first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') + second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') + third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') + forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') + next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + + if not parallel: + avg_cost = __network__( + [first_word, second_word, third_word, forth_word, next_word]) + else: + places = fluid.layers.get_places() + pd = fluid.layers.ParallelDo(places) + with pd.do(): + avg_cost = __network__( + map(pd.read_input, [ + first_word, second_word, third_word, forth_word, next_word + ])) + pd.write_output(avg_cost) + + avg_cost = fluid.layers.mean(x=pd()) + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) + + train_reader = paddle.batch( + paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + feeder = fluid.DataFeeder( + feed_list=[first_word, second_word, third_word, forth_word, next_word], + place=place) + + exe.run(fluid.default_startup_program()) + + for pass_id in range(PASS_NUM): + for data in train_reader(): + avg_cost_np = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost]) + if avg_cost_np[0] < 5.0: + return + raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0])) + + +FULL_TEST = os.getenv('FULL_TEST', + '0').lower() in ['true', '1', 't', 'y', 'yes', 'on'] +SKIP_REASON = "Only run minimum number of tests in CI server, to make CI faster" + + +class W2VTest(unittest.TestCase): + pass + + +def inject_test_method(use_cuda, is_sparse, parallel): + fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse" + if is_sparse else "dense", "parallel" + if parallel else "normal") + + def __impl__(*args, **kwargs): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel) + + if use_cuda and is_sparse and parallel: + fn = __impl__ + else: + # skip the other test when on CI server + fn = unittest.skipUnless( + condition=FULL_TEST, reason=SKIP_REASON)(__impl__) + + setattr(W2VTest, fn_name, fn) + + +for use_cuda in (False, True): + for is_sparse in (False, True): + for parallel in (False, True): + inject_test_method(use_cuda, is_sparse, parallel) + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py index 107b9567dc4a8539532c2fff40df437cc72cc163..b0c55df9f58834688846c5362113464996eb286a 100644 --- a/python/paddle/v2/fluid/tests/test_dropout_op.py +++ b/python/paddle/v2/fluid/tests/test_dropout_op.py @@ -21,7 +21,7 @@ class TestDropoutOp(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_test': False} + self.attrs = {'dropout_prob': 0.0, 'fix_seed': True, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64)).astype('float32') @@ -38,7 +38,7 @@ class TestDropoutOp2(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 1.0, 'is_test': False} + self.attrs = {'dropout_prob': 1.0, 'fix_seed': True, 'is_test': False} self.outputs = { 'Out': np.zeros((32, 64)).astype('float32'), 'Mask': np.zeros((32, 64)).astype('float32') @@ -49,7 +49,7 @@ class TestDropoutOp3(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_test': False} + self.attrs = {'dropout_prob': 0.0, 'fix_seed': True, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64, 2)).astype('float32') @@ -60,7 +60,7 @@ class TestDropoutOp4(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.35, 'is_test': True} + self.attrs = {'dropout_prob': 0.35, 'fix_seed': True, 'is_test': True} self.outputs = { 'Out': self.inputs['X'] * (1.0 - self.attrs['dropout_prob']) } diff --git a/python/paddle/v2/fluid/tests/test_elementwise_pow_op.py b/python/paddle/v2/fluid/tests/test_elementwise_pow_op.py new file mode 100644 index 0000000000000000000000000000000000000000..e31749df9baf10215fcd0cca3c1097f00c163ec7 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_elementwise_pow_op.py @@ -0,0 +1,43 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import numpy as np +from op_test import OpTest + + +class TestElementwisePowOp(OpTest): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + self.check_output() + + +class TestElementwisePowOp_scalar(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.rand(2, 3, 4).astype('float32'), + 'Y': np.random.rand(1).astype('float32') + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_label_smooth_op.py b/python/paddle/v2/fluid/tests/test_label_smooth_op.py new file mode 100644 index 0000000000000000000000000000000000000000..19a4df57446c0c83b415909df3e0246bf2716881 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_label_smooth_op.py @@ -0,0 +1,55 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +class TestLabelSmoothOp(OpTest): + def config(self): + self.op_type = "label_smooth" + self.epsilon = 0.1 + batch_size, self.label_dim = 5, 10 + self.label = np.zeros((batch_size, self.label_dim)).astype("float64") + nonzero_index = np.random.randint(self.label_dim, size=(batch_size)) + self.label[np.arange(batch_size), nonzero_index] = 1 + + def setUp(self): + self.config() + smoothed_label = (1 - self.epsilon + ) * self.label + self.epsilon / self.label_dim + self.inputs = {'X': self.label} + self.attrs = {'epsilon': self.epsilon} + self.outputs = {'Out': smoothed_label} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp): + def setUp(self): + self.config() + dist = np.random.random((1, self.label_dim)) + smoothed_label = (1 - self.epsilon) * self.label + self.epsilon * dist + self.inputs = {'X': self.label, 'PriorDist': dist} + self.attrs = {'epsilon': self.epsilon} + self.outputs = {'Out': smoothed_label} + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_layer_norm_op.py b/python/paddle/v2/fluid/tests/test_layer_norm_op.py new file mode 100644 index 0000000000000000000000000000000000000000..68cf8673cd46677065588f652482cd0df08b3450 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_layer_norm_op.py @@ -0,0 +1,252 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import numpy as np + +from operator import mul +from op_test import OpTest +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator +from paddle.v2.fluid.framework import grad_var_name + + +def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1): + x_shape = x.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + x.shape = [N, D] + + mean = np.mean(x, axis=1) + var = np.var(x, axis=1) + epsilon + output = scale.reshape([1, D]) * np.divide( + (x - mean.reshape([N, 1])), + (np.sqrt(var)).reshape([N, 1])) + beta.reshape([1, D]) + + x.shape, output.shape = x_shape, x_shape + return output, mean, var + + +def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1): + x_shape = x.shape + scale_shape = scale.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + x.shape, grad_y.shape = [N, D], [N, D] + var.shape, mean.shape = [N, 1], [N, 1] + scale.shape = [1, D] + + # d_bias + d_bias = np.sum(grad_y, axis=0).reshape([1, D]) + # d_scale + d_scale = np.sum(((x - mean) * np.sqrt(1 / var)) * grad_y, + axis=0).reshape([1, D]) + # dx + dx_end = scale * np.sqrt(1.0 / var) * grad_y + d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape( + [N, 1]) # the second part equals to zero. + d_mean = 1.0 / D * d_mean_0 + d_std = np.sum( + -(1.0 / var) * (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * ( + 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean)) + + grad_x = dx_end + d_mean + d_std + + grad_y.shape = x_shape + x.shape = x_shape + scale.shape = scale_shape + return grad_x, d_scale, d_bias + + +def get_backward_op(scope, op, no_grad_set): + backward_op = core.Operator.backward(op, no_grad_set) + for input in backward_op.input_vars(): + var = scope.var(input) + var.get_tensor() + for output in backward_op.output_vars(): + var = scope.var(output) + var.get_tensor() + return backward_op + + +def create_or_get_tensor(scope, var_name, var, place): + tensor = scope.var(var_name).get_tensor() + if var is not None: + assert isinstance(var, np.ndarray) + tensor.set_lod([[]]) + tensor.set_dims(var.shape) + tensor.set(var, place) + return tensor + + +def set_output_grad(scope, outputs, place, feed_dict=None): + def __set_tensor__(name, data=None): + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.var(grad_var_name(name)).get_tensor() + out_dtype = out_tensor.dtype() + if data is None: + if out_dtype == core.DataType.FP64: + data = np.ones(out_tensor.shape(), dtype=np.float64) + elif out_dtype == core.DataType.FP32: + data = np.ones(out_tensor.shape(), dtype=np.float32) + else: + raise ValueError("Not supported data type " + str(out_dtype)) + grad_tensor.set(data, place) + + for output in outputs: + data = None + if output in feed_dict: + data = feed_dict[output] + __set_tensor__(output, data) + + +class TestLayerNormdOp(OpTest): + def __assert_close(self, tensor, np_array, msg, atol=1e-4): + self.assertTrue( + np.allclose( + np.array(tensor).reshape(np_array.shape), np_array, atol=atol), + msg) + + def __assert_grad_close(self, + tensor, + np_array, + name, + place, + max_relative_error=0.02): + a = np.array(tensor).reshape(np_array.shape) + b = np_array + abs_a = np.abs(a) + abs_a[abs_a < 1e-5] = 1 + + diff_mat = np.abs(a - b) / abs_a + max_diff = np.max(diff_mat) + + def err_msg(): + offset = np.argmax(diff_mat > max_relative_error) + return ("%s Variable %s max gradient diff %f over limit %f, " + "the first error element is %d, %f, %f") % ( + "Gradient Check On %s" % str(place), name, max_diff, + max_relative_error, offset, a.flatten()[offset], + b.flatten()[offset]) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def check_forward_backward(self, shape, begin_norm_axis): + def test_with_place(place, shape, begin_norm_axis=1): + # setUp + assert begin_norm_axis > 0 and begin_norm_axis < len( + shape), 'begin_norm_axis must be between 0 and len(shape)-1.' + # attr + epsilon = 0.00001 + x_shape = shape + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + scale_shape = [D] + np.random.random(123) + x_val = np.random.random_sample(x_shape).astype(np.float32) + scale_val = np.random.random_sample(scale_shape).astype(np.float32) + bias_val = np.random.random_sample(scale_shape).astype(np.float32) + y_grad = np.random.random_sample(x_shape).astype(np.float32) + + # run forward + y_out, saved_mean, var_ref = _reference_layer_norm_naive( + x_val, scale_val, bias_val, epsilon, begin_norm_axis) + naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref} + + # get gradient + x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad( + x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis) + naive_grad = { + "X": x_grad_ref, + "Scale": scale_grad_ref, + "Bias": bias_grad_ref + } + + scope = core.Scope() + + # create input + input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val} + for i_name in input_map: + create_or_get_tensor(scope, i_name, input_map[i_name], place) + + # create output + output_map = {"Y": None, "Mean": None, "Variance": None} + output_tensor = {} + for o_name in output_map: + output_tensor[o_name] = create_or_get_tensor( + scope, o_name, output_map[o_name], place) + + layer_norm_op = Operator( + "layer_norm", + # inputs + X="X", + Scale="Scale", + Bias="Bias", + # outputs + Y="Y", + Mean="Mean", + Variance="Variance", + # attrs + epsilon=epsilon, + begin_norm_axis=begin_norm_axis) + + layer_norm_op.run(scope, place) + + # check forward result + atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4 + for o_tensor in output_tensor: + self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor], + o_tensor, atol) + + # run backward + layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set()) + set_output_grad( + scope, ["Y", "Mean", "Variance"], + place, + feed_dict={"Y": y_grad}) + layer_norm_op_grad.run(scope, place) + + # get output + grad_tensor = {} + for o_name in naive_grad: + grad_tensor[o_name] = x_ = create_or_get_tensor( + scope, grad_var_name(o_name), None, place) + + # check gradient output + for o_grad in naive_grad: + self.__assert_grad_close(grad_tensor[o_grad], + naive_grad[o_grad], o_grad + "@GRAD", + place) + + places = [core.CPUPlace()] + if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): + places.append(core.CUDAPlace(0)) + + for place in places: + test_with_place(place, shape, begin_norm_axis) + + def test_check_forward_backward_with_scale_and_bias(self): + self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) + self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) + + def test_check_forward_backward_with_scale(self): + pass # TODO(zcd) + + def test_check_forward_backward_with_bias(self): + pass # TODO(zcd) + + def test_check_forward_backward(self): + pass # TODO(zcd) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_learning_rate_decay.py b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py new file mode 100644 index 0000000000000000000000000000000000000000..dc348cf2d21693290095900f8ab63c29923b4673 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py @@ -0,0 +1,110 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import math +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid as fluid +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.learning_rate_decay as lr_decay + + +def exponential_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + exponent = float(global_step) / float(decay_steps) + if staircase: + exponent = math.floor(exponent) + return learning_rate * decay_rate**exponent + + +def natural_exp_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + exponent = float(global_step) / float(decay_steps) + if staircase: + exponent = math.floor(exponent) + return learning_rate * math.exp(-1 * decay_rate * exponent) + + +def inverse_time_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + temp = float(global_step) / float(decay_steps) + if staircase: + temp = math.floor(temp) + return learning_rate / (1 + decay_rate * temp) + + +class TestLearningRateDecay(unittest.TestCase): + def check_decay(self, python_decay_fn, fluid_decay_fn, staircase): + init_lr = 1.0 + decay_steps = 5 + decay_rate = 0.5 + + global_step = layers.create_global_var( + shape=[1], value=0.0, dtype='float32', persistable=True) + + decayed_lr = fluid_decay_fn( + learning_rate=init_lr, + global_step=global_step, + decay_steps=decay_steps, + decay_rate=decay_rate, + staircase=staircase) + layers.increment(global_step, 1.0) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(fluid.default_startup_program()) + for step in range(10): + step_val, lr_val = exe.run(fluid.default_main_program(), + feed=[], + fetch_list=[global_step, decayed_lr]) + python_decayed_lr = python_decay_fn( + learning_rate=init_lr, + global_step=step, + decay_steps=decay_steps, + decay_rate=decay_rate, + staircase=staircase) + self.assertAlmostEqual(python_decayed_lr, lr_val[0]) + + def test_decay(self): + decay_fns = [ + (exponential_decay, lr_decay.exponential_decay, True), + (exponential_decay, lr_decay.exponential_decay, False), + (natural_exp_decay, lr_decay.natural_exp_decay, True), + (natural_exp_decay, lr_decay.natural_exp_decay, False), + (inverse_time_decay, lr_decay.inverse_time_decay, True), + (inverse_time_decay, lr_decay.inverse_time_decay, False), + ] + + for py_decay_fn, fluid_decay_fn, staircase in decay_fns: + print("decay_fn=" + str(py_decay_fn) + " staircase=" + str( + staircase)) + main_program = framework.Program() + startup_program = framework.Program() + with framework.program_guard(main_program, startup_program): + self.check_decay(py_decay_fn, fluid_decay_fn, staircase) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_recv_op.py b/python/paddle/v2/fluid/tests/test_recv_op.py index 5c4cec028d354b99d6203281ec4c727d7e3eceac..3a02b882410fe896cd2add03060127a01cbdaa38 100644 --- a/python/paddle/v2/fluid/tests/test_recv_op.py +++ b/python/paddle/v2/fluid/tests/test_recv_op.py @@ -19,6 +19,7 @@ import paddle.v2.fluid.layers as layers import numpy from multiprocessing import Process import os, sys +import time class TestRecvOp(unittest.TestCase): @@ -28,6 +29,7 @@ class TestRecvOp(unittest.TestCase): p = Process(target=self.init_serv, args=(place, )) p.daemon = True p.start() + time.sleep(1) self.init_client(place) # FIXME(typhoonzero): find a way to gracefully shutdown the server. os.system("kill -9 %d" % p.pid) diff --git a/python/paddle/v2/fluid/tests/test_tensor.py b/python/paddle/v2/fluid/tests/test_tensor.py index d5cc235f588ad37b0d1293dc9894952c97411757..0219bef42b3ba133dda7412c1036cf989a170a36 100644 --- a/python/paddle/v2/fluid/tests/test_tensor.py +++ b/python/paddle/v2/fluid/tests/test_tensor.py @@ -108,9 +108,31 @@ class TestTensor(unittest.TestCase): scope = core.Scope() place = core.CPUPlace() lod_py = [[0, 2, 5], [0, 2, 4, 5]] - lod_tensor = core.LoDTensor(lod_py) + lod_tensor = core.LoDTensor() lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.set_lod(lod_py) + lod_tensor.alloc_float(place) + tensor_array = numpy.array(lod_tensor) + tensor_array[0, 0, 0, 0] = 1.0 + tensor_array[0, 0, 0, 1] = 2.0 + lod_tensor.set(tensor_array, place) + + lod_v = numpy.array(lod_tensor) + self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) + self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) + self.assertListEqual(lod_py, lod_tensor.lod()) + + def test_lod_tensor_gpu_init(self): + if not core.is_compiled_with_cuda(): + return + scope = core.Scope() + place = core.CUDAPlace(0) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor = core.LoDTensor() + + lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.set_lod(lod_py) lod_tensor.alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0